Using instancedSkinnedMesh in Three.js, enabling the rendering of hundreds of 3D characters on screen simultaneously
Saga
Posted on July 9, 2024
In Three.js products, we all know that instancing is necessary to improve rendering performance. However, the official InstancedMesh provided by Three.js only supports static (non-skinned) objects. In our product https://timmerse.com, we need to accommodate dozens or even hundreds of people participating in the same event. During testing, we found that once the number of Avatars exceeds twenty, the page starts dropping frames and can no longer sustain a smooth 60 frames per second.
We have made several optimization attempts for 3D rendering:
1. Reducing draw calls
We performed model simplification, reducing the number of triangles in the scene as much as possible without affecting the overall rendering effect. There's a general method for this, and since our scene is already low-poly, many elements still maintain good rendering quality after simplification. This significantly reduced the number of triangles in the scene. Additionally, some advanced rendering effects, such as CSM (Cascaded Shadow Maps), are only enabled on high-performance machines. Below is an implementation of GPU performance detection capability:
/** Rendering-capability tier assigned to a GPU. */
type GpuLevel = 'high' | 'low' | 'middle';

/**
 * Classifies the current GPU into a coarse performance tier ('high',
 * 'middle' or 'low') by pattern-matching the renderer string returned by
 * `getGPUModel()`. The tier is computed once, in the constructor, and is
 * exposed through the read-only `level` accessor.
 */
export class GpuDetector {
  /** Renderer string as returned by `getGPUModel()`. */
  gpu: string;
  /** Detected tier; stays at 'high' when no vendor rule matches the renderer string. */
  _level: GpuLevel = 'high';

  constructor() {
    this.gpu = getGPUModel();
    // The desktop rules always run first and act as the baseline; on
    // mobile/cloud clients the mobile rules then override them where they match.
    this.detectPC();
    if (isMobileOrCloud) {
      this.detectMobile();
    }
    console.log('GPU: ', this.gpu, ';level:', this._level);
  }

  /** The tier detected at construction time. */
  get level(): GpuLevel {
    return this._level;
  }

  /**
   * Mobile rules: an iOS screen heuristic first, then the Adreno / Mali /
   * PowerVR families, which take precedence when the renderer string names one.
   */
  detectMobile(): void {
    if (iOS) {
      // NOTE(review): the 812px / DPR 2 thresholds presumably separate newer
      // iPhones from older ones — confirm against the device matrix.
      this._level = window.screen.height >= 812 && window.devicePixelRatio >= 2 ? 'high' : 'low';
    }
    if (/adreno/i.test(this.gpu)) {
      this._level = this.adrenoGPU();
    } else if (/mali/i.test(this.gpu)) {
      this._level = this.maliGPU();
    } else if (/powervr/i.test(this.gpu)) {
      this._level = this.powerVRGPU();
    }
  }

  /**
   * Desktop rules, checked in order: Apple M-series, other Apple GPUs,
   * NVIDIA, AMD, Intel. A missing renderer string is treated as 'low';
   * an unrecognised one leaves the current tier untouched.
   */
  detectPC(): void {
    if (!this.gpu) {
      this._level = 'low';
      return;
    }
    if (/apple m/i.test(this.gpu)) {
      this._level = 'high';
    } else if (/apple/i.test(this.gpu)) {
      this._level = 'middle';
    } else if (/nvidia/i.test(this.gpu)) {
      this._level = this.nvidiaGPU();
    } else if (/amd/i.test(this.gpu)) {
      this._level = this.amdGPU();
    } else if (/intel/i.test(this.gpu)) {
      this._level = this.intelGPU();
    }
  }

  /** PowerVR: only renderer strings containing "GT8" are rated 'high'. */
  powerVRGPU(): GpuLevel {
    return /GT8/i.test(this.gpu) ? 'high' : 'low';
  }

  /**
   * Adreno: tier by model number (> 640 high, 570..640 middle, otherwise low).
   * The number is the first run of digits after "adreno"; if there is none,
   * the last space-separated token is tried. An unparsable number yields 'low'.
   */
  adrenoGPU(): GpuLevel {
    // No trailing ".+" here: requiring a character after the digits would
    // chop the last digit off a model number that ends the string.
    const match = /adreno\D*(\d+)/i.exec(this.gpu);
    const tokens = this.gpu.split(' ');
    const model = Number.parseInt(match !== null ? match[1] : tokens[tokens.length - 1], 10);
    if (model > 640) {
      return 'high';
    }
    return model >= 570 ? 'middle' : 'low';
  }

  /**
   * Mali: only the "Mali-G" family is rated above 'low'. Models above 77 are
   * 'high'; 76, 52 and 31 are 'middle'.
   */
  maliGPU(): GpuLevel {
    if (!/mali-g/i.test(this.gpu)) {
      return 'low';
    }
    // Split case-insensitively so the parse agrees with the test above.
    const parts = this.gpu.split(/mali-g/i);
    const model = Number.parseInt(parts[parts.length - 1], 10);
    if (model > 77) {
      return 'high';
    }
    // NOTE(review): G77 itself falls through to 'low' while G76 is 'middle' —
    // confirm whether that gap is intentional.
    return model === 76 || model === 31 || model === 52 ? 'middle' : 'low';
  }

  /** NVIDIA: RTX / Titan are 'high', GTX is 'middle', everything else 'low'. */
  nvidiaGPU(): GpuLevel {
    if (/(rtx|titan)/i.test(this.gpu)) {
      return 'high';
    }
    return /gtx/i.test(this.gpu) ? 'middle' : 'low';
  }

  /**
   * AMD: "Pro" and "Radeon VII" parts are 'middle'; RX parts are 'middle'
   * above model 560 and 'low' otherwise; anything else is 'middle'.
   */
  amdGPU(): GpuLevel {
    if (/(pro|radeon vii)/i.test(this.gpu)) {
      return 'middle';
    }
    if (/(rx)/i.test(this.gpu)) {
      // Split case-insensitively so the parse agrees with the test above.
      const parts = this.gpu.split(/rx /i);
      return Number.parseInt(parts[parts.length - 1], 10) > 560 ? 'middle' : 'low';
    }
    return 'middle';
  }

  /**
   * Intel: Iris parts are 'middle' when reported through the "OpenGL Engine"
   * or when the number after "Graphics " is at least 650; HD parts are
   * 'middle' only when the number after "HD " exceeds 7000; otherwise 'low'
   * (or 'middle' if the string also mentions Apple).
   */
  intelGPU(): GpuLevel {
    if (/iris/i.test(this.gpu)) {
      if (/opengl engine/i.test(this.gpu)) {
        return 'middle';
      }
      const parts = this.gpu.split(/graphics /i);
      return Number.parseInt(parts[1], 10) >= 650 ? 'middle' : 'low';
    }
    if (/HD/i.test(this.gpu)) {
      const parts = this.gpu.split(/hd /i);
      return Number.parseInt(parts[1], 10) > 7e3 ? 'middle' : 'low';
    }
    return /apple/i.test(this.gpu) ? 'middle' : 'low';
  }
}
2. Reducing texture size
Textures come from many places: skybox textures, textures for the various Avatar components, and image and video assets. There is also a deeply hidden source: the textures embedded in user-uploaded models. We can use https://gltf.report/ to analyze how much GPU memory the textures in a model file occupy. I've also written an analysis tool that scans the glb files in a directory and helps locate problematic models.
const fs = require('fs');
const path = require('path');
const { Document, NodeIO } = require('@gltf-transform/core');
const { execSync } = require('child_process');
const {
KHRDracoMeshCompression,
KHRMaterialsEmissiveStrength,
KHRMaterialsSpecular,
KHRMaterialsIOR,
KHRMaterialsClearcoat,
KHRMaterialsIridescence,
KHRMeshQuantization,
EXTMeshoptCompression,
} = require('@gltf-transform/extensions');
const draco3d = require('draco3dgltf');
const meshopt = require('meshoptimizer');
/**
 * Scans a directory tree for .glb/.gltf files, estimates the GPU memory their
 * textures occupy (width * height * 4 bytes, no mipmaps counted), prints the
 * per-file results sorted from largest to smallest and writes them to
 * ./gpu-memory.txt.
 */
(async () => {
  // Estimate assumes uncompressed RGBA, i.e. 4 bytes per pixel.
  const BYTES_PER_PIXEL = 4;
  const BYTES_PER_MEGABYTE = 1024 * 1024;

  // MeshoptDecoder loads its WebAssembly module asynchronously and exposes a
  // `ready` promise; awaiting the decoder object itself does not wait for it.
  await meshopt.MeshoptDecoder.ready;

  // One reader shared by every file: building the Draco decoder module is
  // expensive and does not depend on the file being read.
  const io = new NodeIO()
    .registerExtensions([
      KHRDracoMeshCompression,
      KHRMaterialsEmissiveStrength,
      KHRMaterialsSpecular,
      KHRMaterialsIOR,
      KHRMaterialsClearcoat,
      KHRMaterialsIridescence,
      KHRMeshQuantization,
      EXTMeshoptCompression,
    ])
    .registerDependencies({
      'draco3d.decoder': await draco3d.createDecoderModule(),
      'meshopt.decoder': meshopt.MeshoptDecoder,
    });

  /** Collected `{ megabytes, filePath }` records, one per readable model file. */
  const results = [];

  /**
   * Reads one model file and records its estimated texture memory.
   * A file that cannot be read is logged and skipped.
   * @param {string} filePath - Path of the .glb/.gltf file.
   */
  async function processFile(filePath) {
    let document;
    try {
      document = await io.read(filePath);
    } catch (error) {
      console.error(`Error reading ${filePath}:`, error);
      return;
    }

    let fileGPUMemory = 0;
    for (const texture of document.getRoot().listTextures()) {
      const dimensions = texture.getSize();
      // Textures without image data or a resolvable size contribute nothing.
      if (texture.getImage() && dimensions) {
        fileGPUMemory += dimensions[0] * dimensions[1] * BYTES_PER_PIXEL;
      }
    }
    results.push({ megabytes: fileGPUMemory / BYTES_PER_MEGABYTE, filePath });
  }

  /**
   * Recursively walks a directory and processes every glTF file found.
   * @param {string} directoryPath - Directory to scan.
   */
  async function traverseDirectory(directoryPath) {
    for (const file of fs.readdirSync(directoryPath)) {
      const fullPath = path.join(directoryPath, file);
      if (fs.statSync(fullPath).isDirectory()) {
        await traverseDirectory(fullPath);
        continue;
      }
      // Compare case-insensitively so "MODEL.GLB" is not skipped.
      const lowerCasePath = fullPath.toLowerCase();
      if (lowerCasePath.endsWith('.glb') || lowerCasePath.endsWith('.gltf')) {
        await processFile(fullPath);
      }
    }
  }

  await traverseDirectory(path.resolve(__dirname, '../../../avatar'));

  // Sort on the numeric value, largest first, then format for output.
  results.sort((a, b) => b.megabytes - a.megabytes);
  const resList = results.map(
    ({ megabytes, filePath }) => `${megabytes} MB texture GPU memory: ${filePath}`
  );
  console.log(resList);
  fs.writeFileSync('./gpu-memory.txt', resList.join('\n'), 'utf-8');
})().catch(error => {
  // Surface failures instead of leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});
3. Instancing of Skinned Meshes
After completing the two optimizations mentioned above, large scenes can now run smoothly on both PC and mobile devices when there are relatively few Avatars. The remaining bottleneck is the Avatars themselves. Our Avatars have more than ten components: hairstyles, facial features, clothing, etc. Since the characters support movement and can perform many skeletal animations, each Avatar's skinned meshes represent a significant performance cost. With 20 people, there are over 200 skinned meshes in the scene, which already creates a rendering bottleneck.
Based on the discussion of instancedSkinnedMesh in the three.js pull request https://github.com/mrdoob/three.js/pull/22667, here is the extracted core code:
import * as THREE from 'three'
// Module-level scratch objects, shared by all instances to avoid per-call allocations.

// Receives the local matrix of the instance currently being raycast.
const _instanceLocalMatrix = /*@__PURE__*/ new THREE.Matrix4()
// Receives mesh world matrix * instance local matrix during raycasting.
const _instanceWorldMatrix = /*@__PURE__*/ new THREE.Matrix4()
// Receives bone world matrix * bone inverse while flattening bone matrices.
const _offsetMatrix = /*@__PURE__*/ new THREE.Matrix4()
// Never written to; stands in for the world matrix of a missing bone.
const _identityMatrix = /*@__PURE__*/ new THREE.Matrix4()
// Collects the hits of a single instance during raycasting; emptied after each instance.
const _instanceIntersects = []
// Ensures the global shader-chunk patch is applied only once.
let patchedChunks = false
/**
 * A SkinnedMesh that is drawn `count` times in one call, each instance with
 * its own transform (`instanceMatrix`), optional colour (`instanceColor`) and
 * its own set of bone matrices (`instanceBones`).
 *
 * Bone matrices for all instances live in one Float32Array that backs the
 * skeleton's bone texture: one texture row per instance, four texels per bone.
 * The first construction also patches three.js shader chunks globally so the
 * skinning shader reads the row selected by `gl_InstanceID`.
 */
export class InstancedSkinnedMesh extends THREE.SkinnedMesh {
  /**
   * @param geometry - Geometry passed through to SkinnedMesh.
   * @param material - Material passed through to SkinnedMesh.
   * @param {number} [count=1] - Number of instances.
   */
  constructor(geometry, material, count = 1) {
    super(geometry, material)

    this.instanceMatrix = new THREE.InstancedBufferAttribute(
      new Float32Array(count * 16),
      16
    )
    this.instanceColor = null
    this.instanceBones = null
    this.count = count
    this.frustumCulled = false
    // Lazily created helper mesh used by raycast().
    this._mesh = null
    this.isInstancedMesh = true

    // Wrap bind() so that every skeleton bound to this mesh gets
    // instance-aware update() and computeBoneTexture() implementations.
    const bind = this.bind.bind(this)
    this.bind = function (skeleton, bindMatrix) {
      bind(skeleton, bindMatrix)

      // update(): writes the skeleton's current pose into `instanceBones`
      // at the slot of instance `id`, or into the skeleton's own
      // `boneMatrices` (slot 0) when called without arguments.
      this.skeleton.update = (instanceBones, id) => {
        const bones = this.skeleton.bones
        const boneInverses = this.skeleton.boneInverses
        const boneMatrices = instanceBones || this.skeleton.boneMatrices
        const boneTexture = this.skeleton.boneTexture
        const instanceId = id || 0

        // flatten bone matrices to array
        for (let i = 0, il = bones.length; i < il; i++) {
          // compute the offset between the current and the original transform
          const matrix = bones[i] ? bones[i].matrixWorld : _identityMatrix
          _offsetMatrix.multiplyMatrices(matrix, boneInverses[i])
          _offsetMatrix.toArray(
            boneMatrices,
            16 * (i + instanceId * bones.length)
          )
        }

        if (boneTexture !== null) {
          boneTexture.needsUpdate = true
        }
      }

      // computeBoneTexture(): one row per instance, 4 RGBA texels per bone.
      this.skeleton.computeBoneTexture = this.skeleton.computeInstancedBoneTexture = () => {
        // The texture may be requested before setBonesAt() has ever run.
        // Allocate the buffer here in that case, so the texture and later
        // setBonesAt() calls share the same array instead of the texture
        // being created with no data.
        this._ensureInstanceBones(this.skeleton)
        this.skeleton.boneTexture = new THREE.DataTexture(
          this.instanceBones,
          this.skeleton.bones.length * 4,
          this.count,
          THREE.RGBAFormat,
          THREE.FloatType
        )
        this.skeleton.boneTexture.needsUpdate = true
      }
    }

    // Patch three.js skinning shader chunks for points and instanced bones
    if (!patchedChunks) {
      patchedChunks = true

      THREE.ShaderChunk.points_vert = THREE.ShaderChunk.points_vert.replace(
        '#include <clipping_planes_pars_vertex>',
        '#include <clipping_planes_pars_vertex>\n#include <skinning_pars_vertex>'
      )
      THREE.ShaderChunk.points_vert = THREE.ShaderChunk.points_vert.replace(
        '#include <morphtarget_vertex>',
        '#include <skinbase_vertex>\n#include <morphtarget_vertex>\n#include <skinning_vertex>'
      )
      // Update PointsMaterial
      THREE.ShaderLib.points.vertexShader = THREE.ShaderChunk.points_vert

      THREE.ShaderChunk.skinning_pars_vertex = /* glsl */ `
#ifdef USE_SKINNING
uniform mat4 bindMatrix;
uniform mat4 bindMatrixInverse;
uniform highp sampler2D boneTexture;
uniform int boneTextureSize;
mat4 getBoneMatrix( const in float i ) {
#ifdef USE_INSTANCING
int j = 4 * int(i);
vec4 v1 = texelFetch(boneTexture, ivec2( j, gl_InstanceID ), 0);
vec4 v2 = texelFetch(boneTexture, ivec2( j + 1, gl_InstanceID ), 0);
vec4 v3 = texelFetch(boneTexture, ivec2( j + 2, gl_InstanceID ), 0);
vec4 v4 = texelFetch(boneTexture, ivec2( j + 3, gl_InstanceID ), 0);
#else
float j = i * 4.0;
float x = mod( j, float( boneTextureSize ) );
float y = floor( j / float( boneTextureSize ) );
float dx = 1.0 / float( boneTextureSize );
float dy = 1.0 / float( boneTextureSize );
y = dy * ( y + 0.5 );
vec4 v1 = texture2D( boneTexture, vec2( dx * ( x + 0.5 ), y ) );
vec4 v2 = texture2D( boneTexture, vec2( dx * ( x + 1.5 ), y ) );
vec4 v3 = texture2D( boneTexture, vec2( dx * ( x + 2.5 ), y ) );
vec4 v4 = texture2D( boneTexture, vec2( dx * ( x + 3.5 ), y ) );
#endif
mat4 bone = mat4( v1, v2, v3, v4 );
return bone;
}
#endif
`
    }
  }

  /**
   * Allocates `instanceBones` (16 floats per bone per instance) if it does
   * not exist yet.
   * @param skeleton - Skeleton whose bone count sizes the buffer.
   */
  _ensureInstanceBones(skeleton) {
    if (this.instanceBones === null) {
      this.instanceBones = new Float32Array(
        skeleton.bones.length * 16 * this.count
      )
    }
  }

  /**
   * Copies `source` into this mesh, including per-instance matrices, colours
   * and the instance count when `source` is itself instanced.
   * @param source - Object to copy from.
   * @param {boolean} [recursive] - Forwarded to the parent class; omitted
   *   means the parent's default applies.
   * @returns this
   */
  copy(source, recursive) {
    // Forward `recursive` so that clone(false) is honoured.
    super.copy(source, recursive)

    if (source.isInstancedMesh) {
      this.instanceMatrix.copy(source.instanceMatrix)
      if (source.instanceColor !== null)
        this.instanceColor = source.instanceColor.clone()
      this.count = source.count
    }

    return this
  }

  /**
   * Reads the colour of instance `index` into `color`.
   * Requires that setColorAt() has been called at least once.
   */
  getColorAt(index, color) {
    color.fromArray(this.instanceColor.array, index * 3)
  }

  /** Reads the local matrix of instance `index` into `matrix`. */
  getMatrixAt(index, matrix) {
    matrix.fromArray(this.instanceMatrix.array, index * 16)
  }

  /**
   * Raycasts against every instance by moving a helper SkinnedMesh to each
   * instance's world transform in turn. Hits are appended to `intersects`
   * with `instanceId` set and `object` pointing at this mesh.
   */
  raycast(raycaster, intersects) {
    const matrixWorld = this.matrixWorld
    const raycastTimes = this.count

    if (this._mesh === null) {
      this._mesh = new THREE.SkinnedMesh(this.geometry, this.material)
      this._mesh.copy(this)
    }

    const _mesh = this._mesh
    if (_mesh.material === undefined) return

    for (let instanceId = 0; instanceId < raycastTimes; instanceId++) {
      // calculate the world matrix for each instance
      this.getMatrixAt(instanceId, _instanceLocalMatrix)
      _instanceWorldMatrix.multiplyMatrices(matrixWorld, _instanceLocalMatrix)

      // the mesh represents this single instance
      _mesh.matrixWorld = _instanceWorldMatrix
      _mesh.raycast(raycaster, _instanceIntersects)

      // process the result of raycast
      for (let i = 0, l = _instanceIntersects.length; i < l; i++) {
        const intersect = _instanceIntersects[i]
        intersect.instanceId = instanceId
        intersect.object = this
        intersects.push(intersect)
      }

      _instanceIntersects.length = 0
    }
  }

  /**
   * Sets the colour of instance `index`, creating the colour attribute on
   * first use.
   */
  setColorAt(index, color) {
    if (this.instanceColor === null) {
      this.instanceColor = new THREE.InstancedBufferAttribute(
        new Float32Array(this.instanceMatrix.count * 3),
        3
      )
    }
    color.toArray(this.instanceColor.array, index * 3)
  }

  /** Sets the local matrix of instance `index`. */
  setMatrixAt(index, matrix) {
    matrix.toArray(this.instanceMatrix.array, index * 16)
  }

  /**
   * Stores the current pose of `skeleton` as the bone matrices of instance
   * `index`.
   * @param {number} index - Instance slot to write.
   * @param [skeleton] - Skeleton to sample; defaults to this mesh's skeleton.
   *   NOTE(review): the instance-aware update() is installed by bind() on this
   *   mesh's skeleton only — confirm that any other skeleton passed here
   *   accepts the (buffer, index) arguments.
   */
  setBonesAt(index, skeleton) {
    const source = skeleton || this.skeleton
    this._ensureInstanceBones(source)
    source.update(this.instanceBones, index)
  }

  /** Intentionally a no-op override. */
  updateMorphTargets() {}

  /** Dispatches a 'dispose' event on this mesh. */
  dispose() {
    this.dispatchEvent({ type: 'dispose' })
  }
}
4. Business code optimization
After completing the above three major performance optimizations, what remains are the code snippets written by business colleagues that unintentionally affect the rendering process. This part is relatively easy to identify using Chrome's DevTools, so I won't elaborate further.
Optimization Results
After the series of optimizations mentioned above, we achieved having over a hundred people in a unified space while maintaining 60 frames per second performance on https://timmerse.com.
You can enter our Demo page for further experience:
Posted on July 9, 2024
Join Our Newsletter. No Spam, Only the good stuff.
Sign up to receive the latest update from our blog.