Commits

davep committed 9fe99b9

reapply 668dcacd6e76: MAINT-646 Vectorize LLPolyMesh

  • Participants
  • Parent commits 3993ce4

Comments (0)

Files changed (10)

File indra/llcharacter/llcharacter.cpp

 //-----------------------------------------------------------------------------
 // updateMotions()
 //-----------------------------------------------------------------------------
+static LLFastTimer::DeclareTimer FTM_UPDATE_ANIMATION("Update Animation");
+static LLFastTimer::DeclareTimer FTM_UPDATE_HIDDEN_ANIMATION("Update Hidden Anim");
+static LLFastTimer::DeclareTimer FTM_UPDATE_MOTIONS("Update Motions");
+
 void LLCharacter::updateMotions(e_update_t update_type)
 {
 	if (update_type == HIDDEN_UPDATE)
 	{
+		LLFastTimer t(FTM_UPDATE_HIDDEN_ANIMATION);
 		mMotionController.updateMotionsMinimal();
 	}
 	else
 	{
+		LLFastTimer t(FTM_UPDATE_ANIMATION);
 		// unpause if the number of outstanding pause requests has dropped to the initial one
 		if (mMotionController.isPaused() && mPauseRequest->getNumRefs() == 1)
 		{
 		}
 		bool force_update = (update_type == FORCE_UPDATE);
 		{
+			LLFastTimer t(FTM_UPDATE_MOTIONS);
 			mMotionController.updateMotions(force_update);
 		}
 	}

File indra/llcharacter/llmotioncontroller.cpp

 //-----------------------------------------------------------------------------
 // updateMotionsByType()
 //-----------------------------------------------------------------------------
+static LLFastTimer::DeclareTimer FTM_MOTION_ON_UPDATE("Motion onUpdate");
+
 void LLMotionController::updateMotionsByType(LLMotion::LLMotionBlendType anim_type)
 {
 	BOOL update_result = TRUE;
 			}
 
 			// perform motion update
-			update_result = motionp->onUpdate(mAnimTime - motionp->mActivationTimestamp, last_joint_signature);
+			{
+				LLFastTimer t(FTM_MOTION_ON_UPDATE);
+				update_result = motionp->onUpdate(mAnimTime - motionp->mActivationTimestamp, last_joint_signature);
+			}
 		}
 
 		//**********************
 	{
 		// update additive motions
 		updateAdditiveMotions();
+				
 		resetJointSignatures();
-
+		
 		// update all regular motions
 		updateRegularMotions();
-
+		
 		if (use_quantum)
 		{
 			mPoseBlender.blendAndCache(TRUE);

File indra/newview/lldriverparam.cpp

 	mAvatarp(avatarp), 
 	mWearablep(NULL)
 {
+	mDefaultVec.clear();
 }
 
 LLDriverParam::LLDriverParam(LLWearable *wearablep) : 
 	mAvatarp(NULL), 
 	mWearablep(wearablep)
 {
+	mDefaultVec.clear(); // reset the temp holder that getAvgDistortion() fills in and returns
 }
 
 LLDriverParam::~LLDriverParam()
 	return sum; 
 }
 
-const LLVector3	&LLDriverParam::getAvgDistortion()	
+const LLVector4a	&LLDriverParam::getAvgDistortion()	
 {
 	// Averages the average distortion reported by every driven param and caches
 	// the result in mDefaultVec, which is what the returned reference points at.
 	// It's not actually correct to take the average of averages, but it is good enough here.
-	LLVector3 sum;
+	LLVector4a sum;
+	sum.clear();
 	S32 count = 0;
 	for( entry_list_t::iterator iter = mDriven.begin(); iter != mDriven.end(); iter++ )
 	{
 		LLDrivenEntry* driven = &(*iter);
-		sum += driven->mParam->getAvgDistortion();
+		sum.add(driven->mParam->getAvgDistortion());
 		count++;
 	}
-	sum /= (F32)count;
+	// With no driven params count stays 0; skip the scale so the result stays
+	// the zero vector instead of turning into NaN (0 * 1/0).
+	if (count > 0)
+	{
+		sum.mul( 1.f/(F32)count);
+	}
 
 	mDefaultVec = sum;
 	return mDefaultVec; 
 }
 
 
-LLVector3	LLDriverParam::getVertexDistortion(S32 index, LLPolyMesh *poly_mesh)
+LLVector4a	LLDriverParam::getVertexDistortion(S32 index, LLPolyMesh *poly_mesh)
 {
 	// Returns the sum of the distortions that every driven param reports for
 	// vertex `index` of `poly_mesh`; index and poly_mesh are forwarded unchanged.
-	LLVector3 sum;
+	LLVector4a sum;
+	sum.clear(); // explicit reset of the accumulator before summing
 	for( entry_list_t::iterator iter = mDriven.begin(); iter != mDriven.end(); iter++ )
 	{
 		LLDrivenEntry* driven = &(*iter);
-		sum += driven->mParam->getVertexDistortion( index, poly_mesh );
+		sum.add(driven->mParam->getVertexDistortion( index, poly_mesh ));
 	}
 	return sum;
 }
 
-const LLVector3*	LLDriverParam::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh)
+const LLVector4a*	LLDriverParam::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh)
 {
 	mCurrentDistortionParam = NULL;
-	const LLVector3* v = NULL;
+	const LLVector4a* v = NULL;
 	for( entry_list_t::iterator iter = mDriven.begin(); iter != mDriven.end(); iter++ )
 	{
 		LLDrivenEntry* driven = &(*iter);
 	return v;
 };
 
-const LLVector3*	LLDriverParam::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh)
+const LLVector4a*	LLDriverParam::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh)
 {
 	llassert( mCurrentDistortionParam );
 	if( !mCurrentDistortionParam )
 	}
 
 	// We're already in the middle of a param's distortions, so get the next one.
-	const LLVector3* v = driven->mParam->getNextDistortion( index, poly_mesh );
+	const LLVector4a* v = driven->mParam->getNextDistortion( index, poly_mesh );
 	if( (!v) && (iter != mDriven.end()) )
 	{
 		// This param is finished, so start the next param.  It might not have any

File indra/newview/lldriverparam.h

 	
 	// LLViewerVisualParam Virtual functions
 	/*virtual*/ F32					getTotalDistortion();
-	/*virtual*/ const LLVector3&	getAvgDistortion();
+	/*virtual*/ const LLVector4a&	getAvgDistortion();
 	/*virtual*/ F32					getMaxDistortion();
-	/*virtual*/ LLVector3			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh);
-	/*virtual*/ const LLVector3*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh);
-	/*virtual*/ const LLVector3*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh);
+	/*virtual*/ LLVector4a			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh);
+	/*virtual*/ const LLVector4a*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh);
+	/*virtual*/ const LLVector4a*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh);
 
 protected:
 	F32 getDrivenWeight(const LLDrivenEntry* driven, F32 input_weight);
 	void setDrivenWeight(LLDrivenEntry *driven, F32 driven_weight, bool upload_bake);
 
 
-	LLVector3	mDefaultVec; // temp holder
+	LLVector4a	mDefaultVec; // temp holder
 	typedef std::vector<LLDrivenEntry> entry_list_t;
 	entry_list_t mDriven;
 	LLViewerVisualParam* mCurrentDistortionParam;

File indra/newview/llpolymesh.cpp

 BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices )
 {
         U32 i;
-        mBaseCoords = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
-        mBaseNormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
-        mBaseBinormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+        mBaseCoords = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a));
+        mBaseNormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a));
+        mBaseBinormals = (LLVector4a*) ll_aligned_malloc_16(numVertices*sizeof(LLVector4a));
         mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2));
         mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2));
         mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32));
         for (i = 0; i < numVertices; i++)
         {
-                mWeights[i] = 0.f;
+			mBaseCoords[i].clear();
+			mBaseNormals[i].clear();
+			mBaseBinormals[i].clear();
+			mWeights[i] = 0.f;
         }
         mNumVertices = numVertices;
         return TRUE;
 
                         allocateVertexData( numVertices );      
 
-                        //----------------------------------------------------------------
-                        // Coords
-                        //----------------------------------------------------------------
-                        numRead = fread(mBaseCoords, 3*sizeof(float), numVertices, fp);
-                        llendianswizzle(mBaseCoords, sizeof(float), 3*numVertices);
-                        if (numRead != numVertices)
-                        {
-                                llerrs << "can't read Coordinates from " << fileName << llendl;
-                                return FALSE;
-                        }
+						for (U16 i = 0; i < numVertices; ++i)
+						{
+							//----------------------------------------------------------------
+							// Coords
+							//----------------------------------------------------------------
+							numRead = fread(&mBaseCoords[i], sizeof(float), 3, fp);
+							llendianswizzle(&mBaseCoords[i], sizeof(float), 3);
+							if (numRead != 3)
+							{
+									llerrs << "can't read Coordinates from " << fileName << llendl;
+									return FALSE;
+							}
+						}
 
-                        //----------------------------------------------------------------
-                        // Normals
-                        //----------------------------------------------------------------
-                        numRead = fread(mBaseNormals, 3*sizeof(float), numVertices, fp);
-                        llendianswizzle(mBaseNormals, sizeof(float), 3*numVertices);
-                        if (numRead != numVertices)
-                        {
-                                llerrs << " can't read Normals from " << fileName << llendl;
-                                return FALSE;
-                        }
+						for (U16 i = 0; i < numVertices; ++i)
+						{
+							//----------------------------------------------------------------
+							// Normals
+							//----------------------------------------------------------------
+							numRead = fread(&mBaseNormals[i], sizeof(float), 3, fp);
+							llendianswizzle(&mBaseNormals[i], sizeof(float), 3);
+							if (numRead != 3)
+							{
+									llerrs << " can't read Normals from " << fileName << llendl;
+									return FALSE;
+							}
+						}
 
-                        //----------------------------------------------------------------
-                        // Binormals
-                        //----------------------------------------------------------------
-                        numRead = fread(mBaseBinormals, 3*sizeof(float), numVertices, fp);
-                        llendianswizzle(mBaseBinormals, sizeof(float), 3*numVertices);
-                        if (numRead != numVertices)
-                        {
-                                llerrs << " can't read Binormals from " << fileName << llendl;
-                                return FALSE;
-                        }
-
+						for (U16 i = 0; i < numVertices; ++i)
+						{
+							//----------------------------------------------------------------
+							// Binormals
+							//----------------------------------------------------------------
+							numRead = fread(&mBaseBinormals[i], sizeof(float), 3, fp);
+							llendianswizzle(&mBaseBinormals[i], sizeof(float), 3);
+							if (numRead != 3)
+							{
+									llerrs << " can't read Binormals from " << fileName << llendl;
+									return FALSE;
+							}
+						}
 
                         //----------------------------------------------------------------
                         // TexCoords
 	{
 		// Allocate memory without initializing every vector
 		// NOTE: This makes assumptions about the size of LLVector[234]
-		int nverts = mSharedData->mNumVertices;
-		int nfloats = nverts * (2*4 + 3*3 + 2 + 4);
+		S32 nverts = mSharedData->mNumVertices;
+		//make sure it's an even number of verts for alignment
+		nverts += nverts%2;
+		S32 nfloats = nverts * (
+					4 + //coords
+					4 + //normals
+					4 + //weights
+					2 + //tex coords
+					4 + //scaled normals
+					4 + //binormals
+					4); //scaled binormals
+
 		//use 16 byte aligned vertex data to make LLPolyMesh SSE friendly
 		mVertexData = (F32*) ll_aligned_malloc_16(nfloats*4);
-		int offset = 0;
-		mCoords				= 	(LLVector4*)(mVertexData + offset); offset += 4*nverts;
-		mNormals			=	(LLVector4*)(mVertexData + offset); offset += 4*nverts;
-		mClothingWeights	= 	(LLVector4*)(mVertexData + offset); offset += 4*nverts;
-		mTexCoords			= 	(LLVector2*)(mVertexData + offset); offset += 2*nverts;
-
-		// these members don't need to be 16-byte aligned, but the first one might be
-		// read during an aligned memcpy of mTexCoords
-		mScaledNormals =                (LLVector3*)(mVertexData + offset); offset += 3*nverts;
-		mBinormals =                    (LLVector3*)(mVertexData + offset); offset += 3*nverts;
-		mScaledBinormals =              (LLVector3*)(mVertexData + offset); offset += 3*nverts; 
+		S32 offset = 0;
+		mCoords				= 	(LLVector4a*)(mVertexData + offset); offset += 4*nverts;
+		mNormals			=	(LLVector4a*)(mVertexData + offset); offset += 4*nverts;
+		mClothingWeights	= 	(LLVector4a*)(mVertexData + offset); offset += 4*nverts;
+		mTexCoords			= 	(LLVector2*)(mVertexData + offset);  offset += 2*nverts;
+		mScaledNormals		=   (LLVector4a*)(mVertexData + offset); offset += 4*nverts;
+		mBinormals			=   (LLVector4a*)(mVertexData + offset); offset += 4*nverts;
+		mScaledBinormals	=   (LLVector4a*)(mVertexData + offset); offset += 4*nverts; 
 		initializeForMorph();
 	}
 }
 //-----------------------------------------------------------------------------
 // getWritableCoords()
 // Returns the non-const array of deformed vertex positions (mCoords),
 // i.e. the coordinates that result from applying morph targets.
 //-----------------------------------------------------------------------------
-LLVector4 *LLPolyMesh::getWritableCoords()
+LLVector4a *LLPolyMesh::getWritableCoords()
 {
         return mCoords;
 }
 //-----------------------------------------------------------------------------
 // getWritableNormals()
 // Returns the non-const array of output normals (mNormals), which hold the
 // normalized result of the scaled normals.
 //-----------------------------------------------------------------------------
-LLVector4 *LLPolyMesh::getWritableNormals()
+LLVector4a *LLPolyMesh::getWritableNormals()
 {
         return mNormals;
 }
 //-----------------------------------------------------------------------------
 // getWritableBinormals()
 // Returns the non-const array of output binormals (mBinormals), which hold
 // the normalized result derived from the scaled binormals.
 //-----------------------------------------------------------------------------
-LLVector3 *LLPolyMesh::getWritableBinormals()
+LLVector4a *LLPolyMesh::getWritableBinormals()
 {
         return mBinormals;
 }
 //-----------------------------------------------------------------------------
 // getWritableClothingWeights()
 // Returns the non-const array of per-vertex weight values that mark verts as
 // clothing/skin (mClothingWeights).
 //-----------------------------------------------------------------------------
-LLVector4       *LLPolyMesh::getWritableClothingWeights()
+LLVector4a       *LLPolyMesh::getWritableClothingWeights()
 {
         return mClothingWeights;
 }
 //-----------------------------------------------------------------------------
 // getScaledNormals()
 // Returns the array of intermediate morphed normals (mScaledNormals), i.e.
 // the deformed normals before they are normalized into mNormals.
 //-----------------------------------------------------------------------------
-LLVector3 *LLPolyMesh::getScaledNormals()
+LLVector4a *LLPolyMesh::getScaledNormals()
 {
         return mScaledNormals;
 }
 //-----------------------------------------------------------------------------
 // getScaledBinormals()
 // Returns the array of intermediate morphed binormals (mScaledBinormals),
 // i.e. the deformed binormals before they are normalized into mBinormals.
 //-----------------------------------------------------------------------------
-LLVector3 *LLPolyMesh::getScaledBinormals()
+LLVector4a *LLPolyMesh::getScaledBinormals()
 {
         return mScaledBinormals;
 }
 //-----------------------------------------------------------------------------
 void LLPolyMesh::initializeForMorph()
 {
-    for (U32 i = 0; i < mSharedData->mNumVertices; ++i)
+	// Resets this mesh's per-instance vertex streams to the shared base mesh:
+	// coords, normals and binormals are copied from mSharedData, and the
+	// clothing weights are cleared.
+    LLVector4a::memcpyNonAliased16((F32*) mCoords, (F32*) mSharedData->mBaseCoords, sizeof(LLVector4a) * mSharedData->mNumVertices);
+	LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices);
+	LLVector4a::memcpyNonAliased16((F32*) mScaledNormals, (F32*) mSharedData->mBaseNormals, sizeof(LLVector4a) * mSharedData->mNumVertices);
+	// Both binormal streams are seeded from the base binormals, matching the
+	// memcpy calls this replaces; copying the base normals here would leave
+	// every binormal equal to its normal.
+	LLVector4a::memcpyNonAliased16((F32*) mBinormals, (F32*) mSharedData->mBaseBinormals, sizeof(LLVector4a) * mSharedData->mNumVertices);
+	LLVector4a::memcpyNonAliased16((F32*) mScaledBinormals, (F32*) mSharedData->mBaseBinormals, sizeof(LLVector4a) * mSharedData->mNumVertices);
+	// NOTE(review): the vertex count is rounded up to an even number here, so for an
+	// odd count this reads one LLVector2 more than mNumVertices from the shared
+	// tex coords - confirm the source allocation is padded accordingly.
+	LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) mSharedData->mTexCoords, sizeof(LLVector2) * (mSharedData->mNumVertices + mSharedData->mNumVertices%2));
+
+	for (U32 i = 0; i < mSharedData->mNumVertices; ++i)
 	{
-		mCoords[i] = LLVector4(mSharedData->mBaseCoords[i]);
-		mNormals[i] = LLVector4(mSharedData->mBaseNormals[i]);
+		mClothingWeights[i].clear();
 	}
-
-	memcpy(mScaledNormals, mSharedData->mBaseNormals, sizeof(LLVector3) * mSharedData->mNumVertices);	/*Flawfinder: ignore*/
-	memcpy(mBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices);	/*Flawfinder: ignore*/
-	memcpy(mScaledBinormals, mSharedData->mBaseBinormals, sizeof(LLVector3) * mSharedData->mNumVertices);		/*Flawfinder: ignore*/
-	memcpy(mTexCoords, mSharedData->mTexCoords, sizeof(LLVector2) * mSharedData->mNumVertices);		/*Flawfinder: ignore*/
-	memset(mClothingWeights, 0, sizeof(LLVector4) * mSharedData->mNumVertices);
 }
 
 //-----------------------------------------------------------------------------
 LLPolySkeletalDistortion::LLPolySkeletalDistortion(LLVOAvatar *avatarp)
 {
         mAvatar = avatarp; // plain back pointer to the avatar
-        mDefaultVec.setVec(0.001f, 0.001f, 0.001f);
+        mDefaultVec.splat(0.001f); // presumably broadcasts 0.001 to every component, replacing the old (x, y, z) setVec - confirm
 }
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 // apply()
 //-----------------------------------------------------------------------------
+static LLFastTimer::DeclareTimer FTM_POLYSKELETAL_DISTORTION_APPLY("Skeletal Distortion");
+
 void LLPolySkeletalDistortion::apply( ESex avatar_sex )
 {
+	LLFastTimer t(FTM_POLYSKELETAL_DISTORTION_APPLY);
+
         F32 effective_weight = ( getSex() & avatar_sex ) ? mCurWeight : getDefaultWeight();
 
         LLJoint* joint;
 {
         LLPolyMorphData* cloned_morph_data = new LLPolyMorphData(*src_data);
         cloned_morph_data->mName = name;
+		LLVector4a dir;
+		dir.load3(direction.mV);
+
         for (U32 v=0; v < cloned_morph_data->mNumIndices; v++)
         {
-                cloned_morph_data->mCoords[v] = direction;
-                cloned_morph_data->mNormals[v] = LLVector3(0,0,0);
-                cloned_morph_data->mBinormals[v] = LLVector3(0,0,0);
+                cloned_morph_data->mCoords[v] = dir;
+                cloned_morph_data->mNormals[v].clear();
+                cloned_morph_data->mBinormals[v].clear();
         }
         return cloned_morph_data;
 }
 {
         LLPolyMorphData* cloned_morph_data = new LLPolyMorphData(*src_data);
         cloned_morph_data->mName = name;
+
+		LLVector4a sc;
+		sc.splat(scale);
+
+		LLVector4a nsc;
+		nsc.set(scale, -scale, scale, scale);
+
         for (U32 v=0; v < cloned_morph_data->mNumIndices; v++)
         {
-                cloned_morph_data->mCoords[v] = src_data->mCoords[v]*scale;
-                cloned_morph_data->mNormals[v] = src_data->mNormals[v]*scale;
-                cloned_morph_data->mBinormals[v] = src_data->mBinormals[v]*scale;
-                if (cloned_morph_data->mCoords[v][1] < 0)
-                {
-                        cloned_morph_data->mCoords[v][1] *= -1;
-                        cloned_morph_data->mNormals[v][1] *= -1;
-                        cloned_morph_data->mBinormals[v][1] *= -1;
-                }
+            if (cloned_morph_data->mCoords[v][1] < 0)
+            {
+                cloned_morph_data->mCoords[v].setMul(src_data->mCoords[v],nsc);
+				cloned_morph_data->mNormals[v].setMul(src_data->mNormals[v],nsc);
+				cloned_morph_data->mBinormals[v].setMul(src_data->mBinormals[v],nsc);
+			}
+			else
+			{
+				cloned_morph_data->mCoords[v].setMul(src_data->mCoords[v],sc);
+				cloned_morph_data->mNormals[v].setMul(src_data->mNormals[v], sc);
+				cloned_morph_data->mBinormals[v].setMul(src_data->mBinormals[v],sc);
+			}
         }
         return cloned_morph_data;
 }

File indra/newview/llpolymesh.h

 							
 	// vertex data			
 	S32						mNumVertices;
-	LLVector3				*mBaseCoords;
-	LLVector3				*mBaseNormals;
-	LLVector3				*mBaseBinormals;
+	LLVector4a				*mBaseCoords;
+	LLVector4a				*mBaseNormals;
+	LLVector4a				*mBaseBinormals;
 	LLVector2				*mTexCoords;
 	LLVector2				*mDetailTexCoords;
 	F32						*mWeights;
 	}
 
 	// Get coords
-	const LLVector4	*getCoords() const{
+	const LLVector4a	*getCoords() const{
 		return mCoords;
 	}
 
 	// non const version
-	LLVector4 *getWritableCoords();
+	LLVector4a *getWritableCoords();
 
 	// Get normals
-	const LLVector4	*getNormals() const{ 
+	const LLVector4a	*getNormals() const{ 
 		return mNormals; 
 	}
 
 	// Get binormals
-	const LLVector3	*getBinormals() const{ 
+	const LLVector4a	*getBinormals() const{ 
 		return mBinormals; 
 	}
 
 	// Get base mesh normals
-	const LLVector3 *getBaseNormals() const{
+	const LLVector4a *getBaseNormals() const{
 		llassert(mSharedData);
 		return mSharedData->mBaseNormals;
 	}
 
 	// Get base mesh binormals
-	const LLVector3 *getBaseBinormals() const{
+	const LLVector4a *getBaseBinormals() const{
 		llassert(mSharedData);
 		return mSharedData->mBaseBinormals;
 	}
 
 	// intermediate morphed normals and output normals
-	LLVector4 *getWritableNormals();
-	LLVector3 *getScaledNormals();
+	LLVector4a *getWritableNormals();
+	LLVector4a *getScaledNormals();
 
-	LLVector3 *getWritableBinormals();
-	LLVector3 *getScaledBinormals();
+	LLVector4a *getWritableBinormals();
+	LLVector4a *getScaledBinormals();
 
 	// Get texCoords
 	const LLVector2	*getTexCoords() const { 
 
 	F32			*getWritableWeights() const;
 
-	LLVector4	*getWritableClothingWeights();
+	LLVector4a	*getWritableClothingWeights();
 
-	const LLVector4		*getClothingWeights()
+	const LLVector4a		*getClothingWeights()
 	{
 		return mClothingWeights;	
 	}
 	// Single array of floats for allocation / deletion
 	F32						*mVertexData;
 	// deformed vertices (resulting from application of morph targets)
-	LLVector4				*mCoords;
+	LLVector4a				*mCoords;
 	// deformed normals (resulting from application of morph targets)
-	LLVector3				*mScaledNormals;
+	LLVector4a				*mScaledNormals;
 	// output normals (after normalization)
-	LLVector4				*mNormals;
+	LLVector4a				*mNormals;
 	// deformed binormals (resulting from application of morph targets)
-	LLVector3				*mScaledBinormals;
+	LLVector4a				*mScaledBinormals;
 	// output binormals (after normalization)
-	LLVector3				*mBinormals;
+	LLVector4a				*mBinormals;
 	// weight values that mark verts as clothing/skin
-	LLVector4				*mClothingWeights;
+	LLVector4a				*mClothingWeights;
 	// output texture coordinates
 	LLVector2				*mTexCoords;
 	
 	
 	// LLViewerVisualParam Virtual functions
 	/*virtual*/ F32					getTotalDistortion() { return 0.1f; }
-	/*virtual*/ const LLVector3&	getAvgDistortion()	{ return mDefaultVec; }
+	/*virtual*/ const LLVector4a&	getAvgDistortion()	{ return mDefaultVec; }
 	/*virtual*/ F32					getMaxDistortion() { return 0.1f; }
-	/*virtual*/ LLVector3			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh){return LLVector3(0.001f, 0.001f, 0.001f);}
-	/*virtual*/ const LLVector3*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh){index = 0; poly_mesh = NULL; return &mDefaultVec;};
-	/*virtual*/ const LLVector3*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh){index = 0; poly_mesh = NULL; return NULL;};
 	// NOTE(review): in getFirstDistortion/getNextDistortion, `index = 0; poly_mesh = NULL;`
 	// assigns the pointer parameters themselves rather than *index / *poly_mesh, so the
 	// caller's variables are never written - confirm whether the out-values were meant to be set.
+	/*virtual*/ LLVector4a			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh){return LLVector4a(0.001f, 0.001f, 0.001f);}
+	/*virtual*/ const LLVector4a*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh){index = 0; poly_mesh = NULL; return &mDefaultVec;};
+	/*virtual*/ const LLVector4a*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh){index = 0; poly_mesh = NULL; return NULL;};
 
 protected:
 	typedef std::map<LLJoint*, LLVector3> joint_vec_map_t;
 	joint_vec_map_t mJointScales;
 	joint_vec_map_t mJointOffsets;
-	LLVector3	mDefaultVec;
+	LLVector4a	mDefaultVec;
 	// Backlink only; don't make this an LLPointer.
 	LLVOAvatar *mAvatar;
 };

File indra/newview/llpolymorph.cpp

 	mNumIndices = 0;
 	mCurrentIndex = 0;
 	mTotalDistortion = 0.f;
-	mAvgDistortion.zeroVec();
+	mAvgDistortion.clear();
 	mMaxDistortion = 0.f;
 	mVertexIndices = NULL;
 	mCoords = NULL;
 {
 	const S32 numVertices = mNumIndices;
 
-	mCoords = new LLVector3[numVertices];
-	mNormals = new LLVector3[numVertices];
-	mBinormals = new LLVector3[numVertices];
+	mCoords = new LLVector4a[numVertices];
+	mNormals = new LLVector4a[numVertices];
+	mBinormals = new LLVector4a[numVertices];
 	mTexCoords = new LLVector2[numVertices];
 	mVertexIndices = new U32[numVertices];
 	
 	//-------------------------------------------------------------------------
 	// allocate vertices
 	//-------------------------------------------------------------------------
-	mCoords = new LLVector3[numVertices];
-	mNormals = new LLVector3[numVertices];
-	mBinormals = new LLVector3[numVertices];
+	mCoords = new LLVector4a[numVertices];
+	mNormals = new LLVector4a[numVertices];
+	mBinormals = new LLVector4a[numVertices];
 	mTexCoords = new LLVector2[numVertices];
 	// Actually, we are allocating more space than we need for the skiplist
 	mVertexIndices = new U32[numVertices];
 	mNumIndices = 0;
 	mTotalDistortion = 0.f;
 	mMaxDistortion = 0.f;
-	mAvgDistortion.zeroVec();
+	mAvgDistortion.clear();
 	mMesh = mesh;
 
 	//-------------------------------------------------------------------------
 		}
 
 
-		numRead = fread(&mCoords[v].mV, sizeof(F32), 3, fp);
-		llendianswizzle(&mCoords[v].mV, sizeof(F32), 3);
+		numRead = fread(&mCoords[v], sizeof(F32), 3, fp);
+		llendianswizzle(&mCoords[v], sizeof(F32), 3);
 		if (numRead != 3)
 		{
 			llwarns << "Can't read morph target vertex coordinates" << llendl;
 			return FALSE;
 		}
 
-		F32 magnitude = mCoords[v].magVec();
+		F32 magnitude = mCoords[v].getLength3().getF32();
 		
 		mTotalDistortion += magnitude;
-		mAvgDistortion.mV[VX] += fabs(mCoords[v].mV[VX]);
-		mAvgDistortion.mV[VY] += fabs(mCoords[v].mV[VY]);
-		mAvgDistortion.mV[VZ] += fabs(mCoords[v].mV[VZ]);
+		LLVector4a t;
+		t.setAbs(mCoords[v]);
+		mAvgDistortion.add(t);
 		
 		if (magnitude > mMaxDistortion)
 		{
 			mMaxDistortion = magnitude;
 		}
 
-		numRead = fread(&mNormals[v].mV, sizeof(F32), 3, fp);
-		llendianswizzle(&mNormals[v].mV, sizeof(F32), 3);
+		numRead = fread(&mNormals[v], sizeof(F32), 3, fp);
+		llendianswizzle(&mNormals[v], sizeof(F32), 3);
 		if (numRead != 3)
 		{
 			llwarns << "Can't read morph target normal" << llendl;
 			return FALSE;
 		}
 
-		numRead = fread(&mBinormals[v].mV, sizeof(F32), 3, fp);
-		llendianswizzle(&mBinormals[v].mV, sizeof(F32), 3);
+		numRead = fread(&mBinormals[v], sizeof(F32), 3, fp);
+		llendianswizzle(&mBinormals[v], sizeof(F32), 3);
 		if (numRead != 3)
 		{
 			llwarns << "Can't read morph target binormal" << llendl;
 		mNumIndices++;
 	}
 
-	mAvgDistortion = mAvgDistortion * (1.f/(F32)mNumIndices);
-	mAvgDistortion.normVec();
+	mAvgDistortion.mul(1.f/(F32)mNumIndices);
+	mAvgDistortion.normalize3fast();
 
 	return TRUE;
 }
 //-----------------------------------------------------------------------------
 // getVertexDistortion()
 //-----------------------------------------------------------------------------
-LLVector3 LLPolyMorphTarget::getVertexDistortion(S32 requested_index, LLPolyMesh *mesh)
+LLVector4a LLPolyMorphTarget::getVertexDistortion(S32 requested_index, LLPolyMesh *mesh)
 {
-	if (!mMorphData || mMesh != mesh) return LLVector3::zero;
+	if (!mMorphData || mMesh != mesh) return LLVector4a::getZero();
 
 	for(U32 index = 0; index < mMorphData->mNumIndices; index++)
 	{
 		}
 	}
 
-	return LLVector3::zero;
+	return LLVector4a::getZero();
 }
 
 //-----------------------------------------------------------------------------
 // getFirstDistortion()
 //-----------------------------------------------------------------------------
-const LLVector3 *LLPolyMorphTarget::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh)
+const LLVector4a *LLPolyMorphTarget::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh)
 {
-	if (!mMorphData) return &LLVector3::zero;
+	if (!mMorphData) return &LLVector4a::getZero();
 
-	LLVector3* resultVec;
+	LLVector4a* resultVec;
 	mMorphData->mCurrentIndex = 0;
 	if (mMorphData->mNumIndices)
 	{
 //-----------------------------------------------------------------------------
 // getNextDistortion()
 //-----------------------------------------------------------------------------
-const LLVector3 *LLPolyMorphTarget::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh)
+const LLVector4a *LLPolyMorphTarget::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh)
 {
-	if (!mMorphData) return &LLVector3::zero;
+	if (!mMorphData) return &LLVector4a::getZero();
 
-	LLVector3* resultVec;
+	LLVector4a* resultVec;
 	mMorphData->mCurrentIndex++;
 	if (mMorphData->mCurrentIndex < mMorphData->mNumIndices)
 	{
 //-----------------------------------------------------------------------------
 // getAvgDistortion()
 //-----------------------------------------------------------------------------
-const LLVector3& LLPolyMorphTarget::getAvgDistortion()	
+const LLVector4a& LLPolyMorphTarget::getAvgDistortion()	
 {
 	if (mMorphData) 
 	{
 	}
 	else 
 	{
-		return LLVector3::zero;
+		return LLVector4a::getZero();
 	}
 }
 
 //-----------------------------------------------------------------------------
 // apply()
 //-----------------------------------------------------------------------------
+static LLFastTimer::DeclareTimer FTM_APPLY_MORPH_TARGET("Apply Morph");
+
 void LLPolyMorphTarget::apply( ESex avatar_sex )
 {
 	if (!mMorphData || mNumMorphMasksPending > 0)
 		return;
 	}
 
+	LLFastTimer t(FTM_APPLY_MORPH_TARGET);
+
 	mLastSex = avatar_sex;
 
 	// Check for NaN condition (NaN is detected if a variable doesn't equal itself).
 	if (delta_weight != 0.f)
 	{
 		llassert(!mMesh->isLOD());
-		LLVector4 *coords = mMesh->getWritableCoords();
+		LLVector4a *coords = mMesh->getWritableCoords();
 
-		LLVector3 *scaled_normals = mMesh->getScaledNormals();
-		LLVector4 *normals = mMesh->getWritableNormals();
+		LLVector4a *scaled_normals = mMesh->getScaledNormals();
+		LLVector4a *normals = mMesh->getWritableNormals();
 
-		LLVector3 *scaled_binormals = mMesh->getScaledBinormals();
-		LLVector3 *binormals = mMesh->getWritableBinormals();
+		LLVector4a *scaled_binormals = mMesh->getScaledBinormals();
+		LLVector4a *binormals = mMesh->getWritableBinormals();
 
-		LLVector4 *clothing_weights = mMesh->getWritableClothingWeights();
+		LLVector4a *clothing_weights = mMesh->getWritableClothingWeights();
 		LLVector2 *tex_coords = mMesh->getWritableTexCoords();
 
 		F32 *maskWeightArray = (mVertMask) ? mVertMask->getMorphMaskWeights() : NULL;
 				maskWeight = maskWeightArray[vert_index_morph];
 			}
 
-			coords[vert_index_mesh] += LLVector4(mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight);
+
+			LLVector4a pos = mMorphData->mCoords[vert_index_morph];
+			pos.mul(delta_weight*maskWeight);
+			coords[vert_index_mesh].add(pos);
 
 			if (getInfo()->mIsClothingMorph && clothing_weights)
 			{
-				LLVector3 clothing_offset = mMorphData->mCoords[vert_index_morph] * delta_weight * maskWeight;
-				LLVector4* clothing_weight = &clothing_weights[vert_index_mesh];
-				clothing_weight->mV[VX] += clothing_offset.mV[VX];
-				clothing_weight->mV[VY] += clothing_offset.mV[VY];
-				clothing_weight->mV[VZ] += clothing_offset.mV[VZ];
-				clothing_weight->mV[VW] = maskWeight;
+				LLVector4a clothing_offset = mMorphData->mCoords[vert_index_morph];
+				clothing_offset.mul(delta_weight * maskWeight);
+				LLVector4a* clothing_weight = &clothing_weights[vert_index_mesh];
+				clothing_weight->add(clothing_offset);
+				clothing_weight->getF32ptr()[VW] = maskWeight;
 			}
 
 			// calculate new normals based on half angles
-			scaled_normals[vert_index_mesh] += mMorphData->mNormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR;
-			LLVector3 normalized_normal = scaled_normals[vert_index_mesh];
-			normalized_normal.normVec();
-			normals[vert_index_mesh] = LLVector4(normalized_normal);
+			LLVector4a norm = mMorphData->mNormals[vert_index_morph];
+			norm.mul(delta_weight*maskWeight*NORMAL_SOFTEN_FACTOR);
+			scaled_normals[vert_index_mesh].add(norm);
+			norm = scaled_normals[vert_index_mesh];
+			norm.normalize3fast();
+			normals[vert_index_mesh] = norm;
 
 			// calculate new binormals
-			scaled_binormals[vert_index_mesh] += mMorphData->mBinormals[vert_index_morph] * delta_weight * maskWeight * NORMAL_SOFTEN_FACTOR;
-			LLVector3 tangent = scaled_binormals[vert_index_mesh] % normalized_normal;
-			LLVector3 normalized_binormal = normalized_normal % tangent; 
-			normalized_binormal.normVec();
-			binormals[vert_index_mesh] = normalized_binormal;
-
+			LLVector4a binorm = mMorphData->mBinormals[vert_index_morph];
+			binorm.mul(delta_weight*maskWeight*NORMAL_SOFTEN_FACTOR);
+			scaled_binormals[vert_index_mesh].add(binorm);
+			LLVector4a tangent;
+			tangent.setCross3(scaled_binormals[vert_index_mesh], norm);
+			LLVector4a& normalized_binormal = binormals[vert_index_mesh];
+			normalized_binormal.setCross3(norm, tangent); 
+			normalized_binormal.normalize3fast();
+			
 			tex_coords[vert_index_mesh] += mMorphData->mTexCoords[vert_index_morph] * delta_weight * maskWeight;
 		}
 
 //-----------------------------------------------------------------------------
 void	LLPolyMorphTarget::applyMask(U8 *maskTextureData, S32 width, S32 height, S32 num_components, BOOL invert)
 {
-	LLVector4 *clothing_weights = getInfo()->mIsClothingMorph ? mMesh->getWritableClothingWeights() : NULL;
+	LLVector4a *clothing_weights = getInfo()->mIsClothingMorph ? mMesh->getWritableClothingWeights() : NULL;
 
 	if (!mVertMask)
 	{
 
 		if (maskWeights)
 		{
-			LLVector4 *coords = mMesh->getWritableCoords();
-			LLVector3 *scaled_normals = mMesh->getScaledNormals();
-			LLVector3 *scaled_binormals = mMesh->getScaledBinormals();
+			LLVector4a *coords = mMesh->getWritableCoords();
+			LLVector4a *scaled_normals = mMesh->getScaledNormals();
+			LLVector4a *scaled_binormals = mMesh->getScaledBinormals();
 			LLVector2 *tex_coords = mMesh->getWritableTexCoords();
 
+			LLVector4Logical clothing_mask;
+			clothing_mask.clear();
+			clothing_mask.setElement<0>();
+			clothing_mask.setElement<1>();
+			clothing_mask.setElement<2>();
+
+
 			for(U32 vert = 0; vert < mMorphData->mNumIndices; vert++)
 			{
 				F32 lastMaskWeight = mLastWeight * maskWeights[vert];
 				S32 out_vert = mMorphData->mVertexIndices[vert];
 
 				// remove effect of existing masked morph
-				coords[out_vert] -= LLVector4(mMorphData->mCoords[vert]) * lastMaskWeight;
-				scaled_normals[out_vert] -= mMorphData->mNormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR;
-				scaled_binormals[out_vert] -= mMorphData->mBinormals[vert] * lastMaskWeight * NORMAL_SOFTEN_FACTOR;
+				LLVector4a t;
+				t = mMorphData->mCoords[vert];
+				t.mul(lastMaskWeight);
+				coords[out_vert].sub(t);
+
+				t = mMorphData->mNormals[vert];
+				t.mul(lastMaskWeight*NORMAL_SOFTEN_FACTOR);
+				scaled_normals[out_vert].sub(t);
+
+				t = mMorphData->mBinormals[vert];
+				t.mul(lastMaskWeight*NORMAL_SOFTEN_FACTOR);
+				scaled_binormals[out_vert].sub(t);
+
 				tex_coords[out_vert] -= mMorphData->mTexCoords[vert] * lastMaskWeight;
 
 				if (clothing_weights)
 				{
-					LLVector3 clothing_offset = mMorphData->mCoords[vert] * lastMaskWeight;
-					LLVector4* clothing_weight = &clothing_weights[out_vert];
-					clothing_weight->mV[VX] -= clothing_offset.mV[VX];
-					clothing_weight->mV[VY] -= clothing_offset.mV[VY];
-					clothing_weight->mV[VZ] -= clothing_offset.mV[VZ];
+					// Remove the previously applied masked offset from the clothing weight.
+					// clothing_mask selects elements 0-2 only, so X/Y/Z receive the
+					// subtracted value while W keeps its current contents.
+					LLVector4a clothing_offset = mMorphData->mCoords[vert];
+					clothing_offset.mul(lastMaskWeight);
+					LLVector4a* clothing_weight = &clothing_weights[out_vert];
+					LLVector4a new_weight;
+					new_weight.setSub(*clothing_weight, clothing_offset);
+					// Select the subtracted result (not the raw offset) for X/Y/Z.
+					clothing_weight->setSelectWithMask(clothing_mask, new_weight, *clothing_weight);
 				}
 			}
 		}
 //-----------------------------------------------------------------------------
 // generateMask()
 //-----------------------------------------------------------------------------
-void LLPolyVertexMask::generateMask(U8 *maskTextureData, S32 width, S32 height, S32 num_components, BOOL invert, LLVector4 *clothing_weights)
+void LLPolyVertexMask::generateMask(U8 *maskTextureData, S32 width, S32 height, S32 num_components, BOOL invert, LLVector4a *clothing_weights)
 {
 // RN debug output that uses Image Debugger (http://www.cs.unc.edu/~baxter/projects/imdebug/)
 //	BOOL debugImg = FALSE; 
 
 		if (clothing_weights)
 		{
-			clothing_weights[vertIndex].mV[VW] = mWeights[index];
+			clothing_weights[vertIndex].getF32ptr()[VW] = mWeights[index];
 		}
 	}
 	mWeightsGenerated = TRUE;

File indra/newview/llpolymorph.h

 	U32					mNumIndices;
 	U32*				mVertexIndices;
 	U32					mCurrentIndex;
-	LLVector3*			mCoords;
-	LLVector3*			mNormals;
-	LLVector3*			mBinormals;
+	LLVector4a*			mCoords;
+	LLVector4a*			mNormals;
+	LLVector4a*			mBinormals;
 	LLVector2*			mTexCoords;
 
 	F32					mTotalDistortion;	// vertex distortion summed over entire morph
 	F32					mMaxDistortion;		// maximum single vertex distortion in a given morph
-	LLVector3			mAvgDistortion;		// average vertex distortion, to infer directionality of the morph
+	LLVector4a			mAvgDistortion;		// average vertex distortion, to infer directionality of the morph
 	LLPolyMeshSharedData*	mMesh;
 };
 
 	LLPolyVertexMask(LLPolyMorphData* morph_data);
 	~LLPolyVertexMask();
 
-	void generateMask(U8 *maskData, S32 width, S32 height, S32 num_components, BOOL invert, LLVector4 *clothing_weights);
+	void generateMask(U8 *maskData, S32 width, S32 height, S32 num_components, BOOL invert, LLVector4a *clothing_weights);
 	F32* getMorphMaskWeights();
 
 
 	
 	// LLViewerVisualParam Virtual functions
 	/*virtual*/ F32					getTotalDistortion();
-	/*virtual*/ const LLVector3&	getAvgDistortion();
+	/*virtual*/ const LLVector4a&	getAvgDistortion();
 	/*virtual*/ F32					getMaxDistortion();
-	/*virtual*/ LLVector3			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh);
-	/*virtual*/ const LLVector3*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh);
-	/*virtual*/ const LLVector3*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh);
+	/*virtual*/ LLVector4a			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh);
+	/*virtual*/ const LLVector4a*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh);
+	/*virtual*/ const LLVector4a*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh);
 
 	void	applyMask(U8 *maskData, S32 width, S32 height, S32 num_components, BOOL invert);
 	void	addPendingMorphMask() { mNumMorphMasksPending++; }

File indra/newview/lltexlayerparams.h

 
 	// LLViewerVisualParam Virtual functions
 	/*virtual*/ F32					getTotalDistortion()									{ return 1.f; }
-	/*virtual*/ const LLVector3&	getAvgDistortion()										{ return mAvgDistortionVec; }
+	/*virtual*/ const LLVector4a&	getAvgDistortion()										{ return mAvgDistortionVec; }
 	/*virtual*/ F32					getMaxDistortion()										{ return 3.f; }
-	/*virtual*/ LLVector3			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh)	{ return LLVector3(1.f, 1.f, 1.f);}
-	/*virtual*/ const LLVector3*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh)	{ index = 0; poly_mesh = NULL; return &mAvgDistortionVec;};
-	/*virtual*/ const LLVector3*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh)	{ index = 0; poly_mesh = NULL; return NULL;};
+	/*virtual*/ LLVector4a			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh)	{ return LLVector4a(1.f, 1.f, 1.f);}
+	/*virtual*/ const LLVector4a*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh)	{ index = 0; poly_mesh = NULL; return &mAvgDistortionVec;}; // NOTE(review): assigns the pointer parameters themselves, so the caller's *index / *poly_mesh are not modified -- confirm whether *index / *poly_mesh was intended
+	/*virtual*/ const LLVector4a*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh)	{ index = 0; poly_mesh = NULL; return NULL;}; // NOTE(review): assigns the pointer parameters themselves, so the caller's *index / *poly_mesh are not modified -- confirm whether *index / *poly_mesh was intended
 
 	// New functions
 	BOOL					render( S32 x, S32 y, S32 width, S32 height );
 	LLPointer<LLImageRaw>	mStaticImageRaw;
 	BOOL					mNeedsCreateTexture;
 	BOOL					mStaticImageInvalid;
-	LLVector3				mAvgDistortionVec;
+	LLVector4a				mAvgDistortionVec;
 	F32						mCachedEffectiveWeight;
 
 public:
 
 	// LLViewerVisualParam Virtual functions
 	/*virtual*/ F32					getTotalDistortion()									{ return 1.f; }
-	/*virtual*/ const LLVector3&	getAvgDistortion()										{ return mAvgDistortionVec; }
+	/*virtual*/ const LLVector4a&	getAvgDistortion()										{ return mAvgDistortionVec; }
 	/*virtual*/ F32					getMaxDistortion()										{ return 3.f; }
-	/*virtual*/ LLVector3			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh)	{ return LLVector3(1.f, 1.f, 1.f); }
-	/*virtual*/ const LLVector3*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh)	{ index = 0; poly_mesh = NULL; return &mAvgDistortionVec;};
-	/*virtual*/ const LLVector3*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh)	{ index = 0; poly_mesh = NULL; return NULL;};
+	/*virtual*/ LLVector4a			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh)	{ return LLVector4a(1.f, 1.f, 1.f); }
+	/*virtual*/ const LLVector4a*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh)	{ index = 0; poly_mesh = NULL; return &mAvgDistortionVec;}; // NOTE(review): assigns the pointer parameters themselves, so the caller's *index / *poly_mesh are not modified -- confirm whether *index / *poly_mesh was intended
+	/*virtual*/ const LLVector4a*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh)	{ index = 0; poly_mesh = NULL; return NULL;}; // NOTE(review): assigns the pointer parameters themselves, so the caller's *index / *poly_mesh are not modified -- confirm whether *index / *poly_mesh was intended
 
 	// New functions
 	LLColor4				getNetColor() const;
 protected:
 	virtual void onGlobalColorChanged(bool upload_bake) {}
 private:
-	LLVector3				mAvgDistortionVec;
+	LLVector4a				mAvgDistortionVec;
 };
 
 class LLTexLayerParamColorInfo : public LLViewerVisualParamInfo

File indra/newview/llviewervisualparam.h

 
 	// New Virtual functions
 	virtual F32					getTotalDistortion() = 0;
-	virtual const LLVector3&	getAvgDistortion() = 0;
+	virtual const LLVector4a&	getAvgDistortion() = 0;
 	virtual F32					getMaxDistortion() = 0;
-	virtual LLVector3			getVertexDistortion(S32 index, LLPolyMesh *mesh) = 0;
-	virtual const LLVector3*	getFirstDistortion(U32 *index, LLPolyMesh **mesh) = 0;
-	virtual const LLVector3*	getNextDistortion(U32 *index, LLPolyMesh **mesh) = 0;
+	virtual LLVector4a			getVertexDistortion(S32 index, LLPolyMesh *mesh) = 0;
+	virtual const LLVector4a*	getFirstDistortion(U32 *index, LLPolyMesh **mesh) = 0;
+	virtual const LLVector4a*	getNextDistortion(U32 *index, LLPolyMesh **mesh) = 0;
 	
 	// interface methods
 	F32					getDisplayOrder() const		{ return getInfo()->mEditGroupDisplayOrder; }