davep avatar davep committed 6ecc05b

MAINT-616 Factor out calls to glGenFoo where possible, add setting to control synchronizing strategy WRT occlusion queries, add experimental transform feedback driven LoD update

Comments (0)

Files changed (27)

indra/llmath/llvolume.h

 #include "llstrider.h"
 #include "v4coloru.h"
 #include "llrefcount.h"
+#include "llpointer.h"
 #include "llfile.h"
 
 //============================================================================
 	LLVector2*  mTexCoords;
 	U16* mIndices;
 
+	//vertex buffer filled in by LLFace to cache this volume face geometry in vram 
+	// (declared as a LLPointer to LLRefCount to avoid dependency on LLVertexBuffer)
+	mutable LLPointer<LLRefCount> mVertexBuffer; 
+
 	std::vector<S32>	mEdge;
 
 	//list of skin weights for rigged volumes

indra/llrender/llcubemap.cpp

 		{
 			U32 texname = 0;
 			
-			LLImageGL::generateTextures(1, &texname);
+			LLImageGL::generateTextures(LLTexUnit::TT_CUBE_MAP, 1, &texname);
 
 			for (int i = 0; i < 6; i++)
 			{

indra/llrender/llgl.cpp

 PFNGLGETMULTISAMPLEFVPROC glGetMultisamplefv = NULL;
 PFNGLSAMPLEMASKIPROC glSampleMaski = NULL;
 
+//transform feedback (these entry points are core since OpenGL 3.0; only loaded here when mGLVersion >= 4.0)
+PFNGLBEGINTRANSFORMFEEDBACKPROC glBeginTransformFeedback = NULL;
+PFNGLENDTRANSFORMFEEDBACKPROC glEndTransformFeedback = NULL;
+PFNGLTRANSFORMFEEDBACKVARYINGSPROC glTransformFeedbackVaryings = NULL;
+PFNGLBINDBUFFERRANGEPROC glBindBufferRange = NULL;
+
 //GL_ARB_debug_output
 PFNGLDEBUGMESSAGECONTROLARBPROC glDebugMessageControlARB = NULL;
 PFNGLDEBUGMESSAGEINSERTARBPROC glDebugMessageInsertARB = NULL;
 	mHasDrawBuffers(FALSE),
 	mHasTextureRectangle(FALSE),
 	mHasTextureMultisample(FALSE),
+	mHasTransformFeedback(FALSE),
 	mMaxSampleMaskWords(0),
 	mMaxColorTextureSamples(0),
 	mMaxDepthTextureSamples(0),
 	mHasTextureRectangle = ExtensionExists("GL_ARB_texture_rectangle", gGLHExts.mSysExts);
 	mHasTextureMultisample = ExtensionExists("GL_ARB_texture_multisample", gGLHExts.mSysExts);
 	mHasDebugOutput = ExtensionExists("GL_ARB_debug_output", gGLHExts.mSysExts);
+	mHasTransformFeedback = mGLVersion >= 4.f ? TRUE : FALSE;
 #if !LL_DARWIN
 	mHasPointParameters = !mIsATI && ExtensionExists("GL_ARB_point_parameters", gGLHExts.mSysExts);
 #endif
 		glTexImage3DMultisample = (PFNGLTEXIMAGE3DMULTISAMPLEPROC) GLH_EXT_GET_PROC_ADDRESS("glTexImage3DMultisample");
 		glGetMultisamplefv = (PFNGLGETMULTISAMPLEFVPROC) GLH_EXT_GET_PROC_ADDRESS("glGetMultisamplefv");
 		glSampleMaski = (PFNGLSAMPLEMASKIPROC) GLH_EXT_GET_PROC_ADDRESS("glSampleMaski");
-	}	
+	}
+	if (mHasTransformFeedback)
+	{
+		glBeginTransformFeedback = (PFNGLBEGINTRANSFORMFEEDBACKPROC) GLH_EXT_GET_PROC_ADDRESS("glBeginTransformFeedback");
+		glEndTransformFeedback = (PFNGLENDTRANSFORMFEEDBACKPROC) GLH_EXT_GET_PROC_ADDRESS("glEndTransformFeedback");
+		glTransformFeedbackVaryings = (PFNGLTRANSFORMFEEDBACKVARYINGSPROC) GLH_EXT_GET_PROC_ADDRESS("glTransformFeedbackVaryings");
+		glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC) GLH_EXT_GET_PROC_ADDRESS("glBindBufferRange");
+	}
 	if (mHasDebugOutput)
 	{
 		glDebugMessageControlARB = (PFNGLDEBUGMESSAGECONTROLARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDebugMessageControlARB");
 	gGL.matrixMode(LLRender::MM_MODELVIEW);
 }
 
+
+	
+//Construct a fence wrapper that does not yet own a GL sync object.
+LLGLSyncFence::LLGLSyncFence()
+#ifdef GL_ARB_sync
+	: mSync(0) //no fence has been placed yet
+#endif
+{
+}
+
+//Release the GL sync object, if a fence was ever placed.
+LLGLSyncFence::~LLGLSyncFence()
+{
+#ifdef GL_ARB_sync
+	if (mSync != 0)
+	{ //only call into GL when there is an outstanding sync object
+		glDeleteSync(mSync);
+	}
+#endif
+}
+
+//Place a new fence in the GL command stream, discarding any fence placed earlier.
+//Compiles to a no-op when GL_ARB_sync is not defined.
+void LLGLSyncFence::placeFence()
+{
+#ifdef GL_ARB_sync
+	const bool has_previous = (mSync != 0);
+	if (has_previous)
+	{ //delete the stale sync object before its handle is overwritten
+		glDeleteSync(mSync);
+	}
+
+	mSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+#endif
+}
+
+//Poll the fence using a 1 nanosecond client wait.
+//Returns false only when that wait reports GL_TIMEOUT_EXPIRED; returns true when
+//no fence has been placed, or when GL_ARB_sync is not defined at compile time.
+bool LLGLSyncFence::isCompleted()
+{
+#ifdef GL_ARB_sync
+	if (mSync)
+	{ //flags = 0, timeout = 1
+		return glClientWaitSync(mSync, 0, 1) != GL_TIMEOUT_EXPIRED;
+	}
+#endif
+	return true;
+}
+
+//Block until a client wait on the placed fence stops reporting GL_TIMEOUT_EXPIRED.
+//Does nothing when no fence has been placed or GL_ARB_sync is not defined.
+void LLGLSyncFence::wait()
+{
+#ifdef GL_ARB_sync
+	if (!mSync)
+	{
+		return;
+	}
+
+	//track the number of times we've waited here
+	static S32 waits = 0;
+
+	for (;;)
+	{
+		if (glClientWaitSync(mSync, 0, FENCE_WAIT_TIME_NANOSECONDS) != GL_TIMEOUT_EXPIRED)
+		{ //any result other than a timeout ends the wait
+			break;
+		}
+		++waits;
+	}
+#endif
+}
+
+
+

indra/llrender/llgl.h

 	BOOL mHasDepthClamp;
 	BOOL mHasTextureRectangle;
 	BOOL mHasTextureMultisample;
+	BOOL mHasTransformFeedback;
 	S32 mMaxSampleMaskWords;
 	S32 mMaxColorTextureSamples;
 	S32 mMaxDepthTextureSamples;
 	virtual void updateGL() = 0;
 };
 
+const U32 FENCE_WAIT_TIME_NANOSECONDS = 10000;  //10000 ns = 10 microseconds (0.01 ms), not 1 ms -- NOTE(review): confirm the intended timeout
+
+//Abstract interface for a fence placed in the GL command stream.
+//Implementations provide a way to place the fence, poll it, and block on it.
+class LLGLFence
+{
+public:
+	//virtual so that deleting a derived fence through an LLGLFence* runs the
+	//derived destructor (LLGLSyncFence releases its sync object there)
+	virtual ~LLGLFence() { }
+
+	virtual void placeFence() = 0;  //place (or re-place) the fence
+	virtual bool isCompleted() = 0; //true once the fence is no longer pending
+	virtual void wait() = 0;        //block until the fence is no longer pending
+};
+
+//LLGLFence implementation backed by a GL sync object.
+//When GL_ARB_sync is not defined at compile time the class carries no state.
+class LLGLSyncFence : public LLGLFence
+{
+public:
+	LLGLSyncFence();
+	virtual ~LLGLSyncFence();
+
+	//LLGLFence interface
+	virtual void placeFence();
+	virtual bool isCompleted();
+	virtual void wait();
+
+#ifdef GL_ARB_sync
+	GLsync mSync; //sync object handle; 0 while no fence has been placed
+#endif
+};
+
 extern LLMatrix4 gGLObliqueProjectionInverse;
 
 #include "llglstates.h"

indra/llrender/llglheaders.h

 extern PFNGLGETMULTISAMPLEFVPROC glGetMultisamplefv;
 extern PFNGLSAMPLEMASKIPROC glSampleMaski;
 
+//transform feedback (these entry points are core since OpenGL 3.0)
+extern PFNGLBEGINTRANSFORMFEEDBACKPROC glBeginTransformFeedback;
+extern PFNGLENDTRANSFORMFEEDBACKPROC glEndTransformFeedback;
+extern PFNGLTRANSFORMFEEDBACKVARYINGSPROC glTransformFeedbackVaryings;
+extern PFNGLBINDBUFFERRANGEPROC glBindBufferRange;
+
+
 #elif LL_WINDOWS
 //----------------------------------------------------------------------------
 // LL_WINDOWS
 extern PFNGLGETMULTISAMPLEFVPROC glGetMultisamplefv;
 extern PFNGLSAMPLEMASKIPROC glSampleMaski;
 
+//transform feedback (these entry points are core since OpenGL 3.0)
+extern PFNGLBEGINTRANSFORMFEEDBACKPROC glBeginTransformFeedback;
+extern PFNGLENDTRANSFORMFEEDBACKPROC glEndTransformFeedback;
+extern PFNGLTRANSFORMFEEDBACKVARYINGSPROC glTransformFeedbackVaryings;
+extern PFNGLBINDBUFFERRANGEPROC glBindBufferRange;
+
 //GL_ARB_debug_output
 extern PFNGLDEBUGMESSAGECONTROLARBPROC glDebugMessageControlARB;
 extern PFNGLDEBUGMESSAGEINSERTARBPROC glDebugMessageInsertARB;

indra/llrender/llglslshader.cpp

 }
 
 BOOL LLGLSLShader::createShader(vector<string> * attributes,
-								vector<string> * uniforms)
+								vector<string> * uniforms,
+								U32 varying_count,
+								const char** varyings)
 {
 	//reloading, reset matrix hash values
 	for (U32 i = 0; i < LLRender::NUM_MATRIX_MODES; ++i)
 		mFeatures.mIndexedTextureChannels = llmin(mFeatures.mIndexedTextureChannels, 1);
 	}
 
+	if (varying_count > 0 && varyings)
+	{
+		glTransformFeedbackVaryings(mProgramObject, varying_count, varyings, GL_INTERLEAVED_ATTRIBS);
+	}
+
 	// Map attributes and uniforms
 	if (success)
 	{

indra/llrender/llglslshader.h

 
 	void unload();
 	BOOL createShader(std::vector<std::string> * attributes,
-						std::vector<std::string> * uniforms);
+						std::vector<std::string> * uniforms,
+						U32 varying_count = 0,
+						const char** varyings = NULL);
 	BOOL attachObject(std::string object);
 	void attachObject(GLhandleARB object);
 	void attachObjects(GLhandleARB* objects = NULL, S32 count = 0);

indra/llrender/llimagegl.cpp

 S32 LLImageGL::sBoundTextureMemoryInBytes		= 0;
 S32 LLImageGL::sCurBoundTextureMemory	= 0;
 S32 LLImageGL::sCount					= 0;
-std::list<U32> LLImageGL::sDeadTextureList;
+std::list<U32> LLImageGL::sDeadTextureList[LLTexUnit::TT_NONE];
+U32 LLImageGL::sCurTexName = 1;
 
 BOOL LLImageGL::sGlobalUseAnisotropic	= FALSE;
 F32 LLImageGL::sLastFrameTime			= 0.f;
 }
 
 // static
-void LLImageGL::generateTextures(S32 numTextures, U32 *textures)
+void LLImageGL::generateTextures(LLTexUnit::eTextureType type, S32 numTextures, U32 *textures)
 {
-	glGenTextures(numTextures, (GLuint*)textures);
+	for (S32 i = 0; i < numTextures; ++i) //fill each requested slot with a texture name
+	{
+		if (!sDeadTextureList[type].empty()) //NOTE(review): the list array has TT_NONE entries, so type == TT_NONE would index past the end
+		{
+			textures[i] = sDeadTextureList[type].front(); //reuse a name that deleteTextures() queued for this texture type
+			sDeadTextureList[type].pop_front();
+		}
+		else
+		{
+			textures[i] = sCurTexName++; //nothing to reuse: hand out the next value of the shared counter
+		}
+	}
 }
 
 // static
-void LLImageGL::deleteTextures(S32 numTextures, U32 *textures, bool immediate)
+void LLImageGL::deleteTextures(LLTexUnit::eTextureType type, S32 numTextures, U32 *textures, bool immediate)
 {
-	for (S32 i = 0; i < numTextures; i++)
-	{
-		sDeadTextureList.push_back(textures[i]);
+	for (S32 i = 0; i < numTextures; ++i)
+	{ //remove texture from VRAM by setting its size to zero
+		gGL.getTexUnit(0)->bindManual(type, textures[i]);
+
+		if (type == LLTexUnit::TT_CUBE_MAP) //cube maps: respecify each of the six face targets
+		{
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+		}
+		else
+		{
+			glTexImage2D(LLTexUnit::getInternalType(type), 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+		}
+		sDeadTextureList[type].push_back(textures[i]); //queue the name so generateTextures() can hand it out again
 	}
-
-	if (immediate)
+	
+	/*if (immediate)
 	{
 		LLImageGL::deleteDeadTextures();
-	}
+	}*/ //with this branch commented out, the 'immediate' argument is ignored
 }
 
 // static
 
 	if(mTexName)
 	{
-		glDeleteTextures(1, (reinterpret_cast<GLuint*>(&mTexName))) ;
+		LLImageGL::deleteTextures(mBindTarget, 1, (reinterpret_cast<GLuint*>(&mTexName))) ;
 	}
 	
-	glGenTextures(1, (GLuint*)&mTexName);
+
+	LLImageGL::generateTextures(mBindTarget, 1, &mTexName);
 	stop_glerror();
 	if (!mTexName)
 	{
 	}
 	else
 	{
-		LLImageGL::generateTextures(1, &mTexName);
+		LLImageGL::generateTextures(mBindTarget, 1, &mTexName);
 		stop_glerror();
 		{
 			llverify(gGL.getTexUnit(0)->bind(this));
 			decTextureCounter(mTextureMemory, mComponents, mCategory) ;
 		}
 
-		LLImageGL::deleteTextures(1, &old_name);
+		LLImageGL::deleteTextures(mBindTarget,1, &old_name);
 
 		stop_glerror();
 	}
 {
 	bool reset = false;
 
-	while (!sDeadTextureList.empty())
+	/*while (!sDeadTextureList.empty())
 	{
 		GLuint tex = sDeadTextureList.front();
 		sDeadTextureList.pop_front();
 		
 		glDeleteTextures(1, &tex);
 		stop_glerror();
-	}
+	}*/
 
 	if (reset)
 	{
 			mTextureMemory = 0;
 		}
 		
-		LLImageGL::deleteTextures(1, &mTexName);			
+		LLImageGL::deleteTextures(mBindTarget, 1, &mTexName);			
 		mTexName = 0;
 		mCurrentDiscardLevel = -1 ; //invalidate mCurrentDiscardLevel.
 		mGLTextureCreated = FALSE ;

indra/llrender/llimagegl.h

 {
 	friend class LLTexUnit;
 public:
-	static std::list<U32> sDeadTextureList;
+	static U32 sCurTexName;
+	static std::list<U32> sDeadTextureList[LLTexUnit::TT_NONE];
 
+	// These 2 functions replace glGenTextures() and glDeleteTextures()
+	static void generateTextures(LLTexUnit::eTextureType type, S32 numTextures, U32 *textures);
+	static void deleteTextures(LLTexUnit::eTextureType type, S32 numTextures, U32 *textures, bool immediate = false);
 	static void deleteDeadTextures();
 
 	// Size calculation
 	void setComponents(S32 ncomponents) { mComponents = (S8)ncomponents ;}
 	void setAllowCompression(bool allow) { mAllowCompression = allow; }
 
-	// These 3 functions currently wrap glGenTextures(), glDeleteTextures(), and glTexImage2D() 
-	// for tracking purposes and will be deprecated in the future
-	static void generateTextures(S32 numTextures, U32 *textures);
-	static void deleteTextures(S32 numTextures, U32 *textures, bool immediate = false);
 	static void setManualImage(U32 target, S32 miplevel, S32 intformat, S32 width, S32 height, U32 pixformat, U32 pixtype, const void *pixels, bool allow_compression = true);
 
 	BOOL createGLTexture() ;

indra/llrender/llrendertarget.cpp

 	}
 
 	U32 tex;
-	LLImageGL::generateTextures(1, &tex);
+	LLImageGL::generateTextures(mUsage, 1, &tex);
 	gGL.getTexUnit(0)->bindManual(mUsage, tex);
 
 	stop_glerror();
 	}
 	else
 	{
-		LLImageGL::generateTextures(1, &mDepth);
+		LLImageGL::generateTextures(mUsage, 1, &mDepth);
 		gGL.getTexUnit(0)->bindManual(mUsage, mDepth);
 		
 		U32 internal_type = LLTexUnit::getInternalType(mUsage);
 		}
 		else
 		{
-			LLImageGL::deleteTextures(1, &mDepth, true);
+			LLImageGL::deleteTextures(mUsage, 1, &mDepth, true);
 			stop_glerror();
 		}
 		mDepth = 0;
 	if (mTex.size() > 0)
 	{
 		sBytesAllocated -= mResX*mResY*4*mTex.size();
-		LLImageGL::deleteTextures(mTex.size(), &mTex[0], true);
+		LLImageGL::deleteTextures(mUsage, mTex.size(), &mTex[0], true);
 		mTex.clear();
 	}
 	

indra/llrender/llvertexbuffer.cpp

 
 U32 LLVBOPool::sBytesPooled = 0;
 U32 LLVBOPool::sIndexBytesPooled = 0;
+U32 LLVBOPool::sCurGLName = 1;
+
+std::list<U32> LLVertexBuffer::sAvailableVAOName;
+U32 LLVertexBuffer::sCurVAOName = 1;
+
 U32 LLVertexBuffer::sAllocatedIndexBytes = 0;
 U32 LLVertexBuffer::sIndexCount = 0;
 
 bool LLVertexBuffer::sUseVAO = false;
 bool LLVertexBuffer::sPreferStreamDraw = false;
 
-const U32 FENCE_WAIT_TIME_NANOSECONDS = 10000;  //1 ms
 
-class LLGLSyncFence : public LLGLFence
+U32 LLVBOPool::genBuffer()
 {
-public:
-#ifdef GL_ARB_sync
-	GLsync mSync;
-#endif
-	
-	LLGLSyncFence()
+	U32 ret = 0;
+
+	if (mGLNamePool.empty())
 	{
-#ifdef GL_ARB_sync
-		mSync = 0;
-#endif
+		ret = sCurGLName++; //no recycled name in this pool: take the next value of the counter shared by all pools
+	}
+	else
+	{
+		ret = mGLNamePool.front(); //reuse a name that deleteBuffer() returned to this pool
+		mGLNamePool.pop_front();
 	}
 
-	virtual ~LLGLSyncFence()
-	{
-#ifdef GL_ARB_sync
-		if (mSync)
-		{
-			glDeleteSync(mSync);
-		}
-#endif
-	}
+	return ret;
+}
 
-	void placeFence()
-	{
-#ifdef GL_ARB_sync
-		if (mSync)
-		{
-			glDeleteSync(mSync);
-		}
-		mSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-#endif
-	}
+void LLVBOPool::deleteBuffer(U32 name)
+{
+	LLVertexBuffer::unbind();
 
-	void wait()
-	{
-#ifdef GL_ARB_sync
-		if (mSync)
-		{
-			while (glClientWaitSync(mSync, 0, FENCE_WAIT_TIME_NANOSECONDS) == GL_TIMEOUT_EXPIRED)
-			{ //track the number of times we've waited here
-				static S32 waits = 0;
-				waits++;
-			}
-		}
-#endif
-	}
+	glBindBufferARB(mType, name);
+	glBufferDataARB(mType, 0, NULL, mUsage); //respecify the buffer with size 0 rather than deleting the GL name
 
+	llassert(std::find(mGLNamePool.begin(), mGLNamePool.end(), name) == mGLNamePool.end()); //the name must not already be in the pool
 
-};
+	mGLNamePool.push_back(name); //make the name available to genBuffer() again
+
+	LLVertexBuffer::unbind();
+}
+
 
 LLVBOPool::LLVBOPool(U32 vboUsage, U32 vboType)
 : mUsage(vboUsage), mType(vboType)
 	std::fill(mMissCount.begin(), mMissCount.end(), 0);
 }
 
+static LLFastTimer::DeclareTimer FTM_VBO_GEN_BUFFER("gen buffers");
+static LLFastTimer::DeclareTimer FTM_VBO_BUFFER_DATA("glBufferData");
+
 
 volatile U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
 {
 	if (mFreeList[i].empty() || for_seed)
 	{
 		//make a new buffer
-		glGenBuffersARB(1, &name);
+		{
+			LLFastTimer t(FTM_VBO_GEN_BUFFER);
+			name = genBuffer();
+		}
 		glBindBufferARB(mType, name);
 
 		if (!for_seed && i < LL_VBO_POOL_SEED_COUNT)
 		}
 		else
 		{ //always use a true hint of static draw when allocating non-client-backed buffers
+			LLFastTimer t(FTM_VBO_BUFFER_DATA);
 			glBufferDataARB(mType, size, 0, GL_STATIC_DRAW_ARB);
 		}
 
 		mFreeList[i].push_back(rec);
 	}
 #else //no pooling
-	glDeleteBuffersARB(1, &name);
+	deleteBuffer(name);
 	ll_aligned_free_16((U8*) buffer);
 
 	if (mType == GL_ARRAY_BUFFER_ARB)
 		{
 			Record& r = l.front();
 
-			glDeleteBuffersARB(1, &r.mGLName);
-
+			deleteBuffer(r.mGLName);
+			
 			if (r.mClientData)
 			{
 				ll_aligned_free_16((void*) r.mClientData);
 	GL_LINE_LOOP,
 };
 
+//static
+U32 LLVertexBuffer::getVAOName()
+{
+	U32 ret = 0;
+
+	if (!sAvailableVAOName.empty())
+	{
+		ret = sAvailableVAOName.front();
+		sAvailableVAOName.pop_front();
+	}
+	else
+	{
+		glGenVertexArrays(1, &ret);
+	}
+
+	return ret;		
+}
+
+//static
+//Hand a vertex array object name back for reuse by getVAOName().
+//No GL call is made here; the name is only appended to the free list.
+void LLVertexBuffer::releaseVAOName(U32 name)
+{
+	sAvailableVAOName.insert(sAvailableVAOName.end(), name);
+}
+
 
 //static
 void LLVertexBuffer::seedPools()
 	if (mGLArray)
 	{
 #if GL_ARB_vertex_array_object
-		glDeleteVertexArrays(1, &mGLArray);
+		releaseVAOName(mGLArray);
 #endif
 	}
 
 		if (gGLManager.mHasVertexArrayObject && useVBOs() && (LLRender::sGLCoreProfile || sUseVAO))
 		{
 #if GL_ARB_vertex_array_object
-			glGenVertexArrays(1, &mGLArray);
+			mGLArray = getVAOName();
 #endif
 			setupVertexArray();
 		}
 	}
 }
 
+// bind for transform feedback (quick 'n dirty)
+// Binds a byte range of this buffer to transform feedback binding point
+// 'channel'. The range starts at element 'index' of attribute 'type' and
+// spans 'count' elements, sized by sTypeSize[type].
+void LLVertexBuffer::bindForFeedback(U32 channel, U32 type, U32 index, U32 count)
+{
+	const U32 element_size = sTypeSize[type];
+	const U32 byte_offset = mOffsets[type] + element_size*index;
+	const U32 byte_count = element_size*count;
+
+	glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, channel, mGLBuffer, byte_offset, byte_count);
+}
+
 // Set for rendering
 void LLVertexBuffer::setBuffer(U32 data_mask)
 {

indra/llrender/llvertexbuffer.h

 	static U32 sBytesPooled;
 	static U32 sIndexBytesPooled;
 	
+	static U32 sCurGLName;
+
 	LLVBOPool(U32 vboUsage, U32 vboType);
 		
 	const U32 mUsage;
 	//destroy all records in mFreeList
 	void cleanup();
 
+	U32 genBuffer();
+	void deleteBuffer(U32 name);
+
 	class Record
 	{
 	public:
 		volatile U8* mClientData;
 	};
 
+	std::list<U32> mGLNamePool;
+
 	typedef std::list<Record> record_list_t;
 	std::vector<record_list_t> mFreeList;
 	std::vector<U32> mMissCount;
 
 };
 
-class LLGLFence
-{
-public:
-	virtual void placeFence() = 0;
-	virtual void wait() = 0;
-};
 
 //============================================================================
 // base class 
 	static LLVBOPool sStreamIBOPool;
 	static LLVBOPool sDynamicIBOPool;
 
+	static std::list<U32> sAvailableVAOName;
+	static U32 sCurVAOName;
+
 	static bool	sUseStreamDraw;
 	static bool sUseVAO;
 	static bool	sPreferStreamDraw;
 
 	static void seedPools();
 
+	static U32 getVAOName();
+	static void releaseVAOName(U32 name);
+
 	static void initClass(bool use_vbo, bool no_vbo_mapping);
 	static void cleanupClass();
 	static void setupClientArrays(U32 data_mask);
+	static void pushPositions(U32 mode, const LLVector4a* pos, U32 count);
 	static void drawArrays(U32 mode, const std::vector<LLVector3>& pos, const std::vector<LLVector3>& norm);
 	static void drawElements(U32 mode, const LLVector4a* pos, const LLVector2* tc, S32 num_indices, const U16* indicesp);
 
 	void 	destroyGLIndices();
 	void	updateNumVerts(S32 nverts);
 	void	updateNumIndices(S32 nindices); 
-	bool	useVBOs() const;
 	void	unmapBuffer();
 		
 public:
 	volatile U8*		mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range);
 	volatile U8*		mapIndexBuffer(S32 index, S32 count, bool map_range);
 
+	void bindForFeedback(U32 channel, U32 type, U32 index, U32 count);
+
 	// set for rendering
 	virtual void	setBuffer(U32 data_mask); 	// calls  setupVertexBuffer() if data_mask is not 0
 	void flush(); //flush pending data to GL memory
 	bool getNormalStrider(LLStrider<LLVector3>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getBinormalStrider(LLStrider<LLVector3>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getColorStrider(LLStrider<LLColor4U>& strider, S32 index=0, S32 count = -1, bool map_range = false);
+	bool getTextureIndexStrider(LLStrider<LLColor4U>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getEmissiveStrider(LLStrider<LLColor4U>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getWeightStrider(LLStrider<F32>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getWeight4Strider(LLStrider<LLVector4>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getClothWeightStrider(LLStrider<LLVector4>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	
 
+	bool useVBOs() const;
 	bool isEmpty() const					{ return mEmpty; }
 	bool isLocked() const					{ return mVertexLocked || mIndexLocked; }
 	S32 getNumVerts() const					{ return mNumVerts; }

indra/newview/app_settings/settings.xml

     <key>RenderUseVAO</key>
     <map>
       <key>Comment</key>
-      <string>Use GL Vertex Array Objects</string>
-      <key>Persist</key>
-      <integer>1</integer>
-      <key>Type</key>
-      <string>Boolean</string>
-      <key>Value</key>
-      <integer>0</integer>
-    </map>
-    <key>RenderVBOMappingDisable</key>
+      <string>[EXPERIMENTAL] Use GL Vertex Array Objects</string>
+      <key>Persist</key>
+      <integer>1</integer>
+      <key>Type</key>
+      <string>Boolean</string>
+      <key>Value</key>
+      <integer>0</integer>
+    </map>
+  <key>RenderUseTransformFeedback</key>
+  <map>
+    <key>Comment</key>
+    <string>[EXPERIMENTAL] Use transform feedback shaders for LoD updates</string>
+    <key>Persist</key>
+    <integer>1</integer>
+    <key>Type</key>
+    <string>Boolean</string>
+    <key>Value</key>
+    <integer>0</integer>
+  </map>
+
+  <key>RenderVBOMappingDisable</key>
     <map>
       <key>Comment</key>
       <string>Disable VBO glMapBufferARB</string>
       <key>Value</key>
       <integer>1</integer>
     </map>
+  <key>RenderSynchronousOcclusion</key>
+  <map>
+    <key>Comment</key>
+    <string>Don't let occlusion queries get more than one frame behind (block until they complete).</string>
+    <key>Persist</key>
+    <integer>1</integer>
+    <key>Type</key>
+    <string>Boolean</string>
+    <key>Value</key>
+    <integer>1</integer>
+  </map>
     <key>RenderDelayVBUpdate</key>
     <map>
       <key>Comment</key>

indra/newview/lldrawpoolalpha.cpp

 			{
 				LLDrawInfo& params = **k;
 
+				if ((params.mVertexBuffer->getTypeMask() & mask) != mask)
+				{ //FIXME!
+					//llwarns << "Missing required components, skipping render batch." << llendl;
+					continue;
+				}
+
 				LLRenderPass::applyModelMatrix(params);
 
 				

indra/newview/llface.cpp

 #include "pipeline.h"
 #include "llviewerregion.h"
 #include "llviewerwindow.h"
+#include "llviewershadermgr.h"
+
 
 #define LL_MAX_INDICES_COUNT 1000000
 
 
 #define DOTVEC(a,b) (a.mV[0]*b.mV[0] + a.mV[1]*b.mV[1] + a.mV[2]*b.mV[2])
 
-
 /*
 For each vertex, given:
 	B - binormal
 }
 
 
+static LLFastTimer::DeclareTimer FTM_FACE_GEOM_VOLUME("Volume VB Cache");
+
+//static
+//Build a GL_STATIC_DRAW_ARB vertex buffer holding this volume face's positions,
+//texture coordinates, binormals and normals (plus skin weights when the face
+//has them) and store it in vf.mVertexBuffer.
+void LLFace::cacheFaceInVRAM(const LLVolumeFace& vf)
+{
+	LLFastTimer t(FTM_FACE_GEOM_VOLUME);
+
+	const bool has_weights = (vf.mWeights != NULL);
+
+	U32 mask = LLVertexBuffer::MAP_VERTEX |
+				LLVertexBuffer::MAP_TEXCOORD0 |
+				LLVertexBuffer::MAP_BINORMAL |
+				LLVertexBuffer::MAP_NORMAL;
+	if (has_weights)
+	{
+		mask |= LLVertexBuffer::MAP_WEIGHT4;
+	}
+
+	//mVertexBuffer is declared mutable, so it can be assigned through the const reference
+	LLVertexBuffer* buff = new LLVertexBuffer(mask, GL_STATIC_DRAW_ARB);
+	vf.mVertexBuffer = buff;
+
+	buff->allocateBuffer(vf.mNumVertices, 0, true);
+
+	LLStrider<LLVector4a> pos_out;
+	LLStrider<LLVector3> binorm_out;
+	LLStrider<LLVector3> norm_out;
+	LLStrider<LLVector2> tc_out;
+
+	buff->getBinormalStrider(binorm_out);
+	buff->getVertexStrider(pos_out);
+	buff->getNormalStrider(norm_out);
+	buff->getTexCoord0Strider(tc_out);
+
+	const U32 num_verts = vf.mNumVertices;
+
+	//copy the per-vertex attributes that every cached face carries
+	for (U32 v = 0; v < num_verts; ++v)
+	{
+		*pos_out++ = vf.mPositions[v];
+		(*binorm_out++).set(vf.mBinormals[v].getF32ptr());
+		*tc_out++ = vf.mTexCoords[v];
+		(*norm_out++).set(vf.mNormals[v].getF32ptr());
+	}
+
+	if (has_weights)
+	{ //skin weights are only present on some faces
+		LLStrider<LLVector4> weight_out;
+		buff->getWeight4Strider(weight_out);
+		for (U32 v = 0; v < num_verts; ++v)
+		{
+			(*weight_out++).set(vf.mWeights[v].getF32ptr());
+		}
+	}
+
+	buff->flush();
+}
+
+//helper function for pushing primitives for transform shaders and cleaning up
+//uninitialized data on the tail, plus tracking number of expected primitives
+//  buff         - source buffer to draw points from
+//  source_count - number of source points to push
+//  dest_count   - number of points expected in the destination range
+void push_for_transform(LLVertexBuffer* buff, U32 source_count, U32 dest_count)
+{
+	if (source_count == 0 || dest_count < source_count)
+	{ //nothing to push, or the tail length below would wrap (U32)
+		return;
+	}
+
+	//push source primitives
+	buff->drawArrays(LLRender::POINTS, 0, source_count);
+
+	//copy last source primitive into each element in tail
+	const U32 last_source = source_count-1;
+	for (U32 remaining = dest_count-source_count; remaining > 0; --remaining)
+	{
+		buff->drawArrays(LLRender::POINTS, last_source, 1);
+	}
+
+	gPipeline.mTransformFeedbackPrimitives += dest_count;
+}
+
 static LLFastTimer::DeclareTimer FTM_FACE_GET_GEOM("Face Geom");
 static LLFastTimer::DeclareTimer FTM_FACE_GEOM_POSITION("Position");
 static LLFastTimer::DeclareTimer FTM_FACE_GEOM_NORMAL("Normal");
 static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK("Quick");
 static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_NO_XFORM("No Xform");
 static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_XFORM("Xform");
-
 static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_PLANAR("Quick Planar");
 
 BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 	LLMatrix4a mat_normal;
 	mat_normal.loadu(mat_norm_in);
 	
-	//if it's not fullbright and has no normals, bake sunlight based on face normal
-	//bool bake_sunlight = !getTextureEntry()->getFullbright() &&
-	//  !mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL);
-
 	F32 r = 0, os = 0, ot = 0, ms = 0, mt = 0, cos_ang = 0, sin_ang = 0;
-
+	bool do_xform = false;
 	if (rebuild_tcoord)
 	{
-		LLFastTimer t(FTM_FACE_GEOM_TEXTURE);
-		bool do_xform;
-			
 		if (tep)
 		{
 			r  = tep->getRotation();
 		{
 			do_xform = false;
 		}
-						
-		//bump setup
-		LLVector4a binormal_dir( -sin_ang, cos_ang, 0.f );
-		LLVector4a bump_s_primary_light_ray(0.f, 0.f, 0.f);
-		LLVector4a bump_t_primary_light_ray(0.f, 0.f, 0.f);
-
-		LLQuaternion bump_quat;
-		if (mDrawablep->isActive())
-		{
-			bump_quat = LLQuaternion(mDrawablep->getRenderMatrix());
-		}
-		
-		if (bump_code)
+	}
+	
+	static LLCachedControl<bool> use_transform_feedback(gSavedSettings, "RenderUseTransformFeedback");
+
+	if (use_transform_feedback &&
+		gTransformPositionProgram.mProgramObject && //transform shaders are loaded
+		mVertexBuffer->useVBOs() && //target buffer is in VRAM
+		!rebuild_weights && //TODO: add support for weights
+		!volume.isUnique()) //source volume is NOT flexi
+	{ //use transform feedback to pack vertex buffer
+
+		LLVertexBuffer* buff = (LLVertexBuffer*) vf.mVertexBuffer.get();
+
+		if (vf.mVertexBuffer.isNull() || buff->getNumVerts() != vf.mNumVertices)
 		{
 			mVObjp->getVolume()->genBinormals(f);
-			F32 offset_multiple; 
-			switch( bump_code )
+			LLFace::cacheFaceInVRAM(vf);
+			buff = (LLVertexBuffer*) vf.mVertexBuffer.get();
+		}		
+
+		LLGLSLShader* cur_shader = LLGLSLShader::sCurBoundShaderPtr;
+		
+		gGL.pushMatrix();
+		gGL.loadMatrix((GLfloat*) mat_vert_in.mMatrix);
+
+		if (rebuild_pos)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_POSITION);
+			gTransformPositionProgram.bind();
+
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_VERTEX, mGeomIndex, mGeomCount);
+
+			U8 index = mTextureIndex < 255 ? mTextureIndex : 0;
+
+			S32 val = 0.f;
+			U8* vp = (U8*) &val;
+			vp[0] = index;
+			vp[1] = 0;
+			vp[2] = 0;
+			vp[3] = 0;
+			
+			gTransformPositionProgram.uniform1i("texture_index_in", val);
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
+
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+
+			glEndTransformFeedback();
+		}
+
+		if (rebuild_color)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_COLOR);
+			gTransformColorProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_COLOR, mGeomIndex, mGeomCount);
+
+			S32 val = *((S32*) color.mV);
+
+			gTransformColorProgram.uniform1i("color_in", val);
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
+		}
+
+		if (rebuild_emissive)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_EMISSIVE);
+			gTransformColorProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_EMISSIVE, mGeomIndex, mGeomCount);
+
+			U8 glow = (U8) llclamp((S32) (getTextureEntry()->getGlow()*255), 0, 255);
+
+			S32 glow32 = glow |
+						 (glow << 8) |
+						 (glow << 16) |
+						 (glow << 24);
+
+			gTransformColorProgram.uniform1i("color_in", glow32);
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
+		}
+
+		if (rebuild_normal)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_NORMAL);
+			gTransformNormalProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_NORMAL, mGeomIndex, mGeomCount);
+						
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_NORMAL);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
+		}
+
+		if (rebuild_binormal)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_BINORMAL);
+			gTransformBinormalProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_BINORMAL, mGeomIndex, mGeomCount);
+						
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_BINORMAL);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
+		}
+
+		if (rebuild_tcoord)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_TEXTURE);
+			gTransformTexCoordProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_TEXCOORD0, mGeomIndex, mGeomCount);
+						
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_TEXCOORD0);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
+
+			bool do_bump = bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1);
+
+			if (do_bump)
 			{
-				case BE_NO_BUMP:
-				offset_multiple = 0.f;
-				break;
-				case BE_BRIGHTNESS:
-				case BE_DARKNESS:
-				if( mTexture.notNull() && mTexture->hasGLTexture())
+				mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_TEXCOORD1, mGeomIndex, mGeomCount);
+				glBeginTransformFeedback(GL_POINTS);
+				buff->setBuffer(LLVertexBuffer::MAP_TEXCOORD0);
+				push_for_transform(buff, vf.mNumVertices, mGeomCount);
+				glEndTransformFeedback();
+			}				
+		}
+
+		glBindBufferARB(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
+
+		gGL.popMatrix();
+
+		if (cur_shader)
+		{
+			cur_shader->bind();
+		}
+	}
+	else
+	{
+		//if it's not fullbright and has no normals, bake sunlight based on face normal
+		//bool bake_sunlight = !getTextureEntry()->getFullbright() &&
+		//  !mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL);
+
+		if (rebuild_tcoord)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_TEXTURE);
+									
+			//bump setup
+			LLVector4a binormal_dir( -sin_ang, cos_ang, 0.f );
+			LLVector4a bump_s_primary_light_ray(0.f, 0.f, 0.f);
+			LLVector4a bump_t_primary_light_ray(0.f, 0.f, 0.f);
+
+			LLQuaternion bump_quat;
+			if (mDrawablep->isActive())
+			{
+				bump_quat = LLQuaternion(mDrawablep->getRenderMatrix());
+			}
+		
+			if (bump_code)
+			{
+				mVObjp->getVolume()->genBinormals(f);
+				F32 offset_multiple; 
+				switch( bump_code )
 				{
-					// Offset by approximately one texel
-					S32 cur_discard = mTexture->getDiscardLevel();
-					S32 max_size = llmax( mTexture->getWidth(), mTexture->getHeight() );
-					max_size <<= cur_discard;
-					const F32 ARTIFICIAL_OFFSET = 2.f;
-					offset_multiple = ARTIFICIAL_OFFSET / (F32)max_size;
+					case BE_NO_BUMP:
+					offset_multiple = 0.f;
+					break;
+					case BE_BRIGHTNESS:
+					case BE_DARKNESS:
+					if( mTexture.notNull() && mTexture->hasGLTexture())
+					{
+						// Offset by approximately one texel
+						S32 cur_discard = mTexture->getDiscardLevel();
+						S32 max_size = llmax( mTexture->getWidth(), mTexture->getHeight() );
+						max_size <<= cur_discard;
+						const F32 ARTIFICIAL_OFFSET = 2.f;
+						offset_multiple = ARTIFICIAL_OFFSET / (F32)max_size;
+					}
+					else
+					{
+						offset_multiple = 1.f/256;
+					}
+					break;
+
+					default:  // Standard bumpmap textures.  Assumed to be 256x256
+					offset_multiple = 1.f / 256;
+					break;
+				}
+
+				F32 s_scale = 1.f;
+				F32 t_scale = 1.f;
+				if( tep )
+				{
+					tep->getScale( &s_scale, &t_scale );
+				}
+				// Use the nudged south when coming from above sun angle, such
+				// that emboss mapping always shows up on the upward faces of cubes when 
+				// it's noon (since a lot of builders build with the sun forced to noon).
+				LLVector3   sun_ray  = gSky.mVOSkyp->mBumpSunDir;
+				LLVector3   moon_ray = gSky.getMoonDirection();
+				LLVector3& primary_light_ray = (sun_ray.mV[VZ] > 0) ? sun_ray : moon_ray;
+
+				bump_s_primary_light_ray.load3((offset_multiple * s_scale * primary_light_ray).mV);
+				bump_t_primary_light_ray.load3((offset_multiple * t_scale * primary_light_ray).mV);
+			}
+
+			U8 texgen = getTextureEntry()->getTexGen();
+			if (rebuild_tcoord && texgen != LLTextureEntry::TEX_GEN_DEFAULT)
+			{ //planar texgen needs binormals
+				mVObjp->getVolume()->genBinormals(f);
+			}
+
+			U8 tex_mode = 0;
+	
+			if (isState(TEXTURE_ANIM))
+			{
+				LLVOVolume* vobj = (LLVOVolume*) (LLViewerObject*) mVObjp;	
+				tex_mode = vobj->mTexAnimMode;
+
+				if (!tex_mode)
+				{
+					clearState(TEXTURE_ANIM);
 				}
 				else
 				{
-					offset_multiple = 1.f/256;
+					os = ot = 0.f;
+					r = 0.f;
+					cos_ang = 1.f;
+					sin_ang = 0.f;
+					ms = mt = 1.f;
+
+					do_xform = false;
 				}
-				break;
-
-				default:  // Standard bumpmap textures.  Assumed to be 256x256
-				offset_multiple = 1.f / 256;
-				break;
+
+				if (getVirtualSize() >= MIN_TEX_ANIM_SIZE)
+				{ //don't override texture transform during tc bake
+					tex_mode = 0;
+				}
 			}
 
-			F32 s_scale = 1.f;
-			F32 t_scale = 1.f;
-			if( tep )
-			{
-				tep->getScale( &s_scale, &t_scale );
-			}
-			// Use the nudged south when coming from above sun angle, such
-			// that emboss mapping always shows up on the upward faces of cubes when 
-			// it's noon (since a lot of builders build with the sun forced to noon).
-			LLVector3   sun_ray  = gSky.mVOSkyp->mBumpSunDir;
-			LLVector3   moon_ray = gSky.getMoonDirection();
-			LLVector3& primary_light_ray = (sun_ray.mV[VZ] > 0) ? sun_ray : moon_ray;
-
-			bump_s_primary_light_ray.load3((offset_multiple * s_scale * primary_light_ray).mV);
-			bump_t_primary_light_ray.load3((offset_multiple * t_scale * primary_light_ray).mV);
-		}
-
-		U8 texgen = getTextureEntry()->getTexGen();
-		if (rebuild_tcoord && texgen != LLTextureEntry::TEX_GEN_DEFAULT)
-		{ //planar texgen needs binormals
-			mVObjp->getVolume()->genBinormals(f);
-		}
-
-		U8 tex_mode = 0;
-	
-		if (isState(TEXTURE_ANIM))
-		{
-			LLVOVolume* vobj = (LLVOVolume*) (LLViewerObject*) mVObjp;	
-			tex_mode = vobj->mTexAnimMode;
-
-			if (!tex_mode)
-			{
-				clearState(TEXTURE_ANIM);
-			}
-			else
-			{
-				os = ot = 0.f;
-				r = 0.f;
-				cos_ang = 1.f;
-				sin_ang = 0.f;
-				ms = mt = 1.f;
-
-				do_xform = false;
-			}
-
-			if (getVirtualSize() >= MIN_TEX_ANIM_SIZE)
-			{ //don't override texture transform during tc bake
-				tex_mode = 0;
-			}
-		}
-
-		LLVector4a scalea;
-		scalea.load3(scale.mV);
-
-		bool do_bump = bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1);
-		bool do_tex_mat = tex_mode && mTextureMatrix;
-
-		if (!in_atlas && !do_bump)
-		{ //not in atlas or not bump mapped, might be able to do a cheap update
-			mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount);
-
-			if (texgen != LLTextureEntry::TEX_GEN_PLANAR)
-			{
-				LLFastTimer t(FTM_FACE_TEX_QUICK);
-				if (!do_tex_mat)
+			LLVector4a scalea;
+			scalea.load3(scale.mV);
+
+			bool do_bump = bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1);
+			bool do_tex_mat = tex_mode && mTextureMatrix;
+
+			if (!in_atlas && !do_bump)
+			{ //not in atlas or not bump mapped, might be able to do a cheap update
+				mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount);
+
+				if (texgen != LLTextureEntry::TEX_GEN_PLANAR)
 				{
-					if (!do_xform)
+					LLFastTimer t(FTM_FACE_TEX_QUICK);
+					if (!do_tex_mat)
 					{
-						LLFastTimer t(FTM_FACE_TEX_QUICK_NO_XFORM);
-						LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
+						if (!do_xform)
+						{
+							LLFastTimer t(FTM_FACE_TEX_QUICK_NO_XFORM);
+							LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
+						}
+						else
+						{
+							LLFastTimer t(FTM_FACE_TEX_QUICK_XFORM);
+							F32* dst = (F32*) tex_coords.get();
+							LLVector4a* src = (LLVector4a*) vf.mTexCoords;
+
+							LLVector4a trans;
+							trans.splat(-0.5f);
+
+							LLVector4a rot0;
+							rot0.set(cos_ang, -sin_ang, cos_ang, -sin_ang);
+
+							LLVector4a rot1;
+							rot1.set(sin_ang, cos_ang, sin_ang, cos_ang);
+
+							LLVector4a scale;
+							scale.set(ms, mt, ms, mt);
+
+							LLVector4a offset;
+							offset.set(os+0.5f, ot+0.5f, os+0.5f, ot+0.5f);
+
+							LLVector4Logical mask;
+							mask.clear();
+							mask.setElement<2>();
+							mask.setElement<3>();
+
+							U32 count = num_vertices/2 + num_vertices%2;
+
+							for (S32 i = 0; i < count; i++)
+							{	
+								LLVector4a res = *src++;
+								xform4a(res, trans, mask, rot0, rot1, offset, scale);
+								res.store4a(dst);
+								dst += 4;
+							}
+						}
 					}
 					else
-					{
-						LLFastTimer t(FTM_FACE_TEX_QUICK_XFORM);
-						F32* dst = (F32*) tex_coords.get();
-						LLVector4a* src = (LLVector4a*) vf.mTexCoords;
-
-						LLVector4a trans;
-						trans.splat(-0.5f);
-
-						LLVector4a rot0;
-						rot0.set(cos_ang, -sin_ang, cos_ang, -sin_ang);
-
-						LLVector4a rot1;
-						rot1.set(sin_ang, cos_ang, sin_ang, cos_ang);
-
-						LLVector4a scale;
-						scale.set(ms, mt, ms, mt);
-
-						LLVector4a offset;
-						offset.set(os+0.5f, ot+0.5f, os+0.5f, ot+0.5f);
-
-						LLVector4Logical mask;
-						mask.clear();
-						mask.setElement<2>();
-						mask.setElement<3>();
-
-						U32 count = num_vertices/2 + num_vertices%2;
-
-						for (S32 i = 0; i < count; i++)
+					{ //do tex mat, no texgen, no atlas, no bump
+						for (S32 i = 0; i < num_vertices; i++)
 						{	
-							LLVector4a res = *src++;
-							xform4a(res, trans, mask, rot0, rot1, offset, scale);
-							res.store4a(dst);
-							dst += 4;
+							LLVector2 tc(vf.mTexCoords[i]);
+							//LLVector4a& norm = vf.mNormals[i];
+							//LLVector4a& center = *(vf.mCenter);
+
+							LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
+							tmp = tmp * *mTextureMatrix;
+							tc.mV[0] = tmp.mV[0];
+							tc.mV[1] = tmp.mV[1];
+							*tex_coords++ = tc;	
 						}
 					}
 				}
 				else
-				{ //do tex mat, no texgen, no atlas, no bump
-					for (S32 i = 0; i < num_vertices; i++)
-					{	
-						LLVector2 tc(vf.mTexCoords[i]);
-						//LLVector4a& norm = vf.mNormals[i];
-						//LLVector4a& center = *(vf.mCenter);
-
+				{ //no bump, no atlas, tex gen planar
+					LLFastTimer t(FTM_FACE_TEX_QUICK_PLANAR);
+					if (do_tex_mat)
+					{
+						for (S32 i = 0; i < num_vertices; i++)
+						{	
+							LLVector2 tc(vf.mTexCoords[i]);
+							LLVector4a& norm = vf.mNormals[i];
+							LLVector4a& center = *(vf.mCenter);
+							LLVector4a vec = vf.mPositions[i];	
+							vec.mul(scalea);
+							planarProjection(tc, norm, center, vec);
+						
+							LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
+							tmp = tmp * *mTextureMatrix;
+							tc.mV[0] = tmp.mV[0];
+							tc.mV[1] = tmp.mV[1];
+				
+							*tex_coords++ = tc;	
+						}
+					}
+					else
+					{
+						for (S32 i = 0; i < num_vertices; i++)
+						{	
+							LLVector2 tc(vf.mTexCoords[i]);
+							LLVector4a& norm = vf.mNormals[i];
+							LLVector4a& center = *(vf.mCenter);
+							LLVector4a vec = vf.mPositions[i];	
+							vec.mul(scalea);
+							planarProjection(tc, norm, center, vec);
+						
+							xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
+
+							*tex_coords++ = tc;	
+						}
+					}
+				}
+
+				if (map_range)
+				{
+					mVertexBuffer->flush();
+				}
+			}
+			else
+			{ //either bump mapped or in atlas, just do the whole expensive loop
+				LLFastTimer t(FTM_FACE_TEX_DEFAULT);
+				mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount, map_range);
+
+				std::vector<LLVector2> bump_tc;
+		
+				for (S32 i = 0; i < num_vertices; i++)
+				{	
+					LLVector2 tc(vf.mTexCoords[i]);
+			
+					LLVector4a& norm = vf.mNormals[i];
+				
+					LLVector4a& center = *(vf.mCenter);
+		   
+					if (texgen != LLTextureEntry::TEX_GEN_DEFAULT)
+					{
+						LLVector4a vec = vf.mPositions[i];
+				
+						vec.mul(scalea);
+
+						switch (texgen)
+						{
+							case LLTextureEntry::TEX_GEN_PLANAR:
+								planarProjection(tc, norm, center, vec);
+								break;
+							case LLTextureEntry::TEX_GEN_SPHERICAL:
+								sphericalProjection(tc, norm, center, vec);
+								break;
+							case LLTextureEntry::TEX_GEN_CYLINDRICAL:
+								cylindricalProjection(tc, norm, center, vec);
+								break;
+							default:
+								break;
+						}		
+					}
+
+					if (tex_mode && mTextureMatrix)
+					{
 						LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
 						tmp = tmp * *mTextureMatrix;
 						tc.mV[0] = tmp.mV[0];
 						tc.mV[1] = tmp.mV[1];
-						*tex_coords++ = tc;	
+					}
+					else
+					{
+						xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
+					}
+
+					if(in_atlas)
+					{
+						//
+						//manually calculate tex-coord per vertex for varying address modes.
+						//should be removed if shader can handle this.
+						//
+
+						S32 int_part = 0 ;
+						switch(mTexture->getAddressMode())
+						{
+						case LLTexUnit::TAM_CLAMP:
+							if(tc.mV[0] < 0.f)
+							{
+								tc.mV[0] = 0.f ;
+							}
+							else if(tc.mV[0] > 1.f)
+							{
+								tc.mV[0] = 1.f;
+							}
+
+							if(tc.mV[1] < 0.f)
+							{
+								tc.mV[1] = 0.f ;
+							}
+							else if(tc.mV[1] > 1.f)
+							{
+								tc.mV[1] = 1.f;
+							}
+							break;
+						case LLTexUnit::TAM_MIRROR:
+							if(tc.mV[0] < 0.f)
+							{
+								tc.mV[0] = -tc.mV[0] ;
+							}
+							int_part = (S32)tc.mV[0] ;
+							if(int_part & 1) //odd number
+							{
+								tc.mV[0] = int_part + 1 - tc.mV[0] ;
+							}
+							else //even number
+							{
+								tc.mV[0] -= int_part ;
+							}
+
+							if(tc.mV[1] < 0.f)
+							{
+								tc.mV[1] = -tc.mV[1] ;
+							}
+							int_part = (S32)tc.mV[1] ;
+							if(int_part & 1) //odd number
+							{
+								tc.mV[1] = int_part + 1 - tc.mV[1] ;
+							}
+							else //even number
+							{
+								tc.mV[1] -= int_part ;
+							}
+							break;
+						case LLTexUnit::TAM_WRAP:
+							if(tc.mV[0] > 1.f)
+								tc.mV[0] -= (S32)(tc.mV[0] - 0.00001f) ;
+							else if(tc.mV[0] < -1.f)
+								tc.mV[0] -= (S32)(tc.mV[0] + 0.00001f) ;
+
+							if(tc.mV[1] > 1.f)
+								tc.mV[1] -= (S32)(tc.mV[1] - 0.00001f) ;
+							else if(tc.mV[1] < -1.f)
+								tc.mV[1] -= (S32)(tc.mV[1] + 0.00001f) ;
+
+							if(tc.mV[0] < 0.f)
+							{
+								tc.mV[0] = 1.0f + tc.mV[0] ;
+							}
+							if(tc.mV[1] < 0.f)
+							{
+								tc.mV[1] = 1.0f + tc.mV[1] ;
+							}
+							break;
+						default:
+							break;
+						}
+				
+						tc.mV[0] = tcoord_xoffset + tcoord_xscale * tc.mV[0] ;
+						tc.mV[1] = tcoord_yoffset + tcoord_yscale * tc.mV[1] ;
+					}
+				
+
+					*tex_coords++ = tc;
+					if (do_bump)
+					{
+						bump_tc.push_back(tc);
+					}
+				}
+
+				if (map_range)
+				{
+					mVertexBuffer->flush();
+				}
+
+				if (do_bump)
+				{
+					mVertexBuffer->getTexCoord1Strider(tex_coords2, mGeomIndex, mGeomCount, map_range);
+		
+					for (S32 i = 0; i < num_vertices; i++)
+					{
+						LLVector4a tangent;
+						tangent.setCross3(vf.mBinormals[i], vf.mNormals[i]);
+
+						LLMatrix4a tangent_to_object;
+						tangent_to_object.setRows(tangent, vf.mBinormals[i], vf.mNormals[i]);
+						LLVector4a t;
+						tangent_to_object.rotate(binormal_dir, t);
+						LLVector4a binormal;
+						mat_normal.rotate(t, binormal);
+						
+						//VECTORIZE THIS
+						if (mDrawablep->isActive())
+						{
+							LLVector3 t;
+							t.set(binormal.getF32ptr());
+							t *= bump_quat;
+							binormal.load3(t.mV);
+						}
+
+						binormal.normalize3fast();
+						LLVector2 tc = bump_tc[i];
+						tc += LLVector2( bump_s_primary_light_ray.dot3(tangent).getF32(), bump_t_primary_light_ray.dot3(binormal).getF32() );
+					
+						*tex_coords2++ = tc;
+					}
+
+					if (map_range)
+					{
+						mVertexBuffer->flush();
 					}
 				}
 			}
-			else
-			{ //no bump, no atlas, tex gen planar
-				LLFastTimer t(FTM_FACE_TEX_QUICK_PLANAR);
-				if (do_tex_mat)
+		}
+
+		if (rebuild_pos)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_POSITION);
+			llassert(num_vertices > 0);
+		
+			mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range);
+			
+
+			LLMatrix4a mat_vert;
+			mat_vert.loadu(mat_vert_in);
+
+			LLVector4a* src = vf.mPositions;
+			volatile F32* dst = (volatile F32*) vert.get();
+
+			volatile F32* end = dst+num_vertices*4;
+			LLVector4a res;
+
+			LLVector4a texIdx;
+
+			U8 index = mTextureIndex < 255 ? mTextureIndex : 0;
+
+			F32 val = 0.f;
+			U8* vp = (U8*) &val;
+			vp[0] = index;
+			vp[1] = 0;
+			vp[2] = 0;
+			vp[3] = 0;
+
+			llassert(index <= LLGLSLShader::sIndexedTextureChannels-1);
+
+			LLVector4Logical mask;
+			mask.clear();
+			mask.setElement<3>();
+		
+			texIdx.set(0,0,0,val);
+
+			{
+				LLFastTimer t(FTM_FACE_POSITION_STORE);
+				LLVector4a tmp;
+
+				do
+				{	
+					mat_vert.affineTransform(*src++, res);
+					tmp.setSelectWithMask(mask, texIdx, res);
+					tmp.store4a((F32*) dst);
+					dst += 4;
+				}
+				while(dst < end);
+			}
+
+			{
+				LLFastTimer t(FTM_FACE_POSITION_PAD);
+				S32 aligned_pad_vertices = mGeomCount - num_vertices;
+				res.set(res[0], res[1], res[2], 0.f);
+
+				while (aligned_pad_vertices > 0)
 				{
-					for (S32 i = 0; i < num_vertices; i++)
-					{	
-						LLVector2 tc(vf.mTexCoords[i]);
-						LLVector4a& norm = vf.mNormals[i];
-						LLVector4a& center = *(vf.mCenter);
-						LLVector4a vec = vf.mPositions[i];	
-						vec.mul(scalea);
-						planarProjection(tc, norm, center, vec);
-						
-						LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
-						tmp = tmp * *mTextureMatrix;
-						tc.mV[0] = tmp.mV[0];
-						tc.mV[1] = tmp.mV[1];
-				
-						*tex_coords++ = tc;	
-					}
-				}
-				else
-				{
-					for (S32 i = 0; i < num_vertices; i++)
-					{	
-						LLVector2 tc(vf.mTexCoords[i]);
-						LLVector4a& norm = vf.mNormals[i];
-						LLVector4a& center = *(vf.mCenter);
-						LLVector4a vec = vf.mPositions[i];	
-						vec.mul(scalea);
-						planarProjection(tc, norm, center, vec);
-						
-						xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
-
-						*tex_coords++ = tc;	
-					}
+					--aligned_pad_vertices;
+					res.store4a((F32*) dst);
+					dst += 4;
 				}
 			}
 
 				mVertexBuffer->flush();
 			}
 		}
-		else
-		{ //either bump mapped or in atlas, just do the whole expensive loop
-			LLFastTimer t(FTM_FACE_TEX_DEFAULT);
-			mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount, map_range);
-
-			std::vector<LLVector2> bump_tc;
+
 		
+		if (rebuild_normal)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_NORMAL);
+			mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range);
+			F32* normals = (F32*) norm.get();
+	
 			for (S32 i = 0; i < num_vertices; i++)
 			{	
-				LLVector2 tc(vf.mTexCoords[i]);
-			
-				LLVector4a& norm = vf.mNormals[i];
-				
-				LLVector4a& center = *(vf.mCenter);
-		   
-				if (texgen != LLTextureEntry::TEX_GEN_DEFAULT)
-				{
-					LLVector4a vec = vf.mPositions[i];
-				
-					vec.mul(scalea);
-
-					switch (texgen)
-					{
-						case LLTextureEntry::TEX_GEN_PLANAR:
-							planarProjection(tc, norm, center, vec);
-							break;
-						case LLTextureEntry::TEX_GEN_SPHERICAL:
-							sphericalProjection(tc, norm, center, vec);
-							break;
-						case LLTextureEntry::TEX_GEN_CYLINDRICAL:
-							cylindricalProjection(tc, norm, center, vec);
-							break;
-						default:
-							break;
-					}		
-				}
-
-				if (tex_mode && mTextureMatrix)
-				{
-					LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
-					tmp = tmp * *mTextureMatrix;
-					tc.mV[0] = tmp.mV[0];
-					tc.mV[1] = tmp.mV[1];
-				}
-				else
-				{
-					xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
-				}
-
-				if(in_atlas)
-				{
-					//
-					//manually calculate tex-coord per vertex for varying address modes.
-					//should be removed if shader can handle this.
-					//
-
-					S32 int_part = 0 ;
-					switch(mTexture->getAddressMode())
-					{
-					case LLTexUnit::TAM_CLAMP:
-						if(tc.mV[0] < 0.f)
-						{
-							tc.mV[0] = 0.f ;
-						}
-						else if(tc.mV[0] > 1.f)
-						{
-							tc.mV[0] = 1.f;
-						}
-
-						if(tc.mV[1] < 0.f)
-						{
-							tc.mV[1] = 0.f ;
-						}
-						else if(tc.mV[1] > 1.f)
-						{
-							tc.mV[1] = 1.f;
-						}
-						break;
-					case LLTexUnit::TAM_MIRROR:
-						if(tc.mV[0] < 0.f)
-						{
-							tc.mV[0] = -tc.mV[0] ;
-						}
-						int_part = (S32)tc.mV[0] ;
-						if(int_part & 1) //odd number
-						{
-							tc.mV[0] = int_part + 1 - tc.mV[0] ;
-						}
-						else //even number
-						{
-							tc.mV[0] -= int_part ;
-						}
-
-						if(tc.mV[1] < 0.f)
-						{
-							tc.mV[1] = -tc.mV[1] ;
-						}
-						int_part = (S32)tc.mV[1] ;
-						if(int_part & 1) //odd number
-						{
-							tc.mV[1] = int_part + 1 - tc.mV[1] ;
-						}
-						else //even number
-						{
-							tc.mV[1] -= int_part ;
-						}
-						break;
-					case LLTexUnit::TAM_WRAP:
-						if(tc.mV[0] > 1.f)
-							tc.mV[0] -= (S32)(tc.mV[0] - 0.00001f) ;
-						else if(tc.mV[0] < -1.f)
-							tc.mV[0] -= (S32)(tc.mV[0] + 0.00001f) ;
-
-						if(tc.mV[1] > 1.f)
-							tc.mV[1] -= (S32)(tc.mV[1] - 0.00001f) ;
-						else if(tc.mV[1] < -1.f)
-							tc.mV[1] -= (S32)(tc.mV[1] + 0.00001f) ;
-
-						if(tc.mV[0] < 0.f)
-						{
-							tc.mV[0] = 1.0f + tc.mV[0] ;
-						}
-						if(tc.mV[1] < 0.f)
-						{
-							tc.mV[1] = 1.0f + tc.mV[1] ;
-						}
-						break;
-					default:
-						break;
-					}
-				
-					tc.mV[0] = tcoord_xoffset + tcoord_xscale * tc.mV[0] ;
-					tc.mV[1] = tcoord_yoffset + tcoord_yscale * tc.mV[1] ;
-				}
-				
-
-				*tex_coords++ = tc;
-				if (do_bump)
-				{
-					bump_tc.push_back(tc);
-				}
+				LLVector4a normal;
+				mat_normal.rotate(vf.mNormals[i], normal);
+				normal.normalize3fast();
+				normal.store4a(normals);
+				normals += 4;
 			}
 
 			if (map_range)
 			{
 				mVertexBuffer->flush();
 			}
-
-			if (do_bump)
+		}
+		
+		if (rebuild_binormal)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_BINORMAL);
+			mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range);
+			F32* binormals = (F32*) binorm.get();
+		
+			for (S32 i = 0; i < num_vertices; i++)
+			{	
+				LLVector4a binormal;
+				mat_normal.rotate(vf.mBinormals[i], binormal);
+				binormal.normalize3fast();
+				binormal.store4a(binormals);
+				binormals += 4;
+			}
+
+			if (map_range)
 			{
-				mVertexBuffer->getTexCoord1Strider(tex_coords2, mGeomIndex, mGeomCount, map_range);
+				mVertexBuffer->flush();
+			}
+		}
+	
+		if (rebuild_weights && vf.mWeights)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_WEIGHTS);
+			mVertexBuffer->getWeight4Strider(wght, mGeomIndex, mGeomCount, map_range);
+			F32* weights = (F32*) wght.get();
+			LLVector4a::memcpyNonAliased16(weights, (F32*) vf.mWeights, num_vertices*4*sizeof(F32));
+			if (map_range)
+			{
+				mVertexBuffer->flush();
+			}
+		}
+
+		if (rebuild_color && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_COLOR) )
+		{
+			LLFastTimer t(FTM_FACE_GEOM_COLOR);
+			mVertexBuffer->getColorStrider(colors, mGeomIndex, mGeomCount, map_range);
+
+			LLVector4a src;
+
+			U32 vec[4];
+			vec[0] = vec[1] = vec[2] = vec[3] = color.mAll;
 		
-				for (S32 i = 0; i < num_vertices; i++)
-				{
-					LLVector4a tangent;
-					tangent.setCross3(vf.mBinormals[i], vf.mNormals[i]);
-
-					LLMatrix4a tangent_to_object;
-					tangent_to_object.setRows(tangent, vf.mBinormals[i], vf.mNormals[i]);
-					LLVector4a t;
-					tangent_to_object.rotate(binormal_dir, t);
-					LLVector4a binormal;
-					mat_normal.rotate(t, binormal);
-						
-					//VECTORIZE THIS
-					if (mDrawablep->isActive())
-					{
-						LLVector3 t;
-						t.set(binormal.getF32ptr());
-						t *= bump_quat;
-						binormal.load3(t.mV);
-					}
-
-					binormal.normalize3fast();
-					LLVector2 tc = bump_tc[i];
-					tc += LLVector2( bump_s_primary_light_ray.dot3(tangent).getF32(), bump_t_primary_light_ray.dot3(binormal).getF32() );
-					
-					*tex_coords2++ = tc;
-				}
-
-				if (map_range)
-				{
-					mVertexBuffer->flush();
-				}
+			src.loadua((F32*) vec);
+
+			F32* dst = (F32*) colors.get();
+			S32 num_vecs = num_vertices/4;
+			if (num_vertices%4 > 0)
+			{
+				++num_vecs;
+			}
+
+			for (S32 i = 0; i < num_vecs; i++)
+			{	
+				src.store4a(dst);
+				dst += 4;
+			}
+
+			if (map_range)
+			{
+				mVertexBuffer->flush();
+			}
+		}
+
+		if (rebuild_emissive)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_EMISSIVE);
+			LLStrider<LLColor4U> emissive;
+			mVertexBuffer->getEmissiveStrider(emissive, mGeomIndex, mGeomCount, map_range);
+
+			U8 glow = (U8) llclamp((S32) (getTextureEntry()->getGlow()*255), 0, 255);
+
+			LLVector4a src;
+
+		
+			U32 glow32 = glow |
+						 (glow << 8) |
+						 (glow << 16) |
+						 (glow << 24);
+
+			U32 vec[4];
+			vec[0] = vec[1] = vec[2] = vec[3] = glow32;
+		
+			src.loadua((F32*) vec);
+
+			F32* dst = (F32*) emissive.get();
+			S32 num_vecs = num_vertices/4;
+			if (num_vertices%4 > 0)
+			{
+				++num_vecs;
+			}
+
+			for (S32 i = 0; i < num_vecs; i++)
+			{	
+				src.store4a(dst);
+				dst += 4;
+			}
+
+			if (map_range)
+			{
+				mVertexBuffer->flush();
 			}
 		}
 	}
 
-	if (rebuild_pos)
-	{
-		LLFastTimer t(FTM_FACE_GEOM_POSITION);
-		llassert(num_vertices > 0);
-		
-		mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range);
-			
-
-		LLMatrix4a mat_vert;
-		mat_vert.loadu(mat_vert_in);
-
-		LLVector4a* src = vf.mPositions;
-		volatile F32* dst = (volatile F32*) vert.get();
-
-		volatile F32* end = dst+num_vertices*4;
-		LLVector4a res;
-
-		LLVector4a texIdx;
-
-		U8 index = mTextureIndex < 255 ? mTextureIndex : 0;
-
-		F32 val = 0.f;
-		U8* vp = (U8*) &val;
-		vp[0] = index;
-		vp[1] = 0;
-		vp[2] = 0;
-		vp[3] = 0;
-
-		llassert(index <= LLGLSLShader::sIndexedTextureChannels-1);
-
-		LLVector4Logical mask;
-		mask.clear();
-		mask.setElement<3>();
-		
-		texIdx.set(0,0,0,val);
-
-		{
-			LLFastTimer t(FTM_FACE_POSITION_STORE);
-			LLVector4a tmp;
-
-			do
-			{	
-				mat_vert.affineTransform(*src++, res);
-				tmp.setSelectWithMask(mask, texIdx, res);
-				tmp.store4a((F32*) dst);
-				dst += 4;
-			}
-			while(dst < end);
-		}
-
-		{
-			LLFastTimer t(FTM_FACE_POSITION_PAD);
-			S32 aligned_pad_vertices = mGeomCount - num_vertices;
-			res.set(res[0], res[1], res[2], 0.f);
-
-			while (aligned_pad_vertices > 0)
-			{
-				--aligned_pad_vertices;
-				res.store4a((F32*) dst);
-				dst += 4;
-			}
-		}
-
-		if (map_range)
-		{
-			mVertexBuffer->flush();
-		}
-	}
-		
-	if (rebuild_normal)
-	{
-		LLFastTimer t(FTM_FACE_GEOM_NORMAL);
-		mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range);
-		F32* normals = (F32*) norm.get();
-	
-		for (S32 i = 0; i < num_vertices; i++)
-		{	
-			LLVector4a normal;
-			mat_normal.rotate(vf.mNormals[i], normal);
-			normal.normalize3fast();
-			normal.store4a(normals);
-			normals += 4;
-		}
-
-		if (map_range)
-		{
-			mVertexBuffer->flush();
-		}
-	}
-		
-	if (rebuild_binormal)
-	{
-		LLFastTimer t(FTM_FACE_GEOM_BINORMAL);
-		mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range);
-		F32* binormals = (F32*) binorm.get();
-		
-		for (S32 i = 0; i < num_vertices; i++)
-		{	
-			LLVector4a binormal;
-			mat_normal.rotate(vf.mBinormals[i], binormal);
-			binormal.normalize3fast();
-			binormal.store4a(binormals);
-			binormals += 4;
-		}
-
-		if (map_range)
-		{
-			mVertexBuffer->flush();
-		}
-	}
-	
-	if (rebuild_weights && vf.mWeights)
-	{
-		LLFastTimer t(FTM_FACE_GEOM_WEIGHTS);
-		mVertexBuffer->getWeight4Strider(wght, mGeomIndex, mGeomCount, map_range);
-		F32* weights = (F32*) wght.get();
-		LLVector4a::memcpyNonAliased16(weights, (F32*) vf.mWeights, num_vertices*4*sizeof(F32));
-		if (map_range)
-		{
-			mVertexBuffer->flush();
-		}
-	}
-
-	if (rebuild_color && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_COLOR) )
-	{
-		LLFastTimer t(FTM_FACE_GEOM_COLOR);
-		mVertexBuffer->getColorStrider(colors, mGeomIndex, mGeomCount, map_range);
-
-		LLVector4a src;
-
-		U32 vec[4];
-		vec[0] = vec[1] = vec[2] = vec[3] = color.mAll;
-		
-		src.loadua((F32*) vec);
-
-		F32* dst = (F32*) colors.get();
-		S32 num_vecs = num_vertices/4;
-		if (num_vertices%4 > 0)
-		{
-			++num_vecs;