Commits

davep committed 621b556 Merge

merge

  • Participants
  • Parent commits 07a1ac5, 02cf8d2

Comments (0)

Files changed (11)

File indra/llrender/llvertexbuffer.cpp

 	return r;
 }
 
-U8* LLVBOPool::allocate(U32& name, U32 size)
+volatile U8* LLVBOPool::allocate(U32& name, U32 size)
 {
 	llassert(nhpo2(size) == size);
 
 		mFreeList.resize(i+1);
 	}
 
-	U8* ret = NULL;
+	volatile U8* ret = NULL;
 
 	if (mFreeList[i].empty())
 	{
 		glBufferDataARB(mType, size, 0, mUsage);
 		LLVertexBuffer::sAllocatedBytes += size;
 
-		if (LLVertexBuffer::sDisableVBOMapping)
+		if (LLVertexBuffer::sDisableVBOMapping || mUsage != GL_DYNAMIC_DRAW_ARB)
 		{
 			ret = (U8*) ll_aligned_malloc_16(size);
 		}
 	return ret;
 }
 
-void LLVBOPool::release(U32 name, U8* buffer, U32 size)
+void LLVBOPool::release(U32 name, volatile U8* buffer, U32 size)
 {
 	llassert(nhpo2(size) == size);
 
 	rec.mClientData = buffer;
 
 	sBytesPooled += size;
-
-	mFreeList[i].push_back(rec);
+	
+	if (!LLVertexBuffer::sDisableVBOMapping && mUsage == GL_DYNAMIC_DRAW_ARB)
+	{
+		glDeleteBuffersARB(1, &rec.mGLName);
+	}
+	else
+	{
+		mFreeList[i].push_back(rec);
+	}
 }
 
 void LLVBOPool::cleanup()
 
 			if (r.mClientData)
 			{
-				ll_aligned_free_16(r.mClientData);
+				ll_aligned_free_16((void*) r.mClientData);
 			}
 
 			l.pop_front();
 void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const
 {
 	validateRange(start, end, count, indices_offset);
-
+	mMappable = FALSE;
 	gGL.syncMatrices();
 
 	llassert(mNumVerts >= 0);
 void LLVertexBuffer::draw(U32 mode, U32 count, U32 indices_offset) const
 {
 	llassert(!LLGLSLShader::sNoFixedFunction || LLGLSLShader::sCurBoundShaderPtr != NULL);
-
+	mMappable = FALSE;
 	gGL.syncMatrices();
 
 	llassert(mNumIndices >= 0);
 void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const
 {
 	llassert(!LLGLSLShader::sNoFixedFunction || LLGLSLShader::sCurBoundShaderPtr != NULL);
-	
+	mMappable = FALSE;
 	gGL.syncMatrices();
 	
 	llassert(mNumVerts >= 0);
 		mUsage = GL_DYNAMIC_DRAW_ARB;
 	}
 		
+	if (mUsage == GL_DYNAMIC_DRAW_ARB && !sDisableVBOMapping)
+	{
+		mMappable = TRUE;
+	}
+	else
+	{
+		mMappable = FALSE;
+	}
+
 	//zero out offsets
 	for (U32 i = 0; i < TYPE_MAX; i++)
 	{
 		}
 		else
 		{
-			FREE_MEM(sPrivatePoolp, mMappedData) ;
+			FREE_MEM(sPrivatePoolp, (void*) mMappedData) ;
 			mMappedData = NULL;
 			mEmpty = TRUE;
 		}
 		}
 		else
 		{
-			FREE_MEM(sPrivatePoolp, mMappedIndexData) ;
+			FREE_MEM(sPrivatePoolp, (void*) mMappedIndexData) ;
 			mMappedIndexData = NULL;
 			mEmpty = TRUE;
 		}
 	return true;
 }
 
+static LLFastTimer::DeclareTimer FTM_VBO_MAP_BUFFER_RANGE("VBO Map Range");
+static LLFastTimer::DeclareTimer FTM_VBO_MAP_BUFFER("VBO Map");
+
 // Map for data access
-U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range)
+volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range)
 {
 	bindGLBuffer(true);
 	LLMemType mt2(LLMemType::MTYPE_VERTEX_MAP_BUFFER);
 		
 	if (useVBOs())
 	{
-		if (sDisableVBOMapping || gGLManager.mHasMapBufferRange || gGLManager.mHasFlushBufferRange)
+		if (!mMappable || gGLManager.mHasMapBufferRange || gGLManager.mHasFlushBufferRange)
 		{
 			if (count == -1)
 			{
 			if (!mapped)
 			{
 				//not already mapped, map new region
-				MappedRegion region(type, !sDisableVBOMapping && map_range ? -1 : index, count);
+				MappedRegion region(type, mMappable && map_range ? -1 : index, count);
 				mMappedVertexRegions.push_back(region);
 			}
 		}
 			sMappedCount++;
 			stop_glerror();	
 
-			if(sDisableVBOMapping)
+			if(!mMappable)
 			{
 				map_range = false;
 			}
 			else
 			{
-				U8* src = NULL;
+				volatile U8* src = NULL;
 				waitFence();
 				if (gGLManager.mHasMapBufferRange)
 				{
 					if (map_range)
 					{
 #ifdef GL_ARB_map_buffer_range
+						LLFastTimer t(FTM_VBO_MAP_BUFFER_RANGE);
 						S32 offset = mOffsets[type] + sTypeSize[type]*index;
 						S32 length = (sTypeSize[type]*count+0xF) & ~0xF;
 						src = (U8*) glMapBufferRange(GL_ARRAY_BUFFER_ARB, offset, length, 
 							}
 						}
 
+						LLFastTimer t(FTM_VBO_MAP_BUFFER);
 						src = (U8*) glMapBufferRange(GL_ARRAY_BUFFER_ARB, 0, mSize, 
 							GL_MAP_WRITE_BIT | 
 							GL_MAP_FLUSH_EXPLICIT_BIT);
 
 				llassert(src != NULL);
 
-				mMappedData = LL_NEXT_ALIGNED_ADDRESS<U8>(src);
+				mMappedData = LL_NEXT_ALIGNED_ADDRESS<volatile U8>(src);
 				mAlignedOffset = mMappedData - src;
 			
 				stop_glerror();
 			//check the availability of memory
 			LLMemory::logMemoryInfo(TRUE) ; 
 			
-				if(!sDisableVBOMapping)
+				if(mMappable)
 				{			
 					//--------------------
 					//print out more debug info before crash
 		map_range = false;
 	}
 	
-	if (map_range && gGLManager.mHasMapBufferRange && !sDisableVBOMapping)
+	if (map_range && gGLManager.mHasMapBufferRange && mMappable)
 	{
 		return mMappedData;
 	}
 	}
 }
 
-U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)
+
+static LLFastTimer::DeclareTimer FTM_VBO_MAP_INDEX_RANGE("IBO Map Range");
+static LLFastTimer::DeclareTimer FTM_VBO_MAP_INDEX("IBO Map");
+
+volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)
 {
 	LLMemType mt2(LLMemType::MTYPE_VERTEX_MAP_BUFFER);
 	bindGLIndices(true);
 
 	if (useVBOs())
 	{
-		if (sDisableVBOMapping || gGLManager.mHasMapBufferRange || gGLManager.mHasFlushBufferRange)
+		if (!mMappable || gGLManager.mHasMapBufferRange || gGLManager.mHasFlushBufferRange)
 		{
 			if (count == -1)
 			{
 			if (!mapped)
 			{
 				//not already mapped, map new region
-				MappedRegion region(TYPE_INDEX, !sDisableVBOMapping && map_range ? -1 : index, count);
+				MappedRegion region(TYPE_INDEX, mMappable && map_range ? -1 : index, count);
 				mMappedIndexRegions.push_back(region);
 			}
 		}
 				}
 			}
 
-			if(sDisableVBOMapping)
+			if(!mMappable)
 			{
 				map_range = false;
 			}
 			else
 			{
-				U8* src = NULL;
+				volatile U8* src = NULL;
 				waitFence();
 				if (gGLManager.mHasMapBufferRange)
 				{
 					if (map_range)
 					{
 #ifdef GL_ARB_map_buffer_range
+						LLFastTimer t(FTM_VBO_MAP_INDEX_RANGE);
 						S32 offset = sizeof(U16)*index;
 						S32 length = sizeof(U16)*count;
 						src = (U8*) glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER_ARB, offset, length, 
 					else
 					{
 #ifdef GL_ARB_map_buffer_range
+						LLFastTimer t(FTM_VBO_MAP_INDEX);
 						src = (U8*) glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER_ARB, 0, sizeof(U16)*mNumIndices, 
 							GL_MAP_WRITE_BIT | 
 							GL_MAP_FLUSH_EXPLICIT_BIT);
 				}
 				else
 				{
+					LLFastTimer t(FTM_VBO_MAP_INDEX);
 					map_range = false;
 					src = (U8*) glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
 				}
 			log_glerror();
 			LLMemory::logMemoryInfo(TRUE) ;
 
-			if(!sDisableVBOMapping)
+			if(mMappable)
 			{
 				GLint buff;
 				glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING_ARB, &buff);
 		map_range = false;
 	}
 
-	if (map_range && gGLManager.mHasMapBufferRange && !sDisableVBOMapping)
+	if (map_range && gGLManager.mHasMapBufferRange && mMappable)
 	{
 		return mMappedIndexData;
 	}
 	}
 }
 
+static LLFastTimer::DeclareTimer FTM_VBO_UNMAP("VBO Unmap");
+static LLFastTimer::DeclareTimer FTM_VBO_FLUSH_RANGE("Flush VBO Range");
+
+
+static LLFastTimer::DeclareTimer FTM_IBO_UNMAP("IBO Unmap");
+static LLFastTimer::DeclareTimer FTM_IBO_FLUSH_RANGE("Flush IBO Range");
+
 void LLVertexBuffer::unmapBuffer()
 {
 	LLMemType mt2(LLMemType::MTYPE_VERTEX_UNMAP_BUFFER);
 
 	if (mMappedData && mVertexLocked)
 	{
+		LLFastTimer t(FTM_VBO_UNMAP);
 		bindGLBuffer(true);
 		updated_all = mIndexLocked; //both vertex and index buffers done updating
 
-		if(sDisableVBOMapping)
+		if(!mMappable)
 		{
 			if (!mMappedVertexRegions.empty())
 			{
 					const MappedRegion& region = mMappedVertexRegions[i];
 					S32 offset = region.mIndex >= 0 ? mOffsets[region.mType]+sTypeSize[region.mType]*region.mIndex : 0;
 					S32 length = sTypeSize[region.mType]*region.mCount;
-					glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, offset, length, mMappedData+offset);
+					glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, offset, length, (U8*) mMappedData+offset);
 					stop_glerror();
 				}
 
 			else
 			{
 				stop_glerror();
-				glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, getSize(), mMappedData);
+				glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, getSize(), (U8*) mMappedData);
 				stop_glerror();
 			}
 		}
 						S32 length = sTypeSize[region.mType]*region.mCount;
 						if (gGLManager.mHasMapBufferRange)
 						{
+							LLFastTimer t(FTM_VBO_FLUSH_RANGE);
 #ifdef GL_ARB_map_buffer_range
 							glFlushMappedBufferRange(GL_ARRAY_BUFFER_ARB, offset, length);
 #endif
 	
 	if (mMappedIndexData && mIndexLocked)
 	{
+		LLFastTimer t(FTM_IBO_UNMAP);
 		bindGLIndices();
-		if(sDisableVBOMapping)
+		if(!mMappable)
 		{
 			if (!mMappedIndexRegions.empty())
 			{
 					const MappedRegion& region = mMappedIndexRegions[i];
 					S32 offset = region.mIndex >= 0 ? sizeof(U16)*region.mIndex : 0;
 					S32 length = sizeof(U16)*region.mCount;
-					glBufferSubDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, offset, length, mMappedIndexData+offset);
+					glBufferSubDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, offset, length, (U8*) mMappedIndexData+offset);
 					stop_glerror();
 				}
 
 			else
 			{
 				stop_glerror();
-				glBufferSubDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0, getIndicesSize(), mMappedIndexData);
+				glBufferSubDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0, getIndicesSize(), (U8*) mMappedIndexData);
 				stop_glerror();
 			}
 		}
 						S32 length = sizeof(U16)*region.mCount;
 						if (gGLManager.mHasMapBufferRange)
 						{
+							LLFastTimer t(FTM_IBO_FLUSH_RANGE);
 #ifdef GL_ARB_map_buffer_range
 							glFlushMappedBufferRange(GL_ELEMENT_ARRAY_BUFFER_ARB, offset, length);
 #endif
 	{
 		if (type == LLVertexBuffer::TYPE_INDEX)
 		{
-			U8* ptr = vbo.mapIndexBuffer(index, count, map_range);
+			volatile U8* ptr = vbo.mapIndexBuffer(index, count, map_range);
 
 			if (ptr == NULL)
 			{
 		{
 			S32 stride = LLVertexBuffer::sTypeSize[type];
 
-			U8* ptr = vbo.mapVertexBuffer(type, index, count, map_range);
+			volatile U8* ptr = vbo.mapVertexBuffer(type, index, count, map_range);
 
 			if (ptr == NULL)
 			{
 {
 	LLMemType mt2(LLMemType::MTYPE_VERTEX_SETUP_VERTEX_BUFFER);
 	stop_glerror();
-	U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;
+	volatile U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;
 
 	/*if ((data_mask & mTypeMask) != data_mask)
 	{

File indra/llrender/llvertexbuffer.h

 	U32 mType;
 
 	//size MUST be a power of 2
-	U8* allocate(U32& name, U32 size);
+	volatile U8* allocate(U32& name, U32 size);
 	
 	//size MUST be the size provided to allocate that returned the given name
-	void release(U32 name, U8* buffer, U32 size);
+	void release(U32 name, volatile U8* buffer, U32 size);
 	
 	//destroy all records in mFreeList
 	void cleanup();
 	{
 	public:
 		U32 mGLName;
-		U8* mClientData;
+		volatile U8* mClientData;
 	};
 
 	typedef std::list<Record> record_list_t;
 	LLVertexBuffer(U32 typemask, S32 usage);
 	
 	// map for data access
-	U8*		mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range);
-	U8*		mapIndexBuffer(S32 index, S32 count, bool map_range);
+	volatile U8*		mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range);
+	volatile U8*		mapIndexBuffer(S32 index, S32 count, bool map_range);
 
 	// set for rendering
 	virtual void	setBuffer(U32 data_mask); 	// calls  setupVertexBuffer() if data_mask is not 0
 	S32 getNumVerts() const					{ return mNumVerts; }
 	S32 getNumIndices() const				{ return mNumIndices; }
 	
-	U8* getIndicesPointer() const			{ return useVBOs() ? (U8*) mAlignedIndexOffset : mMappedIndexData; }
-	U8* getVerticesPointer() const			{ return useVBOs() ? (U8*) mAlignedOffset : mMappedData; }
+	volatile U8* getIndicesPointer() const			{ return useVBOs() ? (U8*) mAlignedIndexOffset : mMappedIndexData; }
+	volatile U8* getVerticesPointer() const			{ return useVBOs() ? (U8*) mAlignedOffset : mMappedData; }
 	U32 getTypeMask() const					{ return mTypeMask; }
 	bool hasDataType(S32 type) const		{ return ((1 << type) & getTypeMask()); }
 	S32 getSize() const;
 	S32 getIndicesSize() const				{ return mIndicesSize; }
-	U8* getMappedData() const				{ return mMappedData; }
-	U8* getMappedIndices() const			{ return mMappedIndexData; }
+	volatile U8* getMappedData() const				{ return mMappedData; }
+	volatile U8* getMappedIndices() const			{ return mMappedIndexData; }
 	S32 getOffset(S32 type) const			{ return mOffsets[type]; }
 	S32 getUsage() const					{ return mUsage; }
+	BOOL isWriteable() const				{ return (sDisableVBOMapping || mMappable || mUsage == GL_STREAM_DRAW_ARB) ? TRUE : FALSE; }
 
 	void draw(U32 mode, U32 count, U32 indices_offset) const;
 	void drawArrays(U32 mode, U32 offset, U32 count) const;
 	U32		mGLIndices;		// GL IBO handle
 	U32		mGLArray;		// GL VAO handle
 	
-	U8*		mMappedData;	// pointer to currently mapped data (NULL if unmapped)
-	U8*		mMappedIndexData;	// pointer to currently mapped indices (NULL if unmapped)
+	volatile U8* mMappedData;	// pointer to currently mapped data (NULL if unmapped)
+	volatile U8* mMappedIndexData;	// pointer to currently mapped indices (NULL if unmapped)
 	BOOL	mVertexLocked;			// if TRUE, vertex buffer is being or has been written to in client memory
 	BOOL	mIndexLocked;			// if TRUE, index buffer is being or has been written to in client memory
 	BOOL	mFinal;			// if TRUE, buffer can not be mapped again
 	BOOL	mEmpty;			// if TRUE, client buffer is empty (or NULL). Old values have been discarded.	
+	mutable BOOL	mMappable;     // if TRUE, use memory mapping to upload data (otherwise doublebuffer and use glBufferSubData)
 	S32		mOffsets[TYPE_MAX];
 
 	std::vector<MappedRegion> mMappedVertexRegions;

File indra/newview/lldrawpoolavatar.cpp

 		face->setGeomIndex(0);
 		face->setIndicesIndex(0);
 		
-		if (buffer.isNull() || buffer->getTypeMask() != data_mask)
+		if (buffer.isNull() || buffer->getTypeMask() != data_mask || !buffer->isWriteable())
 		{ //make a new buffer
 			if (sShaderLevel > 0)
 			{

File indra/newview/llface.cpp

 	tex_coord.mV[1] = t;
 }
 
+// Transform the texture coordinates for this face.
+static void xform4a(LLVector4a &tex_coord, const LLVector4a& trans, const LLVector4Logical& mask, const LLVector4a& rot0, const LLVector4a& rot1, const LLVector4a& offset, const LLVector4a& scale) 
+{
+	//tex coord is two coords, <s0, t0, s1, t1>
+	LLVector4a st;
+
+	// Texture transforms are done about the center of the face.
+	st.setAdd(tex_coord, trans);
+	
+	// Handle rotation
+	LLVector4a rot_st;
+		
+	// <s0 * cosAng, s0*-sinAng, s1*cosAng, s1*-sinAng>
+	LLVector4a s0;
+	s0.splat(st, 0);
+	LLVector4a s1;
+	s1.splat(st, 2);
+	LLVector4a ss;
+	ss.setSelectWithMask(mask, s1, s0);
+
+	LLVector4a a; 
+	a.setMul(rot0, ss);
+	
+	// <t0*sinAng, t0*cosAng, t1*sinAng, t1*cosAng>
+	LLVector4a t0;
+	t0.splat(st, 1);
+	LLVector4a t1;
+	t1.splat(st, 3);
+	LLVector4a tt;
+	tt.setSelectWithMask(mask, t1, t0);
+
+	LLVector4a b;
+	b.setMul(rot1, tt);
+		
+	st.setAdd(a,b);
+
+	// Then scale
+	st.mul(scale);
+
+	// Then offset
+	tex_coord.setAdd(st, offset);
+}
+
 
 bool less_than_max_mag(const LLVector4a& vec)
 {
 static LLFastTimer::DeclareTimer FTM_FACE_GEOM_WEIGHTS("Weights");
 static LLFastTimer::DeclareTimer FTM_FACE_GEOM_BINORMAL("Binormal");
 static LLFastTimer::DeclareTimer FTM_FACE_GEOM_INDEX("Index");
+static LLFastTimer::DeclareTimer FTM_FACE_GEOM_INDEX_TAIL("Tail");
+static LLFastTimer::DeclareTimer FTM_FACE_POSITION_STORE("Pos");
+static LLFastTimer::DeclareTimer FTM_FACE_TEXTURE_INDEX_STORE("TexIdx");
+static LLFastTimer::DeclareTimer FTM_FACE_POSITION_PAD("Pad");
+static LLFastTimer::DeclareTimer FTM_FACE_TEX_DEFAULT("Default");
+static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK("Quick");
+static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_NO_XFORM("No Xform");
+static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_XFORM("Xform");
+
+static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_PLANAR("Quick Planar");
 
 BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 							   const S32 &f,
 		updateRebuildFlags();
 	}
 
-	bool map_range = gGLManager.mHasMapBufferRange || gGLManager.mHasFlushBufferRange;
+
+	//don't use map range (generates many redundant unmap calls)
+	bool map_range = false; //gGLManager.mHasMapBufferRange || gGLManager.mHasFlushBufferRange;
 
 	if (mVertexBuffer.notNull())
 	{
 	}
 
 	LLStrider<LLVector3> vert;
-	LLVector4a* vertices = NULL;
 	LLStrider<LLVector2> tex_coords;
 	LLStrider<LLVector2> tex_coords2;
-	LLVector4a* normals = NULL;
 	LLStrider<LLVector3> norm;
 	LLStrider<LLColor4U> colors;
-	LLVector4a* binormals = NULL;
 	LLStrider<LLVector3> binorm;
 	LLStrider<U16> indicesp;
-	LLVector4a* weights = NULL;
 	LLStrider<LLVector4> wght;
 
 	BOOL full_rebuild = force_rebuild || mDrawablep->isState(LLDrawable::REBUILD_VOLUME);
 		LLFastTimer t(FTM_FACE_GEOM_INDEX);
 		mVertexBuffer->getIndexStrider(indicesp, mIndicesIndex, mIndicesCount, map_range);
 
-		__m128i* dst = (__m128i*) indicesp.get();
+		volatile __m128i* dst = (__m128i*) indicesp.get();
 		__m128i* src = (__m128i*) vf.mIndices;
 		__m128i offset = _mm_set1_epi16(index_offset);
 
 		for (S32 i = 0; i < end; i++)
 		{
 			__m128i res = _mm_add_epi16(src[i], offset);
-			_mm_storeu_si128(dst+i, res);
+			_mm_storeu_si128((__m128i*) dst++, res);
 		}
 
-		for (S32 i = end*8; i < num_indices; ++i)
 		{
-			indicesp[i] = vf.mIndices[i]+index_offset;
+			LLFastTimer t(FTM_FACE_GEOM_INDEX_TAIL);
+			U16* idx = (U16*) dst;
+
+			for (S32 i = end*8; i < num_indices; ++i)
+			{
+				*idx++ = vf.mIndices[i]+index_offset;
+			}
 		}
 
 		if (map_range)
 
 			if (texgen != LLTextureEntry::TEX_GEN_PLANAR)
 			{
+				LLFastTimer t(FTM_FACE_TEX_QUICK);
 				if (!do_tex_mat)
 				{
 					if (!do_xform)
 					{
+						LLFastTimer t(FTM_FACE_TEX_QUICK_NO_XFORM);
 						LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
 					}
 					else
 					{
-						for (S32 i = 0; i < num_vertices; i++)
+						LLFastTimer t(FTM_FACE_TEX_QUICK_XFORM);
+						F32* dst = (F32*) tex_coords.get();
+						LLVector4a* src = (LLVector4a*) vf.mTexCoords;
+
+						LLVector4a trans;
+						trans.splat(-0.5f);
+
+						LLVector4a rot0;
+						rot0.set(cos_ang, -sin_ang, cos_ang, -sin_ang);
+
+						LLVector4a rot1;
+						rot1.set(sin_ang, cos_ang, sin_ang, cos_ang);
+
+						LLVector4a scale;
+						scale.set(ms, mt, ms, mt);
+
+						LLVector4a offset;
+						offset.set(os+0.5f, ot+0.5f, os+0.5f, ot+0.5f);
+
+						LLVector4Logical mask;
+						mask.clear();
+						mask.setElement<2>();
+						mask.setElement<3>();
+
+						U32 count = num_vertices/2 + num_vertices%2;
+
+						for (S32 i = 0; i < count; i++)
 						{	
-							LLVector2 tc(vf.mTexCoords[i]);
-							xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
-							*tex_coords++ = tc;	
+							LLVector4a res = *src++;
+							xform4a(res, trans, mask, rot0, rot1, offset, scale);
+							res.store4a(dst);
+							dst += 4;
 						}
 					}
 				}
 			}
 			else
 			{ //no bump, no atlas, tex gen planar
+				LLFastTimer t(FTM_FACE_TEX_QUICK_PLANAR);
 				if (do_tex_mat)
 				{
 					for (S32 i = 0; i < num_vertices; i++)
 		}
 		else
 		{ //either bump mapped or in atlas, just do the whole expensive loop
+			LLFastTimer t(FTM_FACE_TEX_DEFAULT);
 			mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount, map_range);
 
 			std::vector<LLVector2> bump_tc;
 		llassert(num_vertices > 0);
 		
 		mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range);
-		vertices = (LLVector4a*) vert.get();
-	
+			
+
 		LLMatrix4a mat_vert;
 		mat_vert.loadu(mat_vert_in);
 
 		LLVector4a* src = vf.mPositions;
-		LLVector4a* dst = vertices;
+		volatile F32* dst = (volatile F32*) vert.get();
 
-		LLVector4a* end = dst+num_vertices;
-		do
-		{	
-			mat_vert.affineTransform(*src++, *dst++);
-		}
-		while(dst < end);
+		volatile F32* end = dst+num_vertices*4;
+		LLVector4a res;
+
+		LLVector4a texIdx;
 
 		F32 index = (F32) (mTextureIndex < 255 ? mTextureIndex : 0);
+		llassert(index <= LLGLSLShader::sIndexedTextureChannels-1);
 
-		llassert(index <= LLGLSLShader::sIndexedTextureChannels-1);
-		F32 *index_dst = (F32*) vertices;
-		F32 *index_end = (F32*) end;
+		LLVector4Logical mask;
+		mask.clear();
+		mask.setElement<3>();
+		
+		texIdx.set(0,0,0,index);
 
-		index_dst += 3;
-		index_end += 3;
-		do
 		{
-			*index_dst = index;
-			index_dst += 4;
+			LLFastTimer t(FTM_FACE_POSITION_STORE);
+			LLVector4a tmp;
+
+			do
+			{	
+				mat_vert.affineTransform(*src++, res);
+				tmp.setSelectWithMask(mask, texIdx, res);
+				tmp.store4a((F32*) dst);
+				dst += 4;
+			}
+			while(dst < end);
 		}
-		while (index_dst < index_end);
-		
-		S32 aligned_pad_vertices = mGeomCount - num_vertices;
-		LLVector4a* last_vec = end - 1;
-		while (aligned_pad_vertices > 0)
+
 		{
-			--aligned_pad_vertices;
-			*dst++ = *last_vec;
+			LLFastTimer t(FTM_FACE_POSITION_PAD);
+			S32 aligned_pad_vertices = mGeomCount - num_vertices;
+			res.set(res[0], res[1], res[2], 0.f);
+
+			while (aligned_pad_vertices > 0)
+			{
+				--aligned_pad_vertices;
+				res.store4a((F32*) dst);
+				dst += 4;
+			}
 		}
-		
+
 		if (map_range)
 		{
 			mVertexBuffer->flush();
 	{
 		LLFastTimer t(FTM_FACE_GEOM_NORMAL);
 		mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range);
-		normals = (LLVector4a*) norm.get();
+		F32* normals = (F32*) norm.get();
 	
 		for (S32 i = 0; i < num_vertices; i++)
 		{	
 			LLVector4a normal;
 			mat_normal.rotate(vf.mNormals[i], normal);
 			normal.normalize3fast();
-			normals[i] = normal;
+			normal.store4a(normals);
+			normals += 4;
 		}
 
 		if (map_range)
 	{
 		LLFastTimer t(FTM_FACE_GEOM_BINORMAL);
 		mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range);
-		binormals = (LLVector4a*) binorm.get();
+		F32* binormals = (F32*) binorm.get();
 		
 		for (S32 i = 0; i < num_vertices; i++)
 		{	
 			LLVector4a binormal;
 			mat_normal.rotate(vf.mBinormals[i], binormal);
 			binormal.normalize3fast();
-			binormals[i] = binormal;
+			binormal.store4a(binormals);
+			binormals += 4;
 		}
 
 		if (map_range)
 	{
 		LLFastTimer t(FTM_FACE_GEOM_WEIGHTS);
 		mVertexBuffer->getWeight4Strider(wght, mGeomIndex, mGeomCount, map_range);
-		weights = (LLVector4a*) wght.get();
-		LLVector4a::memcpyNonAliased16((F32*) weights, (F32*) vf.mWeights, num_vertices*4*sizeof(F32));
+		F32* weights = (F32*) wght.get();
+		LLVector4a::memcpyNonAliased16(weights, (F32*) vf.mWeights, num_vertices*4*sizeof(F32));
 		if (map_range)
 		{
 			mVertexBuffer->flush();
 		
 		src.loadua((F32*) vec);
 
-		LLVector4a* dst = (LLVector4a*) colors.get();
+		F32* dst = (F32*) colors.get();
 		S32 num_vecs = num_vertices/4;
 		if (num_vertices%4 > 0)
 		{
 
 		for (S32 i = 0; i < num_vecs; i++)
 		{	
-			dst[i] = src;
+			src.store4a(dst);
+			dst += 4;
 		}
 
 		if (map_range)
 		
 		src.loadua((F32*) vec);
 
-		LLVector4a* dst = (LLVector4a*) emissive.get();
+		F32* dst = (F32*) emissive.get();
 		S32 num_vecs = num_vertices/4;
 		if (num_vertices%4 > 0)
 		{
 
 		for (S32 i = 0; i < num_vecs; i++)
 		{	
-			dst[i] = src;
+			src.store4a(dst);
+			dst += 4;
 		}
 
 		if (map_range)
 		mTexExtents[1][1] *= et ;
 	}
 
+
 	mLastVertexBuffer = mVertexBuffer;
 	mLastGeomCount = mGeomCount;
 	mLastGeomIndex = mGeomIndex;

File indra/newview/llfloatermodelpreview.cpp

 				U32 num_indices = mVertexBuffer[5][mdl][i]->getNumIndices();
 				if (num_indices > 2)
 				{
-					glodInsertElements(mObject[mdl], i, GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT, mVertexBuffer[5][mdl][i]->getIndicesPointer(), 0, 0.f);
+					glodInsertElements(mObject[mdl], i, GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT, (U8*) mVertexBuffer[5][mdl][i]->getIndicesPointer(), 0, 0.f);
 				}
 				tri_count += num_indices/3;
 				stop_gloderror();
 				{
 					buff->allocateBuffer(sizes[i*2+1], sizes[i*2], true);
 					buff->setBuffer(type_mask);
-					glodFillElements(mObject[base], names[i], GL_UNSIGNED_SHORT, buff->getIndicesPointer());
+					glodFillElements(mObject[base], names[i], GL_UNSIGNED_SHORT, (U8*) buff->getIndicesPointer());
 					stop_gloderror();
 				}
 				else
 				{ //this face was eliminated, create a dummy triangle (one vertex, 3 indices, all 0)
 					buff->allocateBuffer(1, 3, true);
-					memset(buff->getMappedData(), 0, buff->getSize());
-					memset(buff->getIndicesPointer(), 0, buff->getIndicesSize());
+					memset((U8*) buff->getMappedData(), 0, buff->getSize());
+					memset((U8*) buff->getIndicesPointer(), 0, buff->getIndicesSize());
 				}
 
 				buff->validateRange(0, buff->getNumVerts()-1, buff->getNumIndices(), 0);
 	LLPointer<LLVertexBuffer> buff = new LLVertexBuffer(type_mask, 0);
 	
 	buff->allocateBuffer(1, 3, true);
-	memset( buff->getMappedData(), 0, buff->getSize() );
-	memset( buff->getIndicesPointer(), 0, buff->getIndicesSize() );
+	memset( (U8*) buff->getMappedData(), 0, buff->getSize() );
+	memset( (U8*) buff->getIndicesPointer(), 0, buff->getIndicesSize() );
 		
 	buff->validateRange( 0, buff->getNumVerts()-1, buff->getNumIndices(), 0 );
 		

File indra/newview/llspatialpartition.cpp

 
 void LLSpatialGroup::buildOcclusion()
 {
-	if (mOcclusionVerts.isNull())
+	//if (mOcclusionVerts.isNull())
 	{
 		mOcclusionVerts = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX, 
 			LLVertexBuffer::sUseStreamDraw ? mBufferUsage : 0); //if GL has a hard time with VBOs, don't use them for occlusion culling.
 	if (vertex_count > 0 && index_count > 0)
 	{ //create vertex buffer containing volume geometry for this node
 		group->mBuilt = 1.f;
-		if (group->mVertexBuffer.isNull() || (group->mBufferUsage != group->mVertexBuffer->getUsage() && LLVertexBuffer::sEnableVBOs))
+		if (group->mVertexBuffer.isNull() ||
+			!group->mVertexBuffer->isWriteable() ||
+			(group->mBufferUsage != group->mVertexBuffer->getUsage() && LLVertexBuffer::sEnableVBOs))
 		{
 			group->mVertexBuffer = createVertexBuffer(mVertexDataMask, group->mBufferUsage);
 			group->mVertexBuffer->allocateBuffer(vertex_count, index_count, true);

File indra/newview/llviewerobject.cpp

 {
 	if (mDrawable)
 	{
-		LLSpatialGroup* group = mDrawable->getSpatialGroup();
+		gPipeline.markRebuild(mDrawable, LLDrawable::REBUILD_ALL);
+		/*LLSpatialGroup* group = mDrawable->getSpatialGroup();
 		if (group)
 		{
 			group->dirtyMesh();
-		}
+		}*/
 	}
 }
 

File indra/newview/llvosurfacepatch.cpp

 			return;
 		}
 
-		U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;
+		volatile U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;
 
 		//assume tex coords 2 and 3 are present
 		U32 type_mask = mTypeMask | MAP_TEXCOORD2 | MAP_TEXCOORD3;

File indra/newview/llvovolume.cpp

 	
 		//create/delete/resize vertex buffer if needed
 		LLVertexBuffer* buffer = NULL;
-		LLSpatialGroup::buffer_texture_map_t::iterator found_iter = group->mBufferMap[mask].find(*face_iter);
+
+		{ //try to find a buffer to reuse
+			LLSpatialGroup::buffer_texture_map_t::iterator found_iter = group->mBufferMap[mask].find(*face_iter);
 		
-		if (found_iter != group->mBufferMap[mask].end())
-		{
-			if ((U32) buffer_index < found_iter->second.size())
+			if (found_iter != group->mBufferMap[mask].end())
 			{
-				buffer = found_iter->second[buffer_index];
+				if ((U32) buffer_index < found_iter->second.size())
+				{
+					buffer = found_iter->second[buffer_index];
+				}
 			}
 		}
 						
-		if (!buffer)
+		if (!buffer || !buffer->isWriteable())
 		{ //create new buffer if needed
 			buffer = createVertexBuffer(mask, buffer_usage);
 			buffer->allocateBuffer(geom_count, index_count, TRUE);

File indra/newview/llvowater.cpp

 				  indices_per_quad * num_quads);
 	
 	LLVertexBuffer* buff = face->getVertexBuffer();
-	if (!buff)
+	if (!buff || !buff->isWriteable())
 	{
 		buff = new LLVertexBuffer(LLDrawPoolWater::VERTEX_DATA_MASK, GL_DYNAMIC_DRAW_ARB);
 		buff->allocateBuffer(face->getGeomCount(), face->getIndicesCount(), TRUE);

File indra/newview/llvowlsky.cpp

 	LLStrider<LLColor4U> colorsp;
 	LLStrider<LLVector2> texcoordsp;
 
-	if (mStarsVerts.isNull())
+	if (mStarsVerts.isNull() || !mStarsVerts->isWriteable())
 	{
 		mStarsVerts = new LLVertexBuffer(LLDrawPoolWLSky::STAR_VERTEX_DATA_MASK, GL_DYNAMIC_DRAW);
 		mStarsVerts->allocateBuffer(getStarsNumVerts()*6, 0, TRUE);