Commits

Scott Lawrence committed 7393f2c Merge

merge to viewer-developmento

Comments (0)

Files changed (181)

 09984bfa6cae17e0f72d02b75c1b7393c65eecfc 2.7.5-beta1
 e1ed60913230dd64269a7f7fc52cbc6004f6d52c 2.8.0-start
 502f6a5deca9365ddae57db4f1e30172668e171e 2.8.1-start
+888768f162d2c0a8de1dcc5fb9a08bd8bd120a6b DRTVWR-175
 2a3965b3ad202df7ea25d2be689291bb14a1280e DRTVWR-155
 6866d9df6efbd441c66451debd376d21211de39c DRTVWR-68_2.7.5-release
 6866d9df6efbd441c66451debd376d21211de39c 2.7.5-release
 6428242e124b523813bfaf4c45b3d422f0298c81 3.3.3-release
 57d221de3df94f90b55204313c2cef044a3c0ae2 DRTVWR-176
 09ef7fd1b0781f33b8a3a9af6236b7bcb4831910 DRTVWR-170
+005dfe5c4c377207d065fb27858d2eb0b53b143a DRTVWR-167
 f87bfbe0b62d26f451d02a47c80ebef6b9168fc2 3.3.4-beta1
 f87bfbe0b62d26f451d02a47c80ebef6b9168fc2 DRTVWR-158
 f87bfbe0b62d26f451d02a47c80ebef6b9168fc2 3.3.4-beta1
 f91d003091a61937a044652c4c674447f7dcbb7a 3.3.4-beta1
 82b5330bc8b17d0d4b598832e9c5a92e90075682 3.3.4-beta2
 eb539c65e6ee26eea2bf373af2d0f4b52dc91289 DRTVWR-177
+4ad8a3afe40e0200309e3ada68932c4295ac2795 DRTVWR-179
 a8057e1b9a1246b434a27405be35e030f7d28b0c 3.3.4-beta3
 4281aa899fb2cedb7a9ca7ce91c5c29d4aa69594 DRTVWR-180
 9cd174d3a54d93d409a7c346a15b8bfb40fc58f4 DRTVWR-184

indra/cmake/00-Common.cmake

       /Oy-
       /Zc:wchar_t-
       /arch:SSE2
+      /fp:fast
       )
      
   # Are we using the crummy Visual Studio KDU build workaround?

indra/cmake/Copy3rdPartyLibs.cmake

         libhunspell.dll
         )
 
-    if(USE_GOOGLE_PERFTOOLS)
+    if(USE_TCMALLOC)
       set(debug_files ${debug_files} libtcmalloc_minimal-debug.dll)
       set(release_files ${release_files} libtcmalloc_minimal.dll)
-    endif(USE_GOOGLE_PERFTOOLS)
+    endif(USE_TCMALLOC)
 
     if (FMOD)
       set(debug_files ${debug_files} fmod.dll)
         libopenal.so
         libopenjpeg.so
         libssl.so
-        libtcmalloc_minimal.so
         libuuid.so.16
         libuuid.so.16.0.22
         libssl.so.1.0.0
         libfontconfig.so.1.4.4
        )
 
+    if (USE_TCMALLOC)
+      set(release_files ${release_files} "libtcmalloc_minimal.so")
+    endif (USE_TCMALLOC)
+
     if (FMOD)
       set(release_files ${release_files} "libfmod-3.75.so")
     endif (FMOD)

indra/cmake/FindGooglePerfTools.cmake

File contents unchanged.

indra/cmake/GooglePerfTools.cmake

 # -*- cmake -*-
 include(Prebuilt)
 
+# If you want to enable or disable TCMALLOC in viewer builds, this is the place.
+# set ON or OFF as desired.
+set (USE_TCMALLOC ON)
+
 if (STANDALONE)
   include(FindGooglePerfTools)
 else (STANDALONE)
   if (WINDOWS)
-    use_prebuilt_binary(tcmalloc)
-    set(TCMALLOC_LIBRARIES 
-        debug libtcmalloc_minimal-debug
-        optimized libtcmalloc_minimal)
+    if (USE_TCMALLOC)
+       use_prebuilt_binary(tcmalloc)
+       set(TCMALLOC_LIBRARIES 
+         debug libtcmalloc_minimal-debug
+         optimized libtcmalloc_minimal)
+       set(TCMALLOC_LINK_FLAGS  "/INCLUDE:__tcmalloc")
+    else (USE_TCMALLOC)
+      set(TCMALLOC_LIBRARIES)
+      set(TCMALLOC_LINK_FLAGS)
+    endif (USE_TCMALLOC)
     set(GOOGLE_PERFTOOLS_FOUND "YES")
   endif (WINDOWS)
   if (LINUX)
-    use_prebuilt_binary(tcmalloc)
-    set(TCMALLOC_LIBRARIES 
-    tcmalloc)
+    if (USE_TCMALLOC)
+      use_prebuilt_binary(tcmalloc)
+      set(TCMALLOC_LIBRARIES 
+        tcmalloc)
+    else (USE_TCMALLOC)
+      set(TCMALLOC_LIBRARIES)
+    endif (USE_TCMALLOC)
     set(PROFILER_LIBRARIES profiler)
     set(GOOGLE_PERFTOOLS_INCLUDE_DIR
         ${LIBS_PREBUILT_DIR}/include)
 endif (GOOGLE_PERFTOOLS_FOUND)
 
 if (WINDOWS)
-    set(USE_GOOGLE_PERFTOOLS ON)
+   set(USE_GOOGLE_PERFTOOLS ON)
 endif (WINDOWS)
 
 if (USE_GOOGLE_PERFTOOLS)
-  set(TCMALLOC_FLAG -ULL_USE_TCMALLOC=1)
+  if (USE_TCMALLOC)
+    set(TCMALLOC_FLAG -DLL_USE_TCMALLOC=1)
+  else (USE_TCMALLOC)
+    set(TCMALLOC_FLAG -ULL_USE_TCMALLOC)
+  endif (USE_TCMALLOC)
+endif (USE_GOOGLE_PERFTOOLS)
+
+if (USE_GOOGLE_PERFTOOLS)
   include_directories(${GOOGLE_PERFTOOLS_INCLUDE_DIR})
   set(GOOGLE_PERFTOOLS_LIBRARIES ${TCMALLOC_LIBRARIES} ${STACKTRACE_LIBRARIES} ${PROFILER_LIBRARIES})
 else (USE_GOOGLE_PERFTOOLS)
-  set(TCMALLOC_FLAG -ULL_USE_TCMALLOC)
 endif (USE_GOOGLE_PERFTOOLS)

indra/cmake/LLAddBuildTest.cmake

     SET_TARGET_PROPERTIES(INTEGRATION_TEST_${testname} PROPERTIES COMPILE_FLAGS -I"${TUT_INCLUDE_DIR}")
   endif(STANDALONE)
 
+  if (WINDOWS)
+    SET_TARGET_PROPERTIES(INTEGRATION_TEST_${testname}
+        PROPERTIES
+        LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS ${TCMALLOC_LINK_FLAGS}"
+        LINK_FLAGS_DEBUG "/NODEFAULTLIB:\"LIBCMT;LIBCMTD;MSVCRT\" /INCREMENTAL:NO"
+        LINK_FLAGS_RELEASE ""
+        )
+  endif (WINDOWS)
+
   # Add link deps to the executable
   if(TEST_DEBUG)
     message(STATUS "TARGET_LINK_LIBRARIES(INTEGRATION_TEST_${testname} ${libraries})")

indra/llcharacter/llcharacter.cpp

 //-----------------------------------------------------------------------------
 static LLFastTimer::DeclareTimer FTM_UPDATE_ANIMATION("Update Animation");
 static LLFastTimer::DeclareTimer FTM_UPDATE_HIDDEN_ANIMATION("Update Hidden Anim");
+static LLFastTimer::DeclareTimer FTM_UPDATE_MOTIONS("Update Motions");
 
 void LLCharacter::updateMotions(e_update_t update_type)
 {
 			mMotionController.unpauseAllMotions();
 		}
 		bool force_update = (update_type == FORCE_UPDATE);
-		mMotionController.updateMotions(force_update);
+		{
+			LLFastTimer t(FTM_UPDATE_MOTIONS);
+			mMotionController.updateMotions(force_update);
+		}
 	}
 }
 

indra/llcharacter/lleditingmotion.cpp

File contents unchanged.

indra/llcharacter/llkeyframewalkmotion.cpp

File contents unchanged.

indra/llcharacter/llmotioncontroller.cpp

 //-----------------------------------------------------------------------------
 // updateMotionsByType()
 //-----------------------------------------------------------------------------
+static LLFastTimer::DeclareTimer FTM_MOTION_ON_UPDATE("Motion onUpdate");
+
 void LLMotionController::updateMotionsByType(LLMotion::LLMotionBlendType anim_type)
 {
 	BOOL update_result = TRUE;
 			}
 
 			// perform motion update
-			update_result = motionp->onUpdate(mAnimTime - motionp->mActivationTimestamp, last_joint_signature);
+			{
+				LLFastTimer t(FTM_MOTION_ON_UPDATE);
+				update_result = motionp->onUpdate(mAnimTime - motionp->mActivationTimestamp, last_joint_signature);
+			}
 		}
 
 		//**********************
 
 	// Always cap the number of loaded motions
 	purgeExcessMotions();
-	
+		
 	// Update timing info for this time step.
 	if (!mPaused)
 	{
 				}
 
 				updateLoadingMotions();
+				
 				return;
 			}
 			
 	}
 
 	updateLoadingMotions();
-
+	
 	resetJointSignatures();
 
 	if (mPaused && !force_update)
 	{
 		// update additive motions
 		updateAdditiveMotions();
+				
 		resetJointSignatures();
-
+		
 		// update all regular motions
 		updateRegularMotions();
-
+		
 		if (use_quantum)
 		{
 			mPoseBlender.blendAndCache(TRUE);

indra/llcommon/llallocator.cpp

 #include "linden_common.h"
 #include "llallocator.h"
 
-#if LL_USE_TCMALLOC
+#if (LL_USE_TCMALLOC && LL_USE_HEAP_PROFILER)
 
 #include "google/heap-profiler.h"
 #include "google/commandlineflags_public.h"

indra/llcommon/llmemory.cpp

 LLPrivateMemoryPoolManager::mem_allocation_info_t LLPrivateMemoryPoolManager::sMemAllocationTracker;
 #endif
 
+void ll_assert_aligned_func(uintptr_t ptr,U32 alignment)
+{
+#ifdef SHOW_ASSERT
+	// Redundant, place to set breakpoints.
+	if (ptr%alignment!=0)
+	{
+		llwarns << "alignment check failed" << llendl;
+	}
+	llassert(ptr%alignment==0);
+#endif
+}
+
 //static
 void LLMemory::initClass()
 {

indra/llcommon/llmemory.h

 #define LLMEMORY_H
 
 #include "llmemtype.h"
-#if LL_DEBUG
 inline void* ll_aligned_malloc( size_t size, int align )
 {
 	void* mem = malloc( size + (align - 1) + sizeof(void*) );
 	free( ((void**)ptr)[-1] );
 }
 
+#if !LL_USE_TCMALLOC
 inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
 {
 #if defined(LL_WINDOWS)
-	return _mm_malloc(size, 16);
+	return _aligned_malloc(size, 16);
 #elif defined(LL_DARWIN)
 	return malloc(size); // default osx malloc is 16 byte aligned.
 #else
 #endif
 }
 
+inline void* ll_aligned_realloc_16(void* ptr, size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
+{
+#if defined(LL_WINDOWS)
+	return _aligned_realloc(ptr, size, 16);
+#elif defined(LL_DARWIN)
+	return realloc(ptr,size); // default osx malloc is 16 byte aligned.
+#else
+	return realloc(ptr,size); // FIXME not guaranteed to be aligned.
+#endif
+}
+
 inline void ll_aligned_free_16(void *p)
 {
 #if defined(LL_WINDOWS)
-	_mm_free(p);
+	_aligned_free(p);
 #elif defined(LL_DARWIN)
 	return free(p);
 #else
 	free(p); // posix_memalign() is compatible with heap deallocator
 #endif
 }
+#else // USE_TCMALLOC
+// ll_aligned_foo_16 are not needed with tcmalloc
+#define ll_aligned_malloc_16 malloc
+#define ll_aligned_realloc_16 realloc
+#define ll_aligned_free_16 free
+#endif // USE_TCMALLOC
 
 inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
 {
 #if defined(LL_WINDOWS)
-	return _mm_malloc(size, 32);
+	return _aligned_malloc(size, 32);
 #elif defined(LL_DARWIN)
 	return ll_aligned_malloc( size, 32 );
 #else
 inline void ll_aligned_free_32(void *p)
 {
 #if defined(LL_WINDOWS)
-	_mm_free(p);
+	_aligned_free(p);
 #elif defined(LL_DARWIN)
 	ll_aligned_free( p );
 #else
 	free(p); // posix_memalign() is compatible with heap deallocator
 #endif
 }
-#else // LL_DEBUG
-// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals)
-#define ll_aligned_malloc( size, align ) malloc(size)
-#define ll_aligned_free( ptr ) free(ptr)
-#define ll_aligned_malloc_16 malloc
-#define ll_aligned_free_16 free
-#define ll_aligned_malloc_32 malloc
-#define ll_aligned_free_32 free
-#endif // LL_DEBUG
 
 #ifndef __DEBUG_PRIVATE_MEM__
 #define __DEBUG_PRIVATE_MEM__  0
 
 // LLSingleton moved to llsingleton.h
 
+LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
+
+#ifdef SHOW_ASSERT
+#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment))
+#else
+#define ll_assert_aligned(ptr,alignment)
 #endif
+
+
+#endif

indra/llcommon/llversionviewer.h

 
 const S32 LL_VERSION_MAJOR = 3;
 const S32 LL_VERSION_MINOR = 3;
-const S32 LL_VERSION_PATCH = 4;
+const S32 LL_VERSION_PATCH = 5;
 const S32 LL_VERSION_BUILD = 0;
 
 const char * const LL_CHANNEL = "Second Life Developer";

indra/llmath/CMakeLists.txt

   # INTEGRATION TESTS
   set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES})
   # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests.
+  LL_ADD_INTEGRATION_TEST(alignment "" "${test_libs}")
   LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}")
   LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}")
   LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}")

indra/llmath/llcamera.h

 // roll(), pitch(), yaw()
 // etc...
 
-
+LL_ALIGN_PREFIX(16)
 class LLCamera
 : 	public LLCoordFrame
 {
 	};
 
 private:
-	LLPlane mAgentPlanes[7];  //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP
+	LL_ALIGN_16(LLPlane mAgentPlanes[7]);  //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP
 	U8 mPlaneMask[8];         // 8 for alignment	
 	
 	F32 mView;					// angle between top and bottom frustum planes in radians.
 	S32 mViewHeightInPixels;	// for ViewHeightInPixels() only
 	F32 mNearPlane;
 	F32 mFarPlane;
-	LLPlane mLocalPlanes[4];
+	LL_ALIGN_16(LLPlane mLocalPlanes[4]);
 	F32 mFixedDistance;			// Always return this distance, unless < 0
 	LLVector3 mFrustCenter;		// center of frustum and radius squared for ultra-quick exclusion test
 	F32 mFrustRadiusSquared;
 	
-	LLPlane mWorldPlanes[PLANE_NUM];
-	LLPlane mHorizPlanes[HORIZ_PLANE_NUM];
+	LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]);
+	LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]);
 
 	U32 mPlaneCount;  //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in
 
 	void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom);
 	void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2);
 	void calculateWorldFrustumPlanes();
-};
+} LL_ALIGN_POSTFIX(16);
 
 
 #endif

indra/llmath/llmatrix3a.h

 
 protected:
 
-	LLVector4a mColumns[3];
+	LL_ALIGN_16(LLVector4a mColumns[3]);
 
 };
 

indra/llmath/llmatrix4a.h

 class LLMatrix4a
 {
 public:
-	LLVector4a mMatrix[4];
+	LL_ALIGN_16(LLVector4a mMatrix[4]);
 
 	inline void clear()
 	{

indra/llmath/lloctree.h

 #include "v3math.h"
 #include "llvector4a.h"
 #include <vector>
-#include <set>
 
 #define OCT_ERRS LL_WARNS("OctreeErrors")
 
 
 	typedef LLOctreeTraveler<T>									oct_traveler;
 	typedef LLTreeTraveler<T>									tree_traveler;
-	typedef typename std::set<LLPointer<T> >					element_list;
-	typedef typename element_list::iterator						element_iter;
-	typedef typename element_list::const_iterator	const_element_iter;
+	typedef LLPointer<T>*										element_list;
+	typedef LLPointer<T>*										element_iter;
+	typedef const LLPointer<T>*									const_element_iter;
 	typedef typename std::vector<LLTreeListener<T>*>::iterator	tree_listener_iter;
-	typedef typename std::vector<LLOctreeNode<T>* >				child_list;
+	typedef LLOctreeNode<T>**									child_list;
+	typedef LLOctreeNode<T>**									child_iter;
+
 	typedef LLTreeNode<T>		BaseType;
 	typedef LLOctreeNode<T>		oct_node;
 	typedef LLOctreeListener<T>	oct_listener;
 
-	/*void* operator new(size_t size)
+	void* operator new(size_t size)
 	{
 		return ll_aligned_malloc_16(size);
 	}
 	void operator delete(void* ptr)
 	{
 		ll_aligned_free_16(ptr);
-	}*/
+	}
 
 	LLOctreeNode(	const LLVector4a& center, 
 					const LLVector4a& size, 
 	:	mParent((oct_node*)parent), 
 		mOctant(octant) 
 	{ 
+		mData = NULL;
+		mDataEnd = NULL;
+
 		mCenter = center;
 		mSize = size;
 
 	{ 
 		BaseType::destroyListeners(); 
 		
+		for (U32 i = 0; i < mElementCount; ++i)
+		{
+			mData[i]->setBinIndex(-1);
+			mData[i] = NULL;
+		}
+
+		free(mData);
+		mData = NULL;
+		mDataEnd = NULL;
+
 		for (U32 i = 0; i < getChildCount(); i++)
 		{
 			delete getChild(i);
 	}
 
 	void accept(oct_traveler* visitor)				{ visitor->visit(this); }
-	virtual bool isLeaf() const						{ return mChild.empty(); }
+	virtual bool isLeaf() const						{ return mChildCount == 0; }
 	
 	U32 getElementCount() const						{ return mElementCount; }
+	bool isEmpty() const							{ return mElementCount == 0; }
 	element_list& getData()							{ return mData; }
 	const element_list& getData() const				{ return mData; }
-	
+	element_iter getDataBegin()						{ return mData; }
+	element_iter getDataEnd()						{ return mDataEnd; }
+	const_element_iter getDataBegin() const			{ return mData; }
+	const_element_iter getDataEnd() const			{ return mDataEnd; }
+		
 	U32 getChildCount()	const						{ return mChildCount; }
 	oct_node* getChild(U32 index)					{ return mChild[index]; }
 	const oct_node* getChild(U32 index) const		{ return mChild[index]; }
 	
 	virtual bool insert(T* data)
 	{
-		if (data == NULL)
+		if (data == NULL || data->getBinIndex() != -1)
 		{
 			OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl;
 			return false;
 			if ((getElementCount() < gOctreeMaxCapacity && contains(data->getBinRadius()) ||
 				(data->getBinRadius() > getSize()[0] &&	parent && parent->getElementCount() >= gOctreeMaxCapacity))) 
 			{ //it belongs here
-				//if this is a redundant insertion, error out (should never happen)
-				llassert(mData.find(data) == mData.end());
+				mElementCount++;
+				mData = (element_list) realloc(mData, sizeof(LLPointer<T>)*mElementCount);
 
-				mData.insert(data);
+				//avoid unref on uninitialized memory
+				memset(mData+mElementCount-1, 0, sizeof(LLPointer<T>));
+
+				mData[mElementCount-1] = data;
+				mDataEnd = mData + mElementCount;
+				data->setBinIndex(mElementCount-1);
 				BaseType::insert(data);
-
-				mElementCount = mData.size();
 				return true;
 			}
 			else
 
 				if( lt == 0x7 )
 				{
-					mData.insert(data);
+					mElementCount++;
+					mData = (element_list) realloc(mData, sizeof(LLPointer<T>)*mElementCount);
+
+					//avoid unref on uninitialized memory
+					memset(mData+mElementCount-1, 0, sizeof(LLPointer<T>));
+
+					mData[mElementCount-1] = data;
+					mDataEnd = mData + mElementCount;
+					data->setBinIndex(mElementCount-1);
 					BaseType::insert(data);
-
-					mElementCount = mData.size();
 					return true;
 				}
 
 		return false;
 	}
 
+	void _remove(T* data, S32 i)
+	{ //precondition -- mElementCount > 0, idx is in range [0, mElementCount)
+
+		mElementCount--;
+		data->setBinIndex(-1); 
+		
+		if (mElementCount > 0)
+		{
+			if (mElementCount != i)
+			{
+				mData[i] = mData[mElementCount]; //might unref data, do not access data after this point
+				mData[i]->setBinIndex(i);
+			}
+
+			mData[mElementCount] = NULL; //needed for unref
+			mData = (element_list) realloc(mData, sizeof(LLPointer<T>)*mElementCount);
+			mDataEnd = mData+mElementCount;
+		}
+		else
+		{
+			mData[0] = NULL; //needed for unref
+			free(mData);
+			mData = NULL;
+			mDataEnd = NULL;
+		}
+
+		notifyRemoval(data);
+		checkAlive();
+	}
+
 	bool remove(T* data)
 	{
-		if (mData.find(data) != mData.end())
-		{	//we have data
-			mData.erase(data);
-			mElementCount = mData.size();
-			notifyRemoval(data);
-			checkAlive();
-			return true;
+		S32 i = data->getBinIndex();
+
+		if (i >= 0 && i < mElementCount)
+		{
+			if (mData[i] == data)
+			{ //found it
+				_remove(data, i);
+				llassert(data->getBinIndex() == -1);
+				return true;
+			}
 		}
-		else if (isInside(data))
+		
+		if (isInside(data))
 		{
 			oct_node* dest = getNodeAt(data);
 
 			if (dest != this)
 			{
-				return dest->remove(data);
+				bool ret = dest->remove(data);
+				llassert(data->getBinIndex() == -1);
+				return ret;
 			}
 		}
 
 		//node is now root
 		llwarns << "!!! OCTREE REMOVING FACE BY ADDRESS, SEVERE PERFORMANCE PENALTY |||" << llendl;
 		node->removeByAddress(data);
+		llassert(data->getBinIndex() == -1);
 		return true;
 	}
 
 	void removeByAddress(T* data)
 	{
-        if (mData.find(data) != mData.end())
+        for (U32 i = 0; i < mElementCount; ++i)
 		{
-			mData.erase(data);
-			mElementCount = mData.size();
-			notifyRemoval(data);
-			llwarns << "FOUND!" << llendl;
-			checkAlive();
-			return;
+			if (mData[i] == data)
+			{ //we have data
+				_remove(data, i);
+				llwarns << "FOUND!" << llendl;
+				return;
+			}
 		}
 		
 		for (U32 i = 0; i < getChildCount(); i++)
 
 	void clearChildren()
 	{
-		mChild.clear();
 		mChildCount = 0;
+
 		U32* foo = (U32*) mChildMap;
 		foo[0] = foo[1] = 0xFFFFFFFF;
 	}
 
 		mChildMap[child->getOctant()] = mChildCount;
 
-		mChild.push_back(child);
+		mChild[mChildCount] = child;
 		++mChildCount;
 		child->setParent(this);
 
 			mChild[index]->destroy();
 			delete mChild[index];
 		}
-		mChild.erase(mChild.begin() + index);
+
 		--mChildCount;
 
+		mChild[index] = mChild[mChildCount];
+		
+
 		//rebuild child map
 		U32* foo = (U32*) mChildMap;
 		foo[0] = foo[1] = 0xFFFFFFFF;
 	oct_node* mParent;
 	U8 mOctant;
 
-	child_list mChild;
+	LLOctreeNode<T>* mChild[8];
 	U8 mChildMap[8];
 	U32 mChildCount;
 
 	element_list mData;
+	element_iter mDataEnd;
 	U32 mElementCount;
 		
 }; 

indra/llmath/llplane.h

 // The plane normal = [A, B, C]
 // The closest approach = D / sqrt(A*A + B*B + C*C)
 
+
+LL_ALIGN_PREFIX(16)
 class LLPlane
 {
 public:
 		
 private:
 	LLVector4a mV;
-};
+} LL_ALIGN_POSTFIX(16);
 
 
 

indra/llmath/llsimdmath.h

 
 #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)
 
-
-
 #include <xmmintrin.h>
 #include <emmintrin.h>
 
+#include "llmemory.h"
 #include "llsimdtypes.h"
 #include "llsimdtypes.inl"
 

indra/llmath/llsimdtypes.inl

 inline LLSimdScalar operator-(const LLSimdScalar& a)
 {
 	static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+	ll_assert_aligned(signMask,16);
 	return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a);
 }
 
 inline LLSimdScalar LLSimdScalar::getAbs() const
 {
 	static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
+	ll_assert_aligned(F_ABS_MASK_4A,16);
 	return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A));
 }
 

indra/llmath/llvector4a.cpp

  * $/LicenseInfo$
  */
 
+#include "llmemory.h"
 #include "llmath.h"
 #include "llquantize.h"
 
 	assert(dst != NULL);
 	assert(bytes > 0);
 	assert((bytes % sizeof(F32))== 0); 
-	
+	ll_assert_aligned(src,16);
+	ll_assert_aligned(dst,16);
+	assert(bytes%16==0);
+
 	F32* end = dst + (bytes / sizeof(F32) );
 
 	if (bytes > 64)
 		LLVector4a oneOverDelta;
 		{
 			static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f };
+			ll_assert_aligned(F_TWO_4A,16);
+			
 			LLVector4a two; two.load4a( F_TWO_4A );
 
 			// Here we use _mm_rcp_ps plus one round of newton-raphson

indra/llmath/llvector4a.h

 
 #include <assert.h>
 #include "llpreprocessor.h"
+#include "llmemory.h"
 
 ///////////////////////////////////
 // FIRST TIME USERS PLEASE READ
 // LLVector3/LLVector4. 
 /////////////////////////////////
 
+LL_ALIGN_PREFIX(16)
 class LLVector4a
 {
 public:
 	}
 
 	// Copy words 16-byte blocks from src to dst. Source and destination must not overlap. 
+	// Source and dest must be 16-byte aligned and size must be multiple of 16.
 	static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes);
 
 	////////////////////////////////////
 	
 	LLVector4a()
 	{ //DO NOT INITIALIZE -- The overhead is completely unnecessary
+		ll_assert_aligned(this,16);
 	}
 	
 	LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f)
 
 private:
 	LLQuad mQ;
-};
+} LL_ALIGN_POSTFIX(16);
 
 inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p)
 {

indra/llmath/llvector4a.inl

 inline LLBool32 LLVector4a::isFinite3() const
 {
 	static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+	ll_assert_aligned(nanOrInfMask,16);
 	const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask);
 	const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV );
 	const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV ));

indra/llmath/llvector4logical.h

 #ifndef	LL_VECTOR4LOGICAL_H
 #define	LL_VECTOR4LOGICAL_H
 
+#include "llmemory.h"
 
 ////////////////////////////
 // LLVector4Logical
 	inline LLVector4Logical& invert()
 	{
 		static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+		ll_assert_aligned(allOnes,16);
 		mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) );
 		return *this;
 	}

indra/llmath/llvolume.cpp

 
 extern BOOL gDebugGL;
 
-void assert_aligned(void* ptr, uintptr_t alignment)
-{
-#if 0
-	uintptr_t t = (uintptr_t) ptr;
-	if (t%alignment != 0)
-	{
-		llerrs << "Alignment check failed." << llendl;
-	}
-#endif
-}
-
 BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
 {    
 	LLVector3 test = (pt2-pt1)%(pt3-pt2);
 		LLVector4a& min = node->mExtents[0];
 		LLVector4a& max = node->mExtents[1];
 
-		if (!branch->getData().empty())
+		if (!branch->isEmpty())
 		{ //node has data, find AABB that binds data set
-			const LLVolumeTriangle* tri = *(branch->getData().begin());
+			const LLVolumeTriangle* tri = *(branch->getDataBegin());
 			
 			//initialize min/max to first available vertex
 			min = *(tri->mV[0]);
 			max = *(tri->mV[0]);
 			
 			for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = 
-				branch->getData().begin(); iter != branch->getData().end(); ++iter)
+				branch->getDataBegin(); iter != branch->getDataEnd(); ++iter)
 			{ //for each triangle in node
 
 				//stretch by triangles in node
 				max.setMax(max, *tri->mV[2]);
 			}
 		}
-		else if (!branch->getChildren().empty())
+		else if (!branch->isLeaf())
 		{ //no data, but child nodes exist
 			LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(0)->getListener(0);
 
 	if (num_verts)
 	{
 		mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
-		assert_aligned(mPositions, 16);
+		ll_assert_aligned(mPositions, 16);
 		mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
-		assert_aligned(mNormals, 16);
+		ll_assert_aligned(mNormals, 16);
 
 		//pad texture coordinate block end to allow for QWORD reads
 		S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF;
 		mTexCoords = (LLVector2*) ll_aligned_malloc_16(size);
-		assert_aligned(mTexCoords, 16);
+		ll_assert_aligned(mTexCoords, 16);
 	}
 	else
 	{
 //	S32 old_size = mNumVertices*16;
 
 	//positions
-	mPositions = (LLVector4a*) realloc(mPositions, new_size);
+	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size);
+	ll_assert_aligned(mPositions,16);
 	
 	//normals
-	mNormals = (LLVector4a*) realloc(mNormals, new_size);
-	
+	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size);
+	ll_assert_aligned(mNormals,16);
+
 	//tex coords
 	new_size = ((new_verts*8)+0xF) & ~0xF;
-	mTexCoords = (LLVector2*) realloc(mTexCoords, new_size);
+	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size);
+	ll_assert_aligned(mTexCoords,16);
 	
 
 	//just clear binormals
 	S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;
 	if (new_size != old_size)
 	{
-		mIndices = (U16*) realloc(mIndices, new_size);
+		mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size);
+		ll_assert_aligned(mIndices,16);
 	}
 	
 	mIndices[mNumIndices++] = idx;
 	}
 
 	//allocate new buffer space
-	mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a));
-	assert_aligned(mPositions, 16);
-	mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a));
-	assert_aligned(mNormals, 16);
-	mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
-	assert_aligned(mTexCoords, 16);
+	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a));
+	ll_assert_aligned(mPositions, 16);
+	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a));
+	ll_assert_aligned(mNormals, 16);
+	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
+	ll_assert_aligned(mTexCoords, 16);
 	
 	mNumVertices = new_count;
 
 	new_count = mNumIndices + face.mNumIndices;
 
 	//allocate new index buffer
-	mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
+	mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
 	
 	//get destination address into new index buffer
 	U16* dst_idx = mIndices+mNumIndices;

indra/llmath/llvolumeoctree.cpp

 void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode<LLVolumeTriangle>* node)
 {
 	for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = 
-			node->getData().begin(); iter != node->getData().end(); ++iter)
+			node->getDataBegin(); iter != node->getDataEnd(); ++iter)
 	{
 		const LLVolumeTriangle* tri = *iter;
 
 	}
 
 	//children fit, check data
-	for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = branch->getData().begin(); 
-			iter != branch->getData().end(); ++iter)
+	for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = branch->getDataBegin(); 
+			iter != branch->getDataEnd(); ++iter)
 	{
 		const LLVolumeTriangle* tri = *iter;
 

indra/llmath/llvolumeoctree.h

 class LLVolumeTriangle : public LLRefCount
 {
 public:
+	void* operator new(size_t size)
+	{
+		return ll_aligned_malloc_16(size);
+	}
+
+	void operator delete(void* ptr)
+	{
+		ll_aligned_free_16(ptr);
+	}
+
 	LLVolumeTriangle()
 	{
-		
+		mBinIndex = -1;	
 	}
 
 	LLVolumeTriangle(const LLVolumeTriangle& rhs)
 	
 	}
 
-	LLVector4a mPositionGroup;
+	LL_ALIGN_16(LLVector4a mPositionGroup);
 
 	const LLVector4a* mV[3];
 	U16 mIndex[3];
 
 	F32 mRadius;
+	mutable S32 mBinIndex;
+
 
 	virtual const LLVector4a& getPositionGroup() const;
 	virtual const F32& getBinRadius() const;
+	
+	S32 getBinIndex() const { return mBinIndex; }
+	void setBinIndex(S32 idx) const { mBinIndex = idx; }
+
+
 };
 
 class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle>
 {
 public:
 	
+	void* operator new(size_t size)
+	{
+		return ll_aligned_malloc_16(size);
+	}
+
+	void operator delete(void* ptr)
+	{
+		ll_aligned_free_16(ptr);
+	}
+
 	LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node);
 	~LLVolumeOctreeListener();
 	
 	
 
 public:
-	LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects)
-	LLVector4a mExtents[2]; // extents (min, max) of this node and all its children
+	LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects)
+	LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children
 };
 
 class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle>

indra/llmath/tests/alignment_test.cpp

+/**
+ * @file v3dmath_test.cpp
+ * @author Vir
+ * @date 2011-12
+ * @brief v3dmath test cases.
+ *
+ * $LicenseInfo:firstyear=2011&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2011, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+// Tests related to allocating objects with alignment constraints, particularly for SSE support.
+
+#include "linden_common.h"
+#include "../test/lltut.h"
+#include "../llmath.h"
+#include "../llsimdmath.h"
+#include "../llvector4a.h"
+
+void* operator new(size_t size)
+{
+	return ll_aligned_malloc_16(size);
+}
+
+void operator delete(void *p)
+{
+	ll_aligned_free_16(p);
+}
+
+namespace tut
+{
+
+#define is_aligned(ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr))%(alignment)==0)
+#define is_aligned_relative(ptr,base_ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr)-reinterpret_cast<uintptr_t>(base_ptr))%(alignment)==0)
+
+struct alignment_test {};
+
+typedef test_group<alignment_test> alignment_test_t;
+typedef alignment_test_t::object alignment_test_object_t;
+tut::alignment_test_t tut_alignment_test("LLAlignment");
+
+LL_ALIGN_PREFIX(16)
+class MyVector4a
+{
+	LLQuad mQ;
+} LL_ALIGN_POSTFIX(16);
+
+
+// Verify that aligned allocators perform as advertised.
+template<> template<>
+void alignment_test_object_t::test<1>()
+{
+#   ifdef LL_DEBUG
+	skip("This test fails on Windows when compiled in debug mode.");
+#   endif
+	
+	const int num_tests = 7;
+	void *align_ptr;
+	for (int i=0; i<num_tests; i++)
+	{
+		align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a));
+		ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16));
+
+		align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a));
+		ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16));
+
+		ll_aligned_free_16(align_ptr);
+
+		align_ptr = ll_aligned_malloc_32(sizeof(MyVector4a));
+		ensure("ll_aligned_malloc_32 failed", is_aligned(align_ptr,32));
+		ll_aligned_free_32(align_ptr);
+	}
+}
+
+// In-place allocation of objects and arrays.
+template<> template<>
+void alignment_test_object_t::test<2>()
+{
+	MyVector4a vec1;
+	ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16));
+	
+	MyVector4a veca[12];
+	ensure("LLAlignment veca unaligned", is_aligned(veca,16));
+}
+
+// Heap allocation of objects and arrays.
+template<> template<>
+void alignment_test_object_t::test<3>()
+{
+#   ifdef LL_DEBUG
+	skip("This test fails on Windows when compiled in debug mode.");
+#   endif
+	
+	const int ARR_SIZE = 7;
+	for(int i=0; i<ARR_SIZE; i++)
+	{
+		MyVector4a *vecp = new MyVector4a;
+		ensure("LLAlignment vecp unaligned", is_aligned(vecp,16));
+		delete vecp;
+	}
+
+	MyVector4a *veca = new MyVector4a[ARR_SIZE];
+	ensure("LLAligment veca base", is_aligned(veca,16));
+	for(int i=0; i<ARR_SIZE; i++)
+	{
+		std::cout << "veca[" << i << "]" << std::endl;
+		ensure("LLAlignment veca member unaligned", is_aligned(&veca[i],16));
+	}
+}
+
+}

indra/llmessage/llurlrequest.cpp

 }
 
 static LLFastTimer::DeclareTimer FTM_PROCESS_URL_REQUEST("URL Request");
+static LLFastTimer::DeclareTimer FTM_PROCESS_URL_REQUEST_GET_RESULT("Get Result");
+static LLFastTimer::DeclareTimer FTM_URL_PERFORM("Perform");
 
 // virtual
 LLIOPipe::EStatus LLURLRequest::process_impl(
 	{
 		PUMP_DEBUG;
 		LLIOPipe::EStatus status = STATUS_BREAK;
-		static LLFastTimer::DeclareTimer FTM_URL_PERFORM("Perform");
 		{
 			LLFastTimer t(FTM_URL_PERFORM);
 			if(!mDetail->mCurlRequest->wait())
 		{
 			CURLcode result;
 
-			static LLFastTimer::DeclareTimer FTM_PROCESS_URL_REQUEST_GET_RESULT("Get Result");
-
 			bool newmsg = false;
 			{
 				LLFastTimer t(FTM_PROCESS_URL_REQUEST_GET_RESULT);

indra/llmessage/tests/llhttpclient_test.cpp

 	void HTTPClientTestObject::test<1>()
 	{
 		LLHTTPClient::get(local_server, newResult());
+
 		runThePump();
 		ensureStatusOK();
 		ensure("result object wasn't destroyed", mResultDeleted);

indra/llprimitive/llmodel.cpp

 
 	if (tc.get())
 	{
-		LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), num_verts*2*sizeof(F32));
+		U32 tex_size = (num_verts*2*sizeof(F32)+0xF)&~0xF;
+		LLVector4a::memcpyNonAliased16((F32*) face.mTexCoords, (F32*) tc.get(), tex_size);
 	}
 	else
 	{

indra/llrender/llimagegl.cpp

 
 //----------------------------------------------------------------------------
 
+static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_STATS("Image Stats");
 // static
 void LLImageGL::updateStats(F32 current_time)
 {
+	LLFastTimer t(FTM_IMAGE_UPDATE_STATS);
 	sLastFrameTime = current_time;
 	sBoundTextureMemoryInBytes = sCurBoundTextureMemory;
 	sCurBoundTextureMemory = 0;

indra/llrender/llrendertarget.cpp

 	release();
 }
 
+void LLRenderTarget::resize(U32 resx, U32 resy, U32 color_fmt)
+{ 
+	//for accounting, get the number of pixels added/subtracted
+	S32 pix_diff = (resx*resy)-(mResX*mResY);
+		
+	mResX = resx;
+	mResY = resy;
+
+	for (U32 i = 0; i < mTex.size(); ++i)
+	{ //resize color attachments
+		gGL.getTexUnit(0)->bindManual(mUsage, mTex[i]);
+		LLImageGL::setManualImage(LLTexUnit::getInternalType(mUsage), 0, color_fmt, mResX, mResY, GL_RGBA, GL_UNSIGNED_BYTE, NULL, false);
+		sBytesAllocated += pix_diff*4;
+	}
+
+	if (mDepth)
+	{ //resize depth attachment
+		if (mStencil)
+		{
+			//use render buffers where stencil buffers are in play
+			glBindRenderbuffer(GL_RENDERBUFFER, mDepth);
+			glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, mResX, mResY);
+			glBindRenderbuffer(GL_RENDERBUFFER, 0);
+		}
+		else
+		{
+			gGL.getTexUnit(0)->bindManual(mUsage, mDepth);
+			U32 internal_type = LLTexUnit::getInternalType(mUsage);
+			LLImageGL::setManualImage(internal_type, 0, GL_DEPTH_COMPONENT24, mResX, mResY, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL, false);
+		}
+
+		sBytesAllocated += pix_diff*4;
+	}
+}
+	
+
 bool LLRenderTarget::allocate(U32 resx, U32 resy, U32 color_fmt, bool depth, bool stencil, LLTexUnit::eTextureType usage, bool use_fbo, S32 samples)
 {
 	stop_glerror();

indra/llrender/llrendertarget.h

 
 */
 
-class LLMultisampleBuffer;
-
 class LLRenderTarget
 {
 public:
 	//multiple calls will release previously allocated resources
 	bool allocate(U32 resx, U32 resy, U32 color_fmt, bool depth, bool stencil, LLTexUnit::eTextureType usage = LLTexUnit::TT_TEXTURE, bool use_fbo = false, S32 samples = 0);
 
+	//resize existing attachments to use new resolution and color format
+	// CAUTION: if the GL runs out of memory attempting to resize, this render target will be undefined
+	// DO NOT use for screen space buffers or for scratch space for an image that might be uploaded
+	// DO use for render targets that resize often and aren't likely to ruin someone's day if they break
+	void resize(U32 resx, U32 resy, U32 color_fmt);
+
 	//add color buffer attachment
 	//limit of 4 color attachments per render target
 	bool addColorAttachment(U32 color_fmt);

indra/llrender/llvertexbuffer.cpp

 #include "llglslshader.h"
 #include "llmemory.h"
 
-#if LL_DARWIN
-#define LL_VBO_POOLING 1
-#else
-#endif
 //Next Highest Power Of Two
 //helper function, returns first number > v that is a power of 2, or v if v is already a power of 2
 U32 nhpo2(U32 v)
 }
 
 
+
 void LLVBOPool::cleanup()
 {
 	U32 size = LL_VBO_BLOCK_SIZE;

indra/llui/llview.cpp

File contents unchanged.

indra/llwindow/llwindowwin32.cpp

File contents unchanged.

indra/newview/CMakeLists.txt

 if (WINDOWS)
     set_target_properties(${VIEWER_BINARY_NAME}
         PROPERTIES
-        # *TODO -reenable this once we get server usage sorted out
-        #LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:\"__tcmalloc\""
-        LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS /INCLUDE:__tcmalloc "
+        LINK_FLAGS "/debug /NODEFAULTLIB:LIBCMT /SUBSYSTEM:WINDOWS ${TCMALLOC_LINK_FLAGS}"
         LINK_FLAGS_DEBUG "/NODEFAULTLIB:\"LIBCMT;LIBCMTD;MSVCRT\" /INCREMENTAL:NO"
         LINK_FLAGS_RELEASE "/FORCE:MULTIPLE /MAP\"secondlife-bin.MAP\" /OPT:REF"
         )
     # In the meantime, if you have any ideas on how to easily maintain one list, either here or in viewer_manifest.py
     # and have the build deps get tracked *please* tell me about it.
 
-    if(USE_GOOGLE_PERFTOOLS)
+    if(USE_TCMALLOC)
       # Configure a var for tcmalloc location, if used.
       # Note the need to specify multiple names explicitly.
       set(GOOGLE_PERF_TOOLS_SOURCE
         ${SHARED_LIB_STAGING_DIR}/RelWithDebInfo/libtcmalloc_minimal.dll
         ${SHARED_LIB_STAGING_DIR}/Debug/libtcmalloc_minimal-debug.dll
         )
-     endif(USE_GOOGLE_PERFTOOLS)
+     endif(USE_TCMALLOC)
 
 
     set(COPY_INPUT_DEPENDENCIES

indra/newview/app_settings/settings.xml

File contents unchanged.

indra/newview/llappviewer.cpp

 #include "lllogininstance.h"
 #include "llprogressview.h"
 #include "llvocache.h"
+#include "llvopartgroup.h"
 #include "llweb.h"
 #include "llsecondlifeurls.h"
 #include "llupdaterservice.h"
 	// initialize SSE options
 	LLVector4a::initClass();
 
+	//initialize particle index pool
+	LLVOPartGroup::initClass();
+
 	// Need to do this initialization before we do anything else, since anything
 	// that touches files should really go through the lldir API
 	gDirUtilp->initAppDirs("SecondLife");

indra/newview/llappviewerwin32.cpp

 	// This results in a 2-3x improvement in opening a new Inventory window (which uses a large numebr of allocations)
 	// Note: This won't work when running from the debugger unless the _NO_DEBUG_HEAP environment variable is set to 1
 
+	// Enable to get mem debugging within visual studio.
+	//_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
 	_CrtSetDbgFlag(0); // default, just making explicit
 	
 	ULONG ulEnableLFH = 2;

indra/newview/lldrawable.cpp

 
 static LLFastTimer::DeclareTimer FTM_CULL_REBOUND("Cull Rebound");
 
+extern bool gShiftFrame;
+
 
 ////////////////////////
 //
 	
 	mGeneration = -1;
 	mBinRadius = 1.f;
+	mBinIndex = -1;
+
 	mSpatialBridge = NULL;
 }
 
 		return;
 	}
 
+	if (gShiftFrame)
+	{
+		return;
+	}
+
 	//switch LOD with the spatial group to avoid artifacts
 	//LLSpatialGroup* sg = getSpatialGroup();
 
 		mXform.setPosition(mVObjp->getPositionAgent());
 	}
 
-	mXform.setRotation(mVObjp->getRotation());
-	mXform.setScale(1,1,1);
 	mXform.updateMatrix();
 
 	if (isStatic())
 	{
 		LLVOVolume* volume = getVOVolume();
-		if (!volume)
+
+		bool rebuild = (!volume && 
+						getRenderType() != LLPipeline::RENDER_TYPE_TREE &&
+						getRenderType() != LLPipeline::RENDER_TYPE_TERRAIN &&
+						getRenderType() != LLPipeline::RENDER_TYPE_SKY &&
+						getRenderType() != LLPipeline::RENDER_TYPE_GROUND);
+
+		if (rebuild)
 		{
 			gPipeline.markRebuild(this, LLDrawable::REBUILD_ALL, TRUE);
 		}
 				facep->mExtents[0].add(shift_vector);
 				facep->mExtents[1].add(shift_vector);
 			
-				if (!volume && facep->hasGeometry())
+				if (rebuild && facep->hasGeometry())
 				{
 					facep->clearVertexBuffer();
 				}
 {
 }
 
+LLSpatialGroup* LLDrawable::getSpatialGroup() const
+{ 
+	llassert((mSpatialGroupp == NULL) ? getBinIndex() == -1 : getBinIndex() != -1);
+	return mSpatialGroupp; 
+}
+
 void LLDrawable::setSpatialGroup(LLSpatialGroup *groupp)
 {
 /*if (mSpatialGroupp && (groupp != mSpatialGroupp))
 	}
 
 	mSpatialGroupp = groupp;
+
+	llassert((mSpatialGroupp == NULL) ? getBinIndex() == -1 : getBinIndex() != -1);
 }
 
 LLSpatialPartition* LLDrawable::getSpatialPartition()
 	mDrawable = root;
 	root->setSpatialBridge(this);
 	
+	mBinIndex = -1;
+
 	mRenderType = mDrawable->mRenderType;
 	mDrawableType = mDrawable->mRenderType;
 	
 		markDead();
 		return;
 	}
+	
+	if (gShiftFrame)
+	{
+		return;
+	}
 
 	if (mDrawable->getVObj())
 	{
 	LLDrawable::cleanupReferences();
 	if (mDrawable)
 	{
-		mDrawable->setSpatialGroup(NULL);
+		LLSpatialGroup* group = mDrawable->getSpatialGroup();
+		if (group)
+		{
+			group->mOctreeNode->remove(mDrawable);
+			mDrawable->setSpatialGroup(NULL);
+		}
+		
 		if (mDrawable->getVObj())
 		{
 			LLViewerObject::const_child_list_t& child_list = mDrawable->getVObj()->getChildren();
 				LLDrawable* drawable = child->mDrawable;					
 				if (drawable)
 				{
-					drawable->setSpatialGroup(NULL);
+					LLSpatialGroup* group = drawable->getSpatialGroup();
+					if (group)
+					{
+						group->mOctreeNode->remove(drawable);
+						drawable->setSpatialGroup(NULL);
+					}
 				}
 			}
 		}

indra/newview/lldrawable.h

 const U32 SILHOUETTE_HIGHLIGHT = 0;
 
 // All data for new renderer goes into this class.
+LL_ALIGN_PREFIX(16)
 class LLDrawable : public LLRefCount
 {
 public:
 
 	static void initClass();
 
+	void* operator new(size_t size)
+	{
+		return ll_aligned_malloc_16(size);
+	}
+
+	void operator delete(void* ptr)
+	{
+		ll_aligned_free_16(ptr);
+	}
+
 	LLDrawable()				{ init(); }
 	MEM_TYPE_NEW(LLMemType::MTYPE_DRAWABLE);
 	
 	F32			          getIntensity() const			{ return llmin(mXform.getScale().mV[0], 4.f); }
 	S32					  getLOD() const				{ return mVObjp ? mVObjp->getLOD() : 1; }
 	F32					  getBinRadius() const			{ return mBinRadius; }
+	S32					  getBinIndex() const			{ return mBinIndex; }
+	void				  setBinIndex(S32 index) const	{ mBinIndex = index; }
+
 	void  getMinMax(LLVector3& min,LLVector3& max) const { mXform.getMinMax(min,max); }
 	LLXformMatrix*		getXform() { return &mXform; }
 
 	S32 findReferences(LLDrawable *drawablep); // Not const because of @#$! iterators...
 
 	void setSpatialGroup(LLSpatialGroup *groupp);
-	LLSpatialGroup *getSpatialGroup() const			{ return mSpatialGroupp; }
+	LLSpatialGroup *getSpatialGroup() const;
 	LLSpatialPartition* getSpatialPartition();
 	
 	// Statics
 	} EDrawableFlags;
 
 private: //aligned members
-	LLVector4a		mExtents[2];
-	LLVector4a		mPositionGroup;
+	LL_ALIGN_16(LLVector4a		mExtents[2]);
+	LL_ALIGN_16(LLVector4a		mPositionGroup);
 	
 public:
 	LLXformMatrix       mXform;
 	mutable U32		mVisible;
 	F32				mRadius;
 	F32				mBinRadius;
+	mutable S32		mBinIndex;
 	S32				mGeneration;
 	
 	LLVector3		mCurrentScale;
 
 	static U32 sNumZombieDrawables;
 	static LLDynamicArrayPtr<LLPointer<LLDrawable> > sDeadList;
-};
+} LL_ALIGN_POSTFIX(16);
 
 
 inline LLFace* LLDrawable::getFace(const S32 i) const

indra/newview/lldrawpool.cpp

 {
 }
 
-// static
-S32 LLFacePool::drawLoop(face_array_t& face_list)
-{
-	S32 res = 0;
-	if (!face_list.empty())
-	{
-		for (std::vector<LLFace*>::iterator iter = face_list.begin();
-			 iter != face_list.end(); iter++)
-		{
-			LLFace *facep = *iter;
-			res += facep->renderIndexed();
-		}
-	}
-	return res;
-}
-
-// static
-S32 LLFacePool::drawLoopSetTex(face_array_t& face_list, S32 stage)
-{
-	S32 res = 0;
-	if (!face_list.empty())
-	{
-		for (std::vector<LLFace*>::iterator iter = face_list.begin();
-			 iter != face_list.end(); iter++)
-		{
-			LLFace *facep = *iter;
-			gGL.getTexUnit(stage)->bind(facep->getTexture(), TRUE) ;
-			gGL.getTexUnit(0)->activate();
-			res += facep->renderIndexed();
-		}
-	}
-	return res;
-}
-
-void LLFacePool::drawLoop()
-{
-	if (!mDrawFace.empty())
-	{
-		drawLoop(mDrawFace);
-	}
-}
-
 void LLFacePool::enqueue(LLFace* facep)
 {
 	mDrawFace.push_back(facep);
 
 void LLRenderPass::pushBatches(U32 type, U32 mask, BOOL texture, BOOL batch_textures)
 {
-	for (LLCullResult::drawinfo_list_t::iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)	
+	for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)	
 	{
 		LLDrawInfo* pparams = *i;
 		if (pparams) 

indra/newview/lldrawpool.h

 
 	void buildEdges();
 
-	static S32 drawLoop(face_array_t& face_list);
-	static S32 drawLoopSetTex(face_array_t& face_list, S32 stage);
-	void drawLoop();
-
 	void addFaceReference(LLFace *facep);
 	void removeFaceReference(LLFace *facep);
 

indra/newview/lldrawpoolalpha.cpp

 
 void LLDrawPoolAlpha::renderAlphaHighlight(U32 mask)
 {
-	for (LLCullResult::sg_list_t::iterator i = gPipeline.beginAlphaGroups(); i != gPipeline.endAlphaGroups(); ++i)
+	for (LLCullResult::sg_iterator i = gPipeline.beginAlphaGroups(); i != gPipeline.endAlphaGroups(); ++i)
 	{
 		LLSpatialGroup* group = *i;
 		if (group->mSpatialPartition->mRenderByGroup &&
 	
 	BOOL use_shaders = gPipeline.canUseVertexShaders();
 		
-	for (LLCullResult::sg_list_t::iterator i = gPipeline.beginAlphaGroups(); i != gPipeline.endAlphaGroups(); ++i)
+	for (LLCullResult::sg_iterator i = gPipeline.beginAlphaGroups(); i != gPipeline.endAlphaGroups(); ++i)
 	{
 		LLSpatialGroup* group = *i;
 		llassert(group);
 					continue;
 				}
 
+				if ((params.mVertexBuffer->getTypeMask() & mask) != mask)
+				{ //FIXME!
+					llwarns << "Missing required components, skipping render batch." << llendl;
+					continue;
+				}
+
 				LLRenderPass::applyModelMatrix(params);