Commits

VirLinden committed 8babff9

SH-2789 WIP - various fixes to force 16-byte alignment

  • Participants
  • Parent commits 79f726f

Comments (0)

Files changed (14)

indra/llcommon/llmemory.cpp

 LLPrivateMemoryPoolManager::mem_allocation_info_t LLPrivateMemoryPoolManager::sMemAllocationTracker;
 #endif
 
+// Checks that the address value ptr is a multiple of alignment.
+// On failure a warning is logged (so the problem is reported even where
+// llassert compiles to nothing) and llassert is then triggered.
+//   ptr       - address to check, already converted to an integer
+//   alignment - required alignment in bytes; must be non-zero
+void ll_assert_aligned_func(uintptr_t ptr,U32 alignment)
+{
+	// ptr % 0 is undefined behaviour, so reject a zero alignment before doing the modulo.
+	if (alignment == 0)
+	{
+		llwarns << "alignment check failed: requested alignment is 0" << llendl;
+		llassert(alignment != 0);
+		return;
+	}
+
+	const bool aligned = (ptr % alignment) == 0;
+	if (!aligned)
+	{
+		llwarns << "alignment check failed: address " << reinterpret_cast<const void*>(ptr)
+				<< " is not " << alignment << "-byte aligned" << llendl;
+	}
+	llassert(aligned);
+}
+
 //static
 void LLMemory::initClass()
 {

indra/llcommon/llmemory.h

 #define LLMEMORY_H
 
 #include "llmemtype.h"
-#if LL_DEBUG
+#if !LL_USE_TCMALLOC
 inline void* ll_aligned_malloc( size_t size, int align )
 {
 	void* mem = malloc( size + (align - 1) + sizeof(void*) );
 	free(p); // posix_memalign() is compatible with heap deallocator
 #endif
 }
-#else // LL_DEBUG
+
+#else // LL_USE_TCMALLOC
 // ll_aligned_foo are no-ops when building with tcmalloc (tcmalloc aligns automatically at appropriate intervals)
 #define ll_aligned_malloc( size, align ) malloc(size)
 #define ll_aligned_free( ptr ) free(ptr)
 
 #define CHECK_ALIGNMENT
 
+LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
+
 #ifdef CHECK_ALIGNMENT
-#define ll_assert_aligned(ptr,alignment) llassert((reinterpret_cast<uintptr_t>(ptr))%(alignment) == 0)
+// Both macro arguments are parenthesized so that expression arguments (e.g. a+b) are converted as a whole.
+#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)(alignment)))
 #else
 #define ll_assert_aligned(ptr,alignment)
 #endif

indra/llmath/lloctree.h

 	typedef LLOctreeNode<T>		oct_node;
 	typedef LLOctreeListener<T>	oct_listener;
 
-	/*void* operator new(size_t size)
+	void* operator new(size_t size) // re-enabled by this change: scalar new for this class goes through ll_aligned_malloc_16
 	{
 		return ll_aligned_malloc_16(size);
 	}
 	void operator delete(void* ptr) // counterpart of operator new: storage is handed back to ll_aligned_free_16
 	{
 		ll_aligned_free_16(ptr);
-	}*/
+	}
 
 	LLOctreeNode(	const LLVector4a& center, 
 					const LLVector4a& size, 

indra/llmath/llvector4a.cpp

 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
+	memcpy((void*)dst,(const void*)src,bytes);
+#if 0
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
 	assert((bytes % sizeof(F32))== 0); 
-	
+	ll_assert_aligned(src,16);
+	ll_assert_aligned(dst,16);
+
 	F32* end = dst + (bytes / sizeof(F32) );
 
 	if (bytes > 64)
 		dst += 4;
 		src += 4;
 	}
+#endif
 }
 
 void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )

indra/llmath/llvolumeoctree.h

 {
 public:
 	
+	void* operator new(size_t size) // class-scope scalar new: storage comes from ll_aligned_malloc_16
+	{
+		return ll_aligned_malloc_16(size); // NOTE(review): result is returned unchecked - confirm how allocation failure is handled
+	}
+
+	void operator delete(void* ptr) // counterpart of operator new above
+	{
+		ll_aligned_free_16(ptr); // released with the deallocator that matches ll_aligned_malloc_16
+	}
+
 	LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node);
 	~LLVolumeOctreeListener();
 	
 	
 
 public:
-	LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects)
-	LLVector4a mExtents[2]; // extents (min, max) of this node and all its children
+	LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects)
+	LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children
 };
 
 class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle>

indra/llmath/tests/alignment_test.cpp

 #include "../llsimdmath.h"
 #include "../llvector4a.h"
 
+void* operator new(size_t size) // replaces the global scalar operator new so plain 'new' in this file uses ll_aligned_malloc_16
+{
+	return ll_aligned_malloc_16(size); // NOTE(review): a replacement operator new is expected to throw std::bad_alloc rather than return NULL - confirm failure handling
+}
+
+void operator delete(void *p) // replaces the global scalar operator delete to match the operator new above
+{
+	ll_aligned_free_16(p);
+}
+
 namespace tut
 {
 
 #define is_aligned(ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr))%(alignment)==0)
+#define is_aligned_relative(ptr,base_ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr)-reinterpret_cast<uintptr_t>(base_ptr))%(alignment)==0)
 
 struct alignment_test {};
 
 	LLQuad mQ;
 } LL_ALIGN_POSTFIX(16);
 
-LL_ALIGN_PREFIX(64)
-class MyBigBlob
+// Verify that the aligned allocators return usable, correctly aligned storage.
+template<> template<>
+void alignment_test_object_t::test<1>()
 {
-public:
-	~MyBigBlob() {}
-private:
-	LLQuad mQ[4];
-} LL_ALIGN_POSTFIX(64);
+	// Repeat the allocate/check/free cycle num_tests times for each allocator.
+	const int num_tests = 7;
+	for (int i=0; i<num_tests; i++)
+	{
+		void *align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a));
+		// NULL satisfies is_aligned() (0 % 16 == 0), so a failed allocation has to be rejected explicitly.
+		ensure("ll_aligned_malloc_16 returned NULL", align_ptr != NULL);
+		ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16));
+		ll_aligned_free_16(align_ptr);
+
+		align_ptr = ll_aligned_malloc_32(sizeof(MyVector4a));
+		ensure("ll_aligned_malloc_32 returned NULL", align_ptr != NULL);
+		ensure("ll_aligned_malloc_32 failed", is_aligned(align_ptr,32));
+		ll_aligned_free_32(align_ptr);
+	}
+}
 
 // Alignment of objects and arrays declared as local variables.
 template<> template<>
-void alignment_test_object_t::test<1>()
+void alignment_test_object_t::test<2>()
 {
 	ensure("LLAlignment reality is broken: ", (1==1)); // sanity check: a trivially true condition must pass
 
 	MyVector4a vec1; // single local object
 	ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16));
 	
-	MyBigBlob bb1;
-	ensure("LLAlignment bb1 unaligned", is_aligned(&bb1,64));
-		   
-
 	MyVector4a veca[12]; // local array; only its base address is checked here
 	ensure("LLAlignment veca unaligned", is_aligned(veca,16));
-
-	MyBigBlob bba[12];
-	ensure("LLAlignment bba unaligned", is_aligned(bba,64));
 }
 
 // Heap allocation of objects and arrays.
 template<> template<>
-void alignment_test_object_t::test<2>()
+void alignment_test_object_t::test<3>()
 {
 	const int ARR_SIZE = 7;
 	for(int i=0; i<ARR_SIZE; i++)
 	}
 
 	MyVector4a *veca = new MyVector4a[ARR_SIZE];
+	ensure("LLAligment veca base", is_aligned(veca,16));
 	for(int i=0; i<ARR_SIZE; i++)
 	{
-		ensure("LLAlignment veca unaligned", is_aligned(&veca[i],16));
+		std::cout << "veca[" << i << "]" << std::endl;
+		ensure("LLAlignment veca member unaligned", is_aligned(&veca[i],16));
 	}
-
-	for(int i=0; i<ARR_SIZE; i++)
-	{
-		void *aligned_addr = _aligned_malloc(sizeof(MyBigBlob),64);
-		MyBigBlob *bbp = new(aligned_addr) MyBigBlob;
-		ensure("LLAlignment bbp unaligned", is_aligned(bbp,64));
-		bbp->~MyBigBlob();
-		_aligned_free(aligned_addr);
-	}
-
-	ensure("LLAlignment big blob size",sizeof(MyBigBlob)==64);
-	void *aligned_addr = _aligned_malloc(ARR_SIZE*sizeof(MyBigBlob),64);
-	MyBigBlob *bba = new(aligned_addr) MyBigBlob[ARR_SIZE];
-	std::cout << "aligned_addr " << aligned_addr << std::endl;
-	std::cout << "bba " << bba << std::endl;
-	for(int i=0; i<ARR_SIZE; i++)
-	{
-		std::cout << "bba test " << i << std::endl;
-		ensure("LLAlignment bba unaligned", is_aligned(&bba[i],64));
-	}
-	for(int i=0; i<ARR_SIZE; i++)
-	{
-		bba[i].~MyBigBlob();
-	}
-	_aligned_free(aligned_addr);
 }
 
 }

indra/newview/lldrawable.h

 
 	static void initClass();
 
+	void* operator new(size_t size) // class-scope scalar new: storage comes from ll_aligned_malloc_16
+	{
+		return ll_aligned_malloc_16(size); // NOTE(review): result is returned unchecked - confirm how allocation failure is handled
+	}
+
+	void operator delete(void* ptr) // counterpart of operator new above
+	{
+		ll_aligned_free_16(ptr); // released with the deallocator that matches ll_aligned_malloc_16
+	}
+
 	LLDrawable()				{ init(); }
 	MEM_TYPE_NEW(LLMemType::MTYPE_DRAWABLE);
 	

indra/newview/lldynamictexture.h

 class LLViewerDynamicTexture : public LLViewerTexture
 {
 public:
+	void* operator new(size_t size) // class-scope scalar new: storage comes from ll_aligned_malloc_16
+	{
+		return ll_aligned_malloc_16(size); // NOTE(review): no operator new[] is visible in this hunk - array-new would bypass this allocator; confirm
+	}
+
+	void operator delete(void* ptr) // counterpart of operator new above
+	{
+		ll_aligned_free_16(ptr); // released with the deallocator that matches ll_aligned_malloc_16
+	}
+
 	enum
 	{
 		LL_VIEWER_DYNAMIC_TEXTURE = LLViewerTexture::DYNAMIC_TEXTURE,
 protected:
 	BOOL mClamp;
 	LLCoordGL mOrigin;
-	LLCamera mCamera;
+	LL_ALIGN_16(LLCamera mCamera);
 	
 	typedef std::set<LLViewerDynamicTexture*> instance_list_t;
 	static instance_list_t sInstances[ LLViewerDynamicTexture::ORDER_COUNT ];

indra/newview/llface.h

 {
 public:
 
+	void* operator new(size_t size) // class-scope scalar new: storage comes from ll_aligned_malloc_16
+	{
+		return ll_aligned_malloc_16(size); // NOTE(review): no operator new[] is visible in this hunk - array-new would bypass this allocator; confirm
+	}
+
+	void operator delete(void* ptr) // counterpart of operator new above
+	{
+		ll_aligned_free_16(ptr); // released with the deallocator that matches ll_aligned_malloc_16
+	}
+
+
 	LLFace(const LLFace& rhs)
 	{
 		*this = rhs;

indra/newview/llspatialpartition.cpp

 	mCurUpdatingSlotp(NULL),
 	mCurUpdatingTexture (NULL)
 {
-	ll_assert_aligned(this,64);
+	ll_assert_aligned(this,16);
 	
 	sNodeCount++;
 	LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION);

indra/newview/llspatialpartition.h

 	~LLDrawInfo();	
 	
 public:
+	void* operator new(size_t size) // class-scope scalar new: storage comes from ll_aligned_malloc_16
+	{
+		return ll_aligned_malloc_16(size); // NOTE(review): result is returned unchecked - confirm how allocation failure is handled
+	}
+
+	void operator delete(void* ptr) // counterpart of operator new above
+	{
+		ll_aligned_free_16(ptr); // released with the deallocator that matches ll_aligned_malloc_16
+	}
+
 
 	LLDrawInfo(const LLDrawInfo& rhs)
 	{
 	F32 mPartSize;
 	F32 mVSize;
 	LLSpatialGroup* mGroup;
-	LLFace* mFace; //associated face
+	LL_ALIGN_16(LLFace* mFace); //associated face
 	F32 mDistance;
 	U32 mDrawMode;
 
 	};
 };
 
-LL_ALIGN_PREFIX(64)
+LL_ALIGN_PREFIX(16)
 class LLSpatialGroup : public LLOctreeListener<LLDrawable>
 {
 	friend class LLSpatialPartition;
 		*this = rhs;
 	}
 
+	void* operator new(size_t size) // class-scope scalar new: storage comes from ll_aligned_malloc_16
+	{
+		return ll_aligned_malloc_16(size); // NOTE(review): presumably what lets the constructor's ll_assert_aligned(this,16) hold - confirm
+	}
+
+	void operator delete(void* ptr) // counterpart of operator new above
+	{
+		ll_aligned_free_16(ptr); // released with the deallocator that matches ll_aligned_malloc_16
+	}
+
 	const LLSpatialGroup& operator=(const LLSpatialGroup& rhs)
 	{
 		llerrs << "Illegal operation!" << llendl;

indra/newview/llviewercamera.h

 class LLViewerCamera : public LLCamera, public LLSingleton<LLViewerCamera>
 {
 public:
+	void* operator new(size_t size) // class-scope scalar new: storage comes from ll_aligned_malloc_16
+	{
+		return ll_aligned_malloc_16(size); // NOTE(review): result is returned unchecked - confirm how allocation failure is handled
+	}
+
+	void operator delete(void* ptr) // counterpart of operator new above
+	{
+		ll_aligned_free_16(ptr); // released with the deallocator that matches ll_aligned_malloc_16
+	}
 
 	typedef enum
 	{

indra/newview/llvoavatar.h

  **/
 
 public:
+	void* operator new(size_t size) // class-scope scalar new: storage comes from ll_aligned_malloc_16
+	{
+		return ll_aligned_malloc_16(size); // NOTE(review): result is returned unchecked - confirm how allocation failure is handled
+	}
+
+	void operator delete(void* ptr) // counterpart of operator new above
+	{
+		ll_aligned_free_16(ptr); // released with the deallocator that matches ll_aligned_malloc_16
+	}
+
 	LLVOAvatar(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp);
 	virtual void		markDead();
 	static void			initClass(); // Initialize data that's only init'd once per class.

indra/newview/llvoavatarself.h

  **/
 
 public:
+	void* operator new(size_t size) // class-scope scalar new: storage comes from ll_aligned_malloc_16
+	{
+		return ll_aligned_malloc_16(size); // NOTE(review): result is returned unchecked - confirm how allocation failure is handled
+	}
+
+	void operator delete(void* ptr) // counterpart of operator new above
+	{
+		ll_aligned_free_16(ptr); // released with the deallocator that matches ll_aligned_malloc_16
+	}
+
 	LLVOAvatarSelf(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp);
 	virtual 				~LLVOAvatarSelf();
 	virtual void			markDead();