David Rogers avatar David Rogers committed acfa83a

Patch 3040575 - ProgressiveMesh algorithm improvement. Also adds the SmallVector class and license from the LLVM project.

Comments (0)

Files changed (12)

Docs/licenses/uiuc.txt

+==============================================================================
+LLVM Release License
+==============================================================================
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
+All rights reserved.
+
+Developed by:
+
+    LLVM Team
+
+    University of Illinois at Urbana-Champaign
+
+    http://llvm.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimers.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimers in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the names of the LLVM Team, University of Illinois at
+      Urbana-Champaign, nor the names of its contributors may be used to
+      endorse or promote products derived from this Software without specific
+      prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+==============================================================================
+Copyrights and Licenses for Third Party Software Distributed with LLVM:
+==============================================================================
+The LLVM software contains code written by third parties.  Such software will
+have its own individual LICENSE.TXT file in the directory in which it appears.
+This file will describe the copyrights, license, and restrictions which apply
+to that code.
+
+The disclaimer of warranty in the University of Illinois Open Source License
+applies to all code in the LLVM Distribution, and nothing in any of the
+other licenses gives permission to use the names of the LLVM Team or the
+University of Illinois to endorse or promote products derived from this
+Software.
+
+The following pieces of software have additional or alternate copyrights,
+licenses, and/or restrictions:
+
+Program             Directory
+-------             ---------
+Autoconf            llvm/autoconf
+                    llvm/projects/ModuleMaker/autoconf
+                    llvm/projects/sample/autoconf
+CellSPU backend     llvm/lib/Target/CellSPU/README.txt
+Google Test         llvm/utils/unittest/googletest
+OpenBSD regex       llvm/lib/Support/{reg*, COPYRIGHT.regex}

OgreMain/CMakeLists.txt

   include/OgreSkeletonInstance.h
   include/OgreSkeletonManager.h
   include/OgreSkeletonSerializer.h
+  include/OgreSmallVector.h
   include/OgreSphere.h
   include/OgreSpotShadowFadePng.h
   include/OgreStableHeaders.h
   src/OgreSkeletonInstance.cpp
   src/OgreSkeletonManager.cpp
   src/OgreSkeletonSerializer.cpp
+  src/OgreSmallVector.cpp
   src/OgreStaticGeometry.cpp
   src/OgreStreamSerialiser.cpp
   src/OgreString.cpp

OgreMain/include/OgreMesh.h

 #include "OgreAxisAlignedBox.h"
 #include "OgreVertexBoneAssignment.h"
 #include "OgreIteratorWrappers.h"
-#include "OgreProgressiveMesh.h"
 #include "OgreHardwareVertexBuffer.h"
 #include "OgreSkeleton.h"
 #include "OgreAnimationTrack.h"
 		const VertexBoneAssignmentList& getBoneAssignments() const { return mBoneAssignments; }
 
 
-		/** Automatically generates lower level of detail versions of this mesh for use
-			when a simpler version of the model is acceptable for rendering.
-		@remarks
-			There are 2 ways that you can create level-of-detail (LOD) versions of a mesh;
-			the first is to call this method, which does fairly extensive calculations to
-			work out how to simplify the mesh whilst having the minimum affect on the model.
-			The alternative is to actually create simpler versions of the mesh yourself in 
-			a modelling tool, and having exported them, tell the 'master' mesh to use these
-			alternative meshes for lower detail versions; this is done by calling the 
-			createManualLodLevel method.
-		@par
-			As well as creating the lower detail versions of the mesh, this method will
-			also associate them with depth values. As soon as an object is at least as far
-			away from the camera as the depth value associated with it's LOD, it will drop 
-			to that level of detail. 
-		@par
-			I recommend calling this method before mesh export, not at runtime.
-		@param lodValues A list of lod values indicating the values at which new lods should be
-		generated. These are 'user values', before being potentially 
-		transformed by the strategy, so for the distance strategy this is an
-		unsquared distance for example.
-		@param reductionMethod The way to determine the number of vertices collapsed per LOD
-		@param reductionValue Meaning depends on reductionMethod, typically either the proportion
-			of remaining vertices to collapse or a fixed number of vertices.
-		*/
-		void generateLodLevels(const LodValueList& lodValues, 
-			ProgressiveMesh::VertexReductionQuota reductionMethod, Real reductionValue);
-
 		/** Returns the number of levels of detail that this mesh supports. 
 		@remarks
 			This number includes the original model.

OgreMain/include/OgreProgressiveMesh.h

 #include "OgreHardwareVertexBuffer.h"
 #include "OgreHardwareIndexBuffer.h"
 #include "OgreRenderOperation.h"
+#include "OgreSmallVector.h"
 
 namespace Ogre {
 
 	/** \addtogroup LOD
 	*  @{
 	*/
-	/** This class reduces the complexity of the geometry it is given.
+	class Mesh;
+	class SubMesh;
+	
+	class _OgreExport BitArray
+	{
+	public:
+		BitArray()					: bits_ptr(NULL) {}
+		BitArray(int bits_count)	: bits_ptr(NULL) { resize(bits_count); }
+		BitArray& operator=(const BitArray& ba)	{ bits = ba.bits; bits_ptr = bits.size() > 0 ? &bits.front() : NULL; return *this; }
+		
+		bool getBit(size_t i) const	{ return bits_ptr[i >> 3] & bit_mask[i & 7]; }
+		void setBit(size_t i)		{ bits_ptr[i >> 3] |= bit_mask[i & 7]; }
+		void clearBit(size_t i)		{ bits_ptr[i >> 3] &= ~bit_mask[i & 7]; }
+		void clearAllBits()			{ memset(bits_ptr, 0, bits.size()); }
+		
+		bool empty() const			{ return bits.empty(); }
+		void resize(size_t bits_count)
+		{		
+			bits.resize((bits_count + 7) / 8);
+			bits_ptr = bits.size() > 0 ? &bits.front() : NULL;
+			clearAllBits();
+		}
+		
+		size_t getBitsCount() const
+		{
+			size_t count = 0;
+			for(unsigned char *ptr = bits_ptr, *end_ptr = bits_ptr + bits.size(); ptr != end_ptr; ++ptr)
+			{
+				const unsigned char b = *ptr;
+				count += bit_count[b & 0xF] + bit_count[b >> 4];
+			}
+			return count;
+		}
+		
+	private:
+		unsigned char*				bits_ptr;		// performance critical, so we place the raw data pointer before all other members
+		vector<unsigned char>::type	bits;
+		
+		const static unsigned char	bit_mask[8];	// = { 1, 2, 4, 8, 16, 32, 64, 128 };
+		const static unsigned char	bit_count[16];	// = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
+	};
+	
+    /** This class reduces the complexity of the geometry it is given.
         This class is dedicated to reducing the number of triangles in a given mesh
         taking into account seams in both geometry and texture co-ordinates and meshes 
         which have multiple frames.
 	class _OgreExport ProgressiveMesh : public ProgMeshAlloc
     {
     public:
-
+		typedef vector<Real>::type LodValueList;
+		
 		/** The way to derive the quota of vertices which are reduced at each LOD. */
         enum VertexReductionQuota
 		{
 			/// A set number of vertices are removed at each reduction
 			VRQ_CONSTANT,
 			/// A proportion of the remaining number of vertices are removed at each reduction
-			VRQ_PROPORTIONAL
+			VRQ_PROPORTIONAL,
+			/// All vertices with reduction error cost less than reductionValue * sqr(lodDistance[lodLevel] / lodDistance[0]) 
+			/// are removed at each reduction. Error cost is calculated as introduced error area divided by squared mesh diagonal
+			VRQ_ERROR_COST,
 		};
 
-        typedef vector<IndexData*>::type LODFaceList;
+		
+		/** Automatically generates lower level of detail versions of this mesh for use
+			 when a simpler version of the model is acceptable for rendering.
+		 @remarks
+			 There are 2 ways that you can create level-of-detail (LOD) versions of a mesh;
+			 the first is to call this method, which does fairly extensive calculations to
+			 work out how to simplify the mesh whilst having the minimum effect on the model.
+			 The alternative is to actually create simpler versions of the mesh yourself in 
+			 a modelling tool, and having exported them, tell the 'master' mesh to use these
+			 alternative meshes for lower detail versions; this is done by calling the 
+			 createManualLodLevel method.
+		 @par
+			 As well as creating the lower detail versions of the mesh, this method will
+			 also associate them with depth values. As soon as an object is at least as far
+			 away from the camera as the depth value associated with its LOD, it will drop 
+			 to that level of detail. 
+		 @par
+			 I recommend calling this method before mesh export, not at runtime.
+			 @param lodValues A list of lod values indicating the values at which new lods should be
+			 generated. These are 'user values', before being potentially 
+			 transformed by the strategy, so for the distance strategy this is an
+			 unsquared distance for example.
+		 @param reductionMethod The way to determine the number of vertices collapsed per LOD
+		 @param reductionValue Meaning depends on reductionMethod, typically either the proportion
+			 of remaining vertices to collapse or a fixed number of vertices.
+		 */
+		static bool generateLodLevels(Mesh* pMesh, const LodValueList& lodValues,
+									  VertexReductionQuota reductionMethod, Real reductionValue);
 
-        /** Constructor, takes the geometry data and index buffer. 
+		/** Automatically generates lower level of detail versions of this mesh for use
+			when a simpler version of the model is acceptable for rendering. 
+		 @remarks
+			Useful for importing of external mesh with unknown size and structure into something manageable.
+		 @par
+			Simplifies vertex structure to { pos, norm, tex0 } stored in single stream.
+			Removes unused vertices, performing reindexing.
+		 @par
+			Can optionally discard first LOD level (i.e. original geometry), unused vertices would be removed.
+		 */
+		static MeshPtr generateSimplifiedMesh(const String& name, const String& groupName, Mesh* inMesh,
+											  bool dropOriginalGeometry, const LodValueList& lodValues,
+											  VertexReductionQuota reductionMethod, Real reductionValue,
+											  size_t* removedVertexDuplicatesCount);
+	protected:
+		typedef vector<ProgressiveMesh*>::type ProgressiveMeshList;
+
+		/// Allocates internal resources
+		static void initializeProgressiveMeshList(ProgressiveMeshList& pmList, Mesh* pMesh);
+
+		/// Deletes allocated internal resources.
+		static void freeProgressiveMeshList(ProgressiveMeshList* pmList);
+
+        /** Constructor, takes SubMesh pointer. 
 		@remarks
 			DO NOT pass write-only, unshadowed buffers to this method! They will not
 			work. Pass only shadowed buffers, or better yet perform mesh reduction as
 			an offline process using DefaultHardwareBufferManager to manage vertex
 			buffers in system memory.
 		*/
-        ProgressiveMesh(const VertexData* vertexData, const IndexData* indexData);
+		ProgressiveMesh(SubMesh* pSubMesh);
         virtual ~ProgressiveMesh();
 
         /** Adds an extra vertex position buffer. 
 		@param reductionValue Either the proportion of vertices to remove at each level, or a fixed
 			number of vertices to remove at each level, depending on the value of quota
         */
-        virtual void build(ushort numLevels, LODFaceList* outList, 
-			VertexReductionQuota quota = VRQ_PROPORTIONAL, Real reductionValue = 0.5f );
+		static bool build(ProgressiveMeshList& pmInList,
+						  const LodStrategy *lodStrategy, const LodValueList& lodValues,
+						  VertexReductionQuota quota, Real reductionValue = 0.5f);
+						
+    protected:
+		/// Can be NULL for non-indexed subMeshes; such a PM would be skipped
+		SubMesh* m_pSubMesh;
+		
+        VertexData *mpVertexData;
+        IndexData *mpIndexData;
 
-    protected:
-        const VertexData *mpVertexData;
-        const IndexData *mpIndexData;
-
-        size_t mCurrNumIndexes;
-		size_t mNumCommonVertices;
+		vector<IndexData*>::type mLodFaceList;
+		
+		size_t mRemovedVertexDuplicatesCount;	
+		size_t mCurrNumIndexes;
+		float mInvSquaredBoundBoxDiagonal;
+		int mVertexComponentFlags;	
 
         // Internal classes
         class PMTriangle;
         class PMVertex;
+		struct vertexLess;
 
-        public: // VC6 hack
+    public: // VC6 hack
 
         /** A vertex as used by a face. This records the index of the actual vertex which is used
 		by the face, and a pointer to the common vertex used for surface evaluation. */
 			PMVertex* commonVertex;
 		};
 
-        protected:
+	protected:
 
         /** A triangle in the progressive mesh, holds extra info like face normal. */
         class _OgrePrivate PMTriangle {
 			PMFaceVertex* getFaceVertexFromCommon(PMVertex* commonVert);
 	        void notifyRemoved(void);
 
-	        PMFaceVertex* vertex[3]; // the 3 points that make this tri
-	        Vector3   normal;    // unit vector orthogonal to this face
-            bool      removed;   // true if this tri is now removed
-			size_t index;
+	        PMFaceVertex*	vertex[3];	// the 3 points that make this tri
+	        Vector3			normal;		// unit vector orthogonal to this face
+			Real			area;
+            bool			removed;	// true if this tri is now removed
+			size_t			index;
         };
 
         /** A vertex in the progressive mesh, holds info like collapse cost etc. 
 		*/
         class _OgrePrivate PMVertex {
         public:
-            PMVertex();
-	        void setDetails(const Vector3& v, size_t index);
+			enum BorderStatus { BS_UNKNOWN = 0, BS_NOT_BORDER, BS_BORDER };
+            typedef SmallVector<PMVertex *, 8> NeighborList;
+	        typedef SmallVector<PMTriangle *, 8> FaceList;
+
+		public:
+            PMVertex() : mBorderStatus(BS_UNKNOWN), removed(false) {}
+
+			void setDetails(size_t index, const Vector3& pos, const Vector3& normal, const Vector2& uv);
+		
+			bool isNearEnough(PMVertex* other) const;
 	        void removeIfNonNeighbor(PMVertex *n);
-			bool isBorder(void);/// true if this vertex is on the edge of an open geometry patch
+			void initBorderStatus(void);/// Set mBorderStatus to BS_BORDER if this vertex is on the edge of an open geometry patch
 			bool isManifoldEdgeWith(PMVertex* v); // is edge this->src a manifold edge?
 			void notifyRemoved(void);
+			void calculateNormal();
+		
+            Vector3 position;  // location of point in euclidean space
+			Vector3 normal;
+			Vector2 uv;
+			
+	        size_t index;       // place of vertex in original list
 
-            Vector3  position;  // location of point in euclidean space
-	        size_t index;       // place of vertex in original list
-            typedef set<PMVertex *>::type NeighborList;
-            typedef set<PMVertex *>::type DuplicateList;
-            NeighborList neighbor; // adjacent vertices
-	        typedef set<PMTriangle *>::type FaceList;
-            FaceList face;     // adjacent triangles
+			BorderStatus mBorderStatus;			
+            bool      removed;   // true if this vert is now removed
+			bool	  toBeRemoved; // debug
 
 	        Real collapseCost;  // cached cost of collapsing edge
 	        PMVertex * collapseTo; // candidate vertex for collapse
-            bool      removed;   // true if this vert is now removed
-			bool	  toBeRemoved; // denug
-
-			bool seam;	/// true if this vertex is on a model seam where vertices are duplicated
-
+			
+            NeighborList neighbor; // adjacent vertices
+            FaceList face;     // adjacent triangles
         };
-
+		
         typedef vector<PMTriangle>::type TriangleList;
         typedef vector<PMFaceVertex>::type FaceVertexList;
         typedef vector<PMVertex>::type CommonVertexList;
-        typedef vector<Real>::type WorstCostList;
+		typedef std::pair<Real, unsigned int> CostIndexPair;
+		typedef vector<CostIndexPair>::type WorstCostList;
 
         /// Data used to calculate the collapse costs
         struct PMWorkingData
         WorkingDataList mWorkingData;
 
         /// The worst collapse cost from all vertex buffers for each vertex
-        WorstCostList mWorstCosts;
+        WorstCostList	mWorstCosts;		// sorted by cost, but some of entries are invalidated, so check invalidCostMask
+		BitArray		mInvalidCostMask;	// indexed by vertex index
+		size_t			mInvalidCostCount;
+		size_t			mWorstCostsSize;
+		size_t			mNextWorstCostHint;	// getNextCollapser() uses it to reduce complexity from O(n^2) to O(n)
+			
+		/// Temporary variable used in computeEdgeCollapseCost, declared here to avoid multiple memory allocations
+		mutable PMVertex::FaceList mEdgeAdjacentSides;
 
         /// Internal method for building PMWorkingData from geometry data
         void addWorkingData(const VertexData* vertexData, const IndexData* indexData);
-
+		void mergeWorkingDataBorders();
+		
         /// Internal method for initialising the edge collapse costs
         void initialiseEdgeCollapseCosts(void);
         /// Internal calculation method for deriving a collapse cost  from u to v
-        Real computeEdgeCollapseCost(PMVertex *src, PMVertex *dest);
+        Real computeEdgeCollapseCost(PMVertex *src, PMVertex *dest) const;
+        /// Internal calculation method, return true if edge collapse flip some neighbor face normal
+        bool collapseInvertsNormals(PMVertex *src, PMVertex *dest) const;
         /// Internal method evaluates all collapse costs from this vertex and picks the lowest for a single buffer
-        Real computeEdgeCostAtVertexForBuffer(WorkingDataList::iterator idata, size_t vertIndex);
+        Real computeEdgeCostAtVertexForBuffer(PMVertex* v);
         /// Internal method evaluates all collapse costs from this vertex for every buffer and returns the worst
-        void computeEdgeCostAtVertex(size_t vertIndex);
+        Real computeEdgeCostAtVertex(size_t vertIndex);
         /// Internal method to compute edge collapse costs for all buffers /
         void computeAllCosts(void);
-        /// Internal method for getting the index of next best vertex to collapse
-        size_t getNextCollapser(void);
+
+        /// Internal methods for lazy costs recomputing
+		static size_t getInvalidCostCount(ProgressiveMesh::ProgressiveMeshList& pmList);
+		static bool recomputeInvalidCosts(ProgressiveMeshList& pmInList);
+		void recomputeInvalidCosts();
+		void sortIndexesByCost();
+		static int cmpByCost(const void* p1, const void* p2); // comparator for mWorstCosts sorting
+		
+        /// Internal methods for getting the index of next best vertex to collapse among all submeshes
+		static void getNextCollapser(ProgressiveMeshList& pmList, ProgressiveMesh*& pm, CostIndexPair*& bestCollapser);
+		CostIndexPair* getNextCollapser();
+		
         /// Internal method builds an new LOD based on the current state
         void bakeNewLOD(IndexData* pData);
+		/// Internal method builds an LODs usage, possibly skipping first LOD, that can be used as original geometry
+		static void bakeLodUsage(Mesh* pMesh, LodStrategy *lodStrategy, const LodValueList& lodValues, bool skipFirstLodLevel = false);
 
         /** Internal method, collapses vertex onto it's saved collapse target. 
         @remarks
         */
         void collapse(PMVertex *collapser);
 
+		/// We can defragment mesh, removing unused vertices and re-indexing other, storing old-to-new mapping in index map
+		typedef std::pair<unsigned, PMVertex*> IndexVertexPair;
+		/// Optionally discards first LOD level (i.e. original geometry), removes unused vertices, remaps indexes.
+		static void bakeSimplifiedMesh(Mesh* destMesh, Mesh* srcMesh, ProgressiveMeshList& pmList, bool dropFirstLodLevel = false);
+		/// Defragments vertices, removing unused ones. Useful if the original geometry is redundant or dropped entirely.
+		static void	createSimplifiedVertexData(vector<IndexVertexPair>::type& usedVertices, VertexData* inVData, VertexData*& outVData, AxisAlignedBox& aabox);
+		/// During vertices defragmentation vertices are re-indexed, so old-to-new mapping is stored in index map by this function.
+		static void createIndexMap(vector<IndexVertexPair>::type& usedVertices, unsigned allVertexCount, vector<unsigned>::type& indexMap);
+		
 		/** Internal debugging method */
 		void dumpContents(const String& log);
-
-
-
-
-
-
-
-
-
     };
-
-
+			
+	template <typename T> struct HardwareBufferLockGuard
+	{
+		HardwareBufferLockGuard(const T& p, HardwareBuffer::LockOptions options)
+		: pBuf(p)
+		{
+			pData = pBuf->lock(options);
+		}
+		HardwareBufferLockGuard(const T& p, size_t offset, size_t length, HardwareBuffer::LockOptions options)
+		: pBuf(p)
+		{
+			pData = pBuf->lock(offset, length, options);
+		}		
+		~HardwareBufferLockGuard()
+		{
+			pBuf->unlock();
+		}
+		const T& pBuf;
+		void* pData;
+	};
+	
+	typedef HardwareBufferLockGuard<HardwareVertexBufferSharedPtr> VertexBufferLockGuard;
+	typedef HardwareBufferLockGuard<HardwareIndexBufferSharedPtr> IndexBufferLockGuard;
+	
 	/** @} */
 	/** @} */
-
 }
 
 #endif 

OgreMain/include/OgreSmallVector.h

+//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.
+// ==============================================================================
+// LLVM Release License
+// ==============================================================================
+// University of Illinois/NCSA
+// Open Source License
+//
+// Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
+// All rights reserved.
+//
+// Developed by:
+//
+// LLVM Team
+//
+// University of Illinois at Urbana-Champaign
+//
+// http://llvm.org
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal with
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimers.
+//
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimers in the
+// documentation and/or other materials provided with the distribution.
+//
+// * Neither the names of the LLVM Team, University of Illinois at
+// Urbana-Champaign, nor the names of its contributors may be used to
+// endorse or promote products derived from this Software without specific
+// prior written permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+// SOFTWARE.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SmallVector class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __SmallVector_H
+#define __SmallVector_H
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <memory>
+
+#ifdef _MSC_VER
+namespace std {
+#if _MSC_VER <= 1310
+	// Work around flawed VC++ implementation of std::uninitialized_copy.  Define
+	// additional overloads so that elements with pointer types are recognized as
+	// scalars and not objects, causing bizarre type conversion errors.
+	template<class T1, class T2>
+	inline _Scalar_ptr_iterator_tag _Ptr_cat(T1 **, T2 **) {
+		_Scalar_ptr_iterator_tag _Cat;
+		return _Cat;
+	}
+	
+	template<class T1, class T2>
+	inline _Scalar_ptr_iterator_tag _Ptr_cat(T1* const *, T2 **) {
+		_Scalar_ptr_iterator_tag _Cat;
+		return _Cat;
+	}
+#else
+	// FIXME: It is not clear if the problem is fixed in VS 2005.  What is clear
+	// is that the above hack won't work if it wasn't fixed.
+#endif
+}
+#endif
+
+namespace Ogre {
+	
+    // some type traits
+	template <typename T>	struct isPodLike { static const bool value = false; };
+	
+	template <>				struct isPodLike<bool>				{ static const bool value = true; };
+	template <>				struct isPodLike<char>				{ static const bool value = true; };
+	template <>				struct isPodLike<signed char>		{ static const bool value = true; };
+	template <>				struct isPodLike<unsigned char>		{ static const bool value = true; };
+	template <>				struct isPodLike<int>				{ static const bool value = true; };
+	template <>				struct isPodLike<unsigned>			{ static const bool value = true; };
+	template <>				struct isPodLike<short>				{ static const bool value = true; };
+	template <>				struct isPodLike<unsigned short>	{ static const bool value = true; };
+	template <>				struct isPodLike<long>				{ static const bool value = true; };
+	template <>				struct isPodLike<unsigned long>		{ static const bool value = true; };
+	template <>				struct isPodLike<float>				{ static const bool value = true; };
+	template <>				struct isPodLike<double>			{ static const bool value = true; };
+	template <typename T>	struct isPodLike<T*>				{ static const bool value = true; };
+	
+	template<typename T, typename U>
+	struct isPodLike<std::pair<T, U> > { static const bool value = isPodLike<T>::value & isPodLike<U>::value; };
+
+	/// SmallVectorBase - This is all the non-templated stuff common to all
+	/// SmallVectors.
+	class SmallVectorBase {
+	protected:
+		void *BeginX, *EndX, *CapacityX;
+		
+		// Allocate raw space for N elements of type T.  If T has a ctor or dtor, we
+		// don't want it to be automatically run, so we need to represent the space as
+		// something else.  An array of char would work great, but might not be
+		// aligned sufficiently.  Instead we use some number of union instances for
+		// the space, which guarantee maximal alignment.
+		union U {
+				double D;
+				long double LD;
+				long long L;
+				void *P;
+		} FirstEl;
+		// Space after 'FirstEl' is clobbered, do not add any instance vars after it.
+		
+	protected:
+		SmallVectorBase(size_t Size)
+		: BeginX(&FirstEl), EndX(&FirstEl), CapacityX((char*)&FirstEl+Size) {}
+		
+		/// isSmall - Return true if this is a smallvector which has not had dynamic
+		/// memory allocated for it.
+		bool isSmall() const {
+			return BeginX == static_cast<const void*>(&FirstEl);
+		}
+		
+		/// size_in_bytes - This returns size()*sizeof(T).
+		size_t size_in_bytes() const {
+			return size_t((char*)EndX - (char*)BeginX);
+		}
+		
+		/// capacity_in_bytes - This returns capacity()*sizeof(T).
+		size_t capacity_in_bytes() const {
+			return size_t((char*)CapacityX - (char*)BeginX);
+		}
+		
+		/// grow_pod - This is an implementation of the grow() method which only works
+		/// on POD-like data types and is out of line to reduce code duplication.
+		void grow_pod(size_t MinSizeInBytes, size_t TSize);
+		
+	public:
+		bool empty() const { return BeginX == EndX; }
+	};
+	
+	
+	template <typename T>
+	class SmallVectorTemplateCommon : public SmallVectorBase {
+	protected:
+		void setEnd(T *P) { this->EndX = P; }
+	public:
+		SmallVectorTemplateCommon(size_t Size) : SmallVectorBase(Size) {}
+		
+		typedef size_t size_type;
+		typedef ptrdiff_t difference_type;
+		typedef T value_type;
+		typedef T *iterator;
+		typedef const T *const_iterator;
+		
+		typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+		typedef std::reverse_iterator<iterator> reverse_iterator;
+		
+		typedef T &reference;
+		typedef const T &const_reference;
+		typedef T *pointer;
+		typedef const T *const_pointer;
+		
+		// forward iterator creation methods.
+		iterator begin() { return (iterator)this->BeginX; }
+		const_iterator begin() const { return (const_iterator)this->BeginX; }
+		iterator end() { return (iterator)this->EndX; }
+		const_iterator end() const { return (const_iterator)this->EndX; }
+	protected:
+		iterator capacity_ptr() { return (iterator)this->CapacityX; }
+		const_iterator capacity_ptr() const { return (const_iterator)this->CapacityX;}
+	public:
+		
+		// reverse iterator creation methods.
+		reverse_iterator rbegin()            { return reverse_iterator(end()); }
+		const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
+		reverse_iterator rend()              { return reverse_iterator(begin()); }
+		const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
+		
+		size_type size() const { return end()-begin(); }
+		size_type max_size() const { return size_type(-1) / sizeof(T); }
+		
+		/// capacity - Return the total number of elements in the currently allocated
+		/// buffer.
+		size_t capacity() const { return capacity_ptr() - begin(); }
+		
+		/// data - Return a pointer to the vector's buffer, even if empty().
+		pointer data() { return pointer(begin()); }
+		/// data - Return a pointer to the vector's buffer, even if empty().
+		const_pointer data() const { return const_pointer(begin()); }
+		
+		reference operator[](unsigned idx) {
+			assert(begin() + idx < end());
+			return begin()[idx];
+		}
+		const_reference operator[](unsigned idx) const {
+			assert(begin() + idx < end());
+			return begin()[idx];
+		}
+		
+		reference front() {
+			return begin()[0];
+		}
+		const_reference front() const {
+			return begin()[0];
+		}
+		
+		reference back() {
+			return end()[-1];
+		}
+		const_reference back() const {
+			return end()[-1];
+		}
+	};
+	
+	/// SmallVectorTemplateBase<isPodLike = false> - This is where we put method
+	/// implementations that are designed to work with non-POD-like T's.
+	template <typename T, bool isPodLike>
+	class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
+	public:
+		SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
+		
+		/// destroy_range - Run destructors over [S, E), in reverse order so that
+		/// elements are destroyed last-to-first, mirroring construction order.
+		static void destroy_range(T *S, T *E) {
+			while (S != E) {
+				--E;
+				E->~T();
+			}
+		}
+		
+		/// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory
+		/// starting with "Dest", constructing elements into it as needed.
+		template<typename It1, typename It2>
+		static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
+			std::uninitialized_copy(I, E, Dest);
+		}
+		
+		/// grow - double the size of the allocated memory, guaranteeing space for at
+		/// least one more element or MinSize if specified.
+		void grow(size_t MinSize = 0);
+	};
+	
+	// Define this out-of-line to dissuade the C++ compiler from inlining it.
+	// Allocates a new buffer (at least 2*capacity+1, or MinSize), copy-constructs
+	// the existing elements into it, destroys the originals, and frees the old
+	// heap buffer (the inline buffer is never freed).  Not exception safe: if a
+	// copy constructor throws, the new buffer leaks (by design, see class docs).
+	// NOTE(review): malloc's return value is not checked for NULL — confirm this
+	// matches the project's allocation-failure policy.
+	template <typename T, bool isPodLike>
+	void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) {
+		size_t CurCapacity = this->capacity();
+		size_t CurSize = this->size();
+  size_t NewCapacity = 2*CurCapacity + 1; // Always grow, even from zero.
+		if (NewCapacity < MinSize)
+			NewCapacity = MinSize;
+		T *NewElts = static_cast<T*>(malloc(NewCapacity*sizeof(T)));
+		
+		// Copy the elements over.
+		this->uninitialized_copy(this->begin(), this->end(), NewElts);
+		
+		// Destroy the original elements.
+		destroy_range(this->begin(), this->end());
+		
+		// If this wasn't grown from the inline copy, deallocate the old space.
+		if (!this->isSmall())
+			free(this->begin());
+		
+		this->setEnd(NewElts+CurSize);
+		this->BeginX = NewElts;
+		this->CapacityX = this->begin()+NewCapacity;
+	}
+	
+	
+	/// SmallVectorTemplateBase<isPodLike = true> - This is where we put method
+	/// implementations that are designed to work with POD-like T's.
+	template <typename T>
+	class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
+	public:
+		SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
+		
+		// No need to do a destroy loop for POD's.
+		static void destroy_range(T *, T *) {}
+		
+		/// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory
+		/// starting with "Dest", constructing elements into it as needed.
+		template<typename It1, typename It2>
+		static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
+			// Arbitrary iterator types; just use the basic implementation.
+			std::uninitialized_copy(I, E, Dest);
+		}
+		
+		/// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory
+		/// starting with "Dest", constructing elements into it as needed.
+		/// Overload chosen by overload resolution whenever both ranges are raw
+		/// pointers, which is the common case for SmallVector-to-SmallVector copies.
+		template<typename T1, typename T2>
+		static void uninitialized_copy(T1 *I, T1 *E, T2 *Dest) {
+			// Use memcpy for PODs iterated by pointers (which includes SmallVector
+			// iterators): std::uninitialized_copy optimizes to memmove, but we can
+			// use memcpy here.
+			memcpy(Dest, I, (E-I)*sizeof(T));
+		}
+		
+		/// grow - double the size of the allocated memory, guaranteeing space for at
+		/// least one more element or MinSize if specified.
+		/// Delegates to the type-erased grow_pod to avoid template code bloat.
+		void grow(size_t MinSize = 0) {
+			this->grow_pod(MinSize*sizeof(T), sizeof(T));
+		}
+	};
+	
+	
+	/// SmallVectorImpl - This class consists of common code factored out of the
+	/// SmallVector class to reduce code duplication based on the SmallVector 'N'
+	/// template parameter.
+	/// Pass SmallVector<T, N> arguments as SmallVectorImpl<T>& to make callees
+	/// independent of the inline element count N.
+	template <typename T>
+	class SmallVectorImpl : public SmallVectorTemplateBase<T, isPodLike<T>::value> {
+		typedef SmallVectorTemplateBase<T, isPodLike<T>::value > SuperClass;
+		
+		SmallVectorImpl(const SmallVectorImpl&); // DISABLED.
+	public:
+		typedef typename SuperClass::iterator iterator;
+		typedef typename SuperClass::size_type size_type;
+		
+		// Default ctor - Initialize to empty.
+		explicit SmallVectorImpl(unsigned N)
+		: SmallVectorTemplateBase<T, isPodLike<T>::value>(N*sizeof(T)) {
+		}
+		
+		~SmallVectorImpl() {
+			// Destroy the constructed elements in the vector.
+			this->destroy_range(this->begin(), this->end());
+			
+			// If this wasn't grown from the inline copy, deallocate the old space.
+			if (!this->isSmall())
+				free(this->begin());
+		}
+		
+		
+		/// clear - Destroy all elements; capacity is retained.
+		void clear() {
+			this->destroy_range(this->begin(), this->end());
+			this->EndX = this->BeginX;
+		}
+		
+		/// resize - Shrink by destroying tail elements, or grow by appending
+		/// value-initialized (T()) elements.
+		void resize(unsigned N) {
+			if (N < this->size()) {
+				this->destroy_range(this->begin()+N, this->end());
+				this->setEnd(this->begin()+N);
+			} else if (N > this->size()) {
+				if (this->capacity() < N)
+					this->grow(N);
+				this->construct_range(this->end(), this->begin()+N, T());
+				this->setEnd(this->begin()+N);
+			}
+		}
+		
+		/// resize - As above, but new elements are copy-constructed from NV.
+		void resize(unsigned N, const T &NV) {
+			if (N < this->size()) {
+				this->destroy_range(this->begin()+N, this->end());
+				this->setEnd(this->begin()+N);
+			} else if (N > this->size()) {
+				if (this->capacity() < N)
+					this->grow(N);
+				construct_range(this->end(), this->begin()+N, NV);
+				this->setEnd(this->begin()+N);
+			}
+		}
+		
+		void reserve(unsigned N) {
+			if (this->capacity() < N)
+				this->grow(N);
+		}
+		
+		// NOTE(review): Elt is bound by reference; if it refers into this vector,
+		// grow() may invalidate it before the placement-new — confirm callers never
+		// push_back(v[i]) of the same vector when at capacity.
+		void push_back(const T &Elt) {
+			if (this->EndX < this->CapacityX) {
+			Retry:
+				new (this->end()) T(Elt);
+				this->setEnd(this->end()+1);
+				return;
+			}
+			this->grow();
+			goto Retry;
+		}
+		
+		/// pop_back - Remove and destroy the last element.  Undefined on empty.
+		void pop_back() {
+			this->setEnd(this->end()-1);
+			this->end()->~T();
+		}
+		
+		/// pop_back_val - Copy out the last element, then remove it.
+		T pop_back_val() {
+			T Result = this->back();
+			pop_back();
+			return Result;
+		}
+		
+		void swap(SmallVectorImpl &RHS);
+		
+		/// append - Add the specified range to the end of the SmallVector.
+		///
+		template<typename in_iter>
+		void append(in_iter in_start, in_iter in_end) {
+			size_type NumInputs = std::distance(in_start, in_end);
+			// Grow allocated space if needed.
+			if (NumInputs > size_type(this->capacity_ptr()-this->end()))
+				this->grow(this->size()+NumInputs);
+			
+			// Copy the new elements over.
+			// TODO: NEED To compile time dispatch on whether in_iter is a random access
+			// iterator to use the fast uninitialized_copy.
+			std::uninitialized_copy(in_start, in_end, this->end());
+			this->setEnd(this->end() + NumInputs);
+		}
+		
+		/// append - Add the specified range to the end of the SmallVector.
+		///
+		void append(size_type NumInputs, const T &Elt) {
+			// Grow allocated space if needed.
+			if (NumInputs > size_type(this->capacity_ptr()-this->end()))
+				this->grow(this->size()+NumInputs);
+			
+			// Copy the new elements over.
+			std::uninitialized_fill_n(this->end(), NumInputs, Elt);
+			this->setEnd(this->end() + NumInputs);
+		}
+		
+		/// assign - Replace the contents with NumElts copies of Elt.
+		void assign(unsigned NumElts, const T &Elt) {
+			clear();
+			if (this->capacity() < NumElts)
+				this->grow(NumElts);
+			this->setEnd(this->begin()+NumElts);
+			construct_range(this->begin(), this->end(), Elt);
+		}
+		
+		/// erase - Remove the element at I; returns an iterator to the element
+		/// that now occupies I's position.
+		iterator erase(iterator I) {
+			iterator N = I;
+			// Shift all elts down one.
+			std::copy(I+1, this->end(), I);
+			// Drop the last elt.
+			pop_back();
+			return(N);
+		}
+		
+		/// erase - Remove the range [S, E); returns an iterator to the element
+		/// that now occupies S's position.
+		iterator erase(iterator S, iterator E) {
+			iterator N = S;
+			// Shift all elts down.
+			iterator I = std::copy(E, this->end(), S);
+			// Drop the last elts.
+			this->destroy_range(I, this->end());
+			this->setEnd(I);
+			return(N);
+		}
+		
+		// NOTE(review): as with push_back, Elt must not reference into this vector
+		// when a reallocation can occur — grow() would invalidate it.
+		iterator insert(iterator I, const T &Elt) {
+			if (I == this->end()) {  // Important special case for empty vector.
+				push_back(Elt);
+				return this->end()-1;
+			}
+			
+			if (this->EndX < this->CapacityX) {
+			Retry:
+				new (this->end()) T(this->back());
+				this->setEnd(this->end()+1);
+				// Push everything else over.
+				std::copy_backward(I, this->end()-1, this->end());
+				*I = Elt;
+				return I;
+			}
+			size_t EltNo = I-this->begin();
+			this->grow();
+			I = this->begin()+EltNo;
+			goto Retry;
+		}
+		
+		/// insert - Insert NumToInsert copies of Elt before I.
+		iterator insert(iterator I, size_type NumToInsert, const T &Elt) {
+			if (I == this->end()) {  // Important special case for empty vector.
+				append(NumToInsert, Elt);
+				return this->end()-1;
+			}
+			
+			// Convert iterator to elt# to avoid invalidating iterator when we reserve()
+			size_t InsertElt = I - this->begin();
+			
+			// Ensure there is enough space.
+			reserve(static_cast<unsigned>(this->size() + NumToInsert));
+			
+			// Uninvalidate the iterator.
+			I = this->begin()+InsertElt;
+			
+			// If there are more elements between the insertion point and the end of the
+			// range than there are being inserted, we can use a simple approach to
+			// insertion.  Since we already reserved space, we know that this won't
+			// reallocate the vector.
+			if (size_t(this->end()-I) >= NumToInsert) {
+				T *OldEnd = this->end();
+				append(this->end()-NumToInsert, this->end());
+				
+				// Copy the existing elements that get replaced.
+				std::copy_backward(I, OldEnd-NumToInsert, OldEnd);
+				
+				std::fill_n(I, NumToInsert, Elt);
+				return I;
+			}
+			
+			// Otherwise, we're inserting more elements than exist already, and we're
+			// not inserting at the end.
+			
+			// Copy over the elements that we're about to overwrite.
+			T *OldEnd = this->end();
+			this->setEnd(this->end() + NumToInsert);
+			size_t NumOverwritten = OldEnd-I;
+			this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten);
+			
+			// Replace the overwritten part.
+			std::fill_n(I, NumOverwritten, Elt);
+			
+			// Insert the non-overwritten middle part.
+			std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt);
+			return I;
+		}
+		
+		/// insert - Insert the range [From, To) before I.
+		template<typename ItTy>
+		iterator insert(iterator I, ItTy From, ItTy To) {
+			if (I == this->end()) {  // Important special case for empty vector.
+				append(From, To);
+				return this->end()-1;
+			}
+			
+			size_t NumToInsert = std::distance(From, To);
+			// Convert iterator to elt# to avoid invalidating iterator when we reserve()
+			size_t InsertElt = I - this->begin();
+			
+			// Ensure there is enough space.
+			reserve(static_cast<unsigned>(this->size() + NumToInsert));
+			
+			// Uninvalidate the iterator.
+			I = this->begin()+InsertElt;
+			
+			// If there are more elements between the insertion point and the end of the
+			// range than there are being inserted, we can use a simple approach to
+			// insertion.  Since we already reserved space, we know that this won't
+			// reallocate the vector.
+			if (size_t(this->end()-I) >= NumToInsert) {
+				T *OldEnd = this->end();
+				append(this->end()-NumToInsert, this->end());
+				
+				// Copy the existing elements that get replaced.
+				std::copy_backward(I, OldEnd-NumToInsert, OldEnd);
+				
+				std::copy(From, To, I);
+				return I;
+			}
+			
+			// Otherwise, we're inserting more elements than exist already, and we're
+			// not inserting at the end.
+			
+			// Copy over the elements that we're about to overwrite.
+			T *OldEnd = this->end();
+			this->setEnd(this->end() + NumToInsert);
+			size_t NumOverwritten = OldEnd-I;
+			this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten);
+			
+			// Replace the overwritten part.
+			for (; NumOverwritten > 0; --NumOverwritten) {
+				*I = *From;
+				++I; ++From;
+			}
+			
+			// Insert the non-overwritten middle part.
+			this->uninitialized_copy(From, To, OldEnd);
+			return I;
+		}
+		
+		const SmallVectorImpl
+		&operator=(const SmallVectorImpl &RHS);
+		
+		bool operator==(const SmallVectorImpl &RHS) const {
+			if (this->size() != RHS.size()) return false;
+			return std::equal(this->begin(), this->end(), RHS.begin());
+		}
+		bool operator!=(const SmallVectorImpl &RHS) const {
+			return !(*this == RHS);
+		}
+		
+		bool operator<(const SmallVectorImpl &RHS) const {
+			return std::lexicographical_compare(this->begin(), this->end(),
+												RHS.begin(), RHS.end());
+		}
+		
+		/// set_size - Set the array size to \arg N, which the current array must have
+		/// enough capacity for.
+		///
+		/// This does not construct or destroy any elements in the vector.
+		///
+		/// Clients can use this in conjunction with capacity() to write past the end
+		/// of the buffer when they know that more elements are available, and only
+		/// update the size later. This avoids the cost of value initializing elements
+		/// which will only be overwritten.
+		void set_size(unsigned N) {
+			assert(N <= this->capacity());
+			this->setEnd(this->begin() + N);
+		}
+		
+	private:
+		/// construct_range - Copy-construct Elt into each slot of the raw
+		/// memory range [S, E).
+		static void construct_range(T *S, T *E, const T &Elt) {
+			for (; S != E; ++S)
+				new (S) T(Elt);
+		}
+	};
+	
+	
+	// swap - Exchange contents with RHS.  If both vectors are heap-allocated the
+	// three pointers are swapped in O(1); otherwise (either side still using its
+	// inline buffer) elements are swapped/moved individually, which is O(n).
+	template <typename T>
+	void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
+		if (this == &RHS) return;
+		
+		// We can only avoid copying elements if neither vector is small.
+		if (!this->isSmall() && !RHS.isSmall()) {
+			std::swap(this->BeginX, RHS.BeginX);
+			std::swap(this->EndX, RHS.EndX);
+			std::swap(this->CapacityX, RHS.CapacityX);
+			return;
+		}
+		if (RHS.size() > this->capacity())
+			this->grow(RHS.size());
+		if (this->size() > RHS.capacity())
+			RHS.grow(this->size());
+		
+		// Swap the shared elements.
+		size_t NumShared = this->size();
+		if (NumShared > RHS.size()) NumShared = RHS.size();
+		for (unsigned i = 0; i != static_cast<unsigned>(NumShared); ++i)
+			std::swap((*this)[i], RHS[i]);
+		
+		// Copy over the extra elts.
+		if (this->size() > RHS.size()) {
+			size_t EltDiff = this->size() - RHS.size();
+			this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
+			RHS.setEnd(RHS.end()+EltDiff);
+			this->destroy_range(this->begin()+NumShared, this->end());
+			this->setEnd(this->begin()+NumShared);
+		} else if (RHS.size() > this->size()) {
+			size_t EltDiff = RHS.size() - this->size();
+			this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
+			this->setEnd(this->end() + EltDiff);
+			this->destroy_range(RHS.begin()+NumShared, RHS.end());
+			RHS.setEnd(RHS.begin()+NumShared);
+		}
+	}
+	
+	// operator= - Copy-assign from RHS.  Reuses already-constructed elements via
+	// assignment where possible; only the tail beyond the current size is
+	// copy-constructed into raw memory.
+	template <typename T>
+	const SmallVectorImpl<T> &SmallVectorImpl<T>::
+	operator=(const SmallVectorImpl<T> &RHS) {
+		// Avoid self-assignment.
+		if (this == &RHS) return *this;
+		
+		// If we already have sufficient space, assign the common elements, then
+		// destroy any excess.
+		size_t RHSSize = RHS.size();
+		size_t CurSize = this->size();
+		if (CurSize >= RHSSize) {
+			// Assign common elements.
+			iterator NewEnd;
+			if (RHSSize)
+				NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
+			else
+				NewEnd = this->begin();
+			
+			// Destroy excess elements.
+			this->destroy_range(NewEnd, this->end());
+			
+			// Trim.
+			this->setEnd(NewEnd);
+			return *this;
+		}
+		
+		// If we have to grow to have enough elements, destroy the current elements.
+		// This allows us to avoid copying them during the grow.
+		if (this->capacity() < RHSSize) {
+			// Destroy current elements.
+			this->destroy_range(this->begin(), this->end());
+			this->setEnd(this->begin());
+			CurSize = 0;
+			this->grow(RHSSize);
+		} else if (CurSize) {
+			// Otherwise, use assignment for the already-constructed elements.
+			std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
+		}
+		
+		// Copy construct the new elements in place.
+		this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
+								 this->begin()+CurSize);
+		
+		// Set end.
+		this->setEnd(this->begin()+RHSSize);
+		return *this;
+	}
+	
+	
+	/// SmallVector - This is a 'vector' (really, a variable-sized array), optimized
+	/// for the case when the array is small.  It contains some number of elements
+	/// in-place, which allows it to avoid heap allocation when the actual number of
+	/// elements is below that threshold.  This allows normal "small" cases to be
+	/// fast without losing generality for large inputs.
+	///
+	/// Note that this does not attempt to be exception safe.
+	///
+	template <typename T, unsigned N>
+	class SmallVector : public SmallVectorImpl<T> {
+		/// InlineElts - These are 'N-1' elements that are stored inline in the body
+		/// of the vector.  The extra '1' element is stored in SmallVectorImpl.
+		typedef typename SmallVectorImpl<T>::U U;
+		enum {
+			// MinUs - The number of U's required to cover N T's
+			// (rounded up so partial U's still count).
+			MinUs = (static_cast<unsigned int>(sizeof(T))*N +
+					 static_cast<unsigned int>(sizeof(U)) - 1) /
+            static_cast<unsigned int>(sizeof(U)),
+			
+			// NumInlineEltsElts - The number of elements actually in this array.  There
+			// is already one in the parent class, and we have to round up to avoid
+			// having a zero-element array.
+			NumInlineEltsElts = MinUs > 1 ? (MinUs - 1) : 1,
+			
+			// NumTsAvailable - The number of T's we actually have space for, which may
+			// be more than N due to rounding.
+			NumTsAvailable = (NumInlineEltsElts+1)*static_cast<unsigned int>(sizeof(U))/
+			static_cast<unsigned int>(sizeof(T))
+		};
+		// Raw (suitably aligned) storage for the inline elements; constructed lazily.
+		U InlineElts[NumInlineEltsElts];
+	public:
+		SmallVector() : SmallVectorImpl<T>(NumTsAvailable) {
+		}
+		
+		/// Construct with Size copies of Value.
+		explicit SmallVector(unsigned Size, const T &Value = T())
+		: SmallVectorImpl<T>(NumTsAvailable) {
+			this->reserve(Size);
+			while (Size--)
+				this->push_back(Value);
+		}
+		
+		/// Construct from the iterator range [S, E).
+		template<typename ItTy>
+		SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(NumTsAvailable) {
+			this->append(S, E);
+		}
+		
+		SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(NumTsAvailable) {
+			if (!RHS.empty())
+				SmallVectorImpl<T>::operator=(RHS);
+		}
+		
+		const SmallVector &operator=(const SmallVector &RHS) {
+			SmallVectorImpl<T>::operator=(RHS);
+			return *this;
+		}
+		
+	};
+
+/// Specialize SmallVector at N=0.  This specialization guarantees
+/// that it can be instantiated at an incomplete T if none of its
+/// members are required.
+template <typename T>
+class SmallVector<T,0> : public SmallVectorImpl<T> {
+public:
+  SmallVector() : SmallVectorImpl<T>(0) {}
+
+  /// Construct with Size copies of Value.
+  explicit SmallVector(unsigned Size, const T &Value = T())
+    : SmallVectorImpl<T>(0) {
+    this->reserve(Size);
+    while (Size--)
+      this->push_back(Value);
+  }
+
+  /// Construct from the iterator range [S, E).
+  template<typename ItTy>
+  SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(0) {
+    this->append(S, E);
+  }
+
+  SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(0) {
+    SmallVectorImpl<T>::operator=(RHS);
+  }
+
+  // Note: unlike the primary template, assignment accepts any
+  // SmallVectorImpl<T>, not just SmallVector<T,0>.
+  SmallVector &operator=(const SmallVectorImpl<T> &RHS) {
+    return SmallVectorImpl<T>::operator=(RHS);
+  }
+
+};
+
+} // End Ogre namespace
+
+namespace std {
+	/// Implement std::swap in terms of SmallVector swap.
+	template<typename T>
+	inline void
+	swap(Ogre::SmallVectorImpl<T> &LHS, Ogre::SmallVectorImpl<T> &RHS) {
+		LHS.swap(RHS);
+	}
+	
+	/// Implement std::swap in terms of SmallVector swap.
+	/// Overload for the concrete SmallVector<T, N> so the derived type
+	/// is matched before the generic std::swap.
+	template<typename T, unsigned N>
+	inline void
+	swap(Ogre::SmallVector<T, N> &LHS, Ogre::SmallVector<T, N> &RHS) {
+		LHS.swap(RHS);
+	}
+}
+
+#endif

OgreMain/include/OgreSubMesh.h

 #include "OgreMaterial.h"
 #include "OgreRenderOperation.h"
 #include "OgreVertexBoneAssignment.h"
-#include "OgreProgressiveMesh.h"
 #include "OgreAnimationTrack.h"
 #include "OgreResourceGroupManager.h"
 
         typedef vector<unsigned short>::type IndexMap;
         IndexMap blendIndexToBoneIndexMap;
 
-        ProgressiveMesh::LODFaceList mLodFaceList;
+		typedef vector<IndexData*>::type LODFaceList;
+		LODFaceList mLodFaceList;
 
         /** A list of extreme points on the submesh (optional).
             @remarks

OgreMain/src/OgreMesh.cpp

 
             // Copy lod face lists
             newSub->mLodFaceList.reserve((*subi)->mLodFaceList.size());
-            ProgressiveMesh::LODFaceList::const_iterator facei;
+            SubMesh::LODFaceList::const_iterator facei;
             for (facei = (*subi)->mLodFaceList.begin(); facei != (*subi)->mLodFaceList.end(); ++facei) {
                 IndexData* newIndexData = (*facei)->clone();
                 newSub->mLodFaceList.push_back(newIndexData);
         return mSkeletonName;
     }
     //---------------------------------------------------------------------
-    void Mesh::generateLodLevels(const LodValueList& lodValues,
-        ProgressiveMesh::VertexReductionQuota reductionMethod, Real reductionValue)
-    {
-#if OGRE_DEBUG_MODE
-        mLodStrategy->assertSorted(lodValues);
-#endif
-
-        removeLodLevels();
-
-		LogManager::getSingleton().stream()
-			<< "Generating " << lodValues.size()
-			<< " lower LODs for mesh " << mName;
-
-        SubMeshList::iterator isub, isubend;
-        isubend = mSubMeshList.end();
-        for (isub = mSubMeshList.begin(); isub != isubend; ++isub)
-        {
-            // check if triangles are present
-            if ((*isub)->indexData->indexCount > 0)
-            {
-                // Set up data for reduction
-                VertexData* pVertexData = (*isub)->useSharedVertices ? sharedVertexData : (*isub)->vertexData;
-
-                ProgressiveMesh pm(pVertexData, (*isub)->indexData);
-                pm.build(
-                static_cast<ushort>(lodValues.size()),
-                    &((*isub)->mLodFaceList),
-                    reductionMethod, reductionValue);
-
-            }
-            else
-            {
-                // create empty index data for each lod
-                for (size_t i = 0; i < lodValues.size(); ++i)
-                {
-                    (*isub)->mLodFaceList.push_back(OGRE_NEW IndexData);
-                }
-            }
-        }
-
-        // Iterate over the lods and record usage
-        LodValueList::const_iterator ivalue, ivalueend;
-        ivalueend = lodValues.end();
-        mMeshLodUsageList.resize(lodValues.size() + 1);
-        MeshLodUsageList::iterator ilod = mMeshLodUsageList.begin();
-        for (ivalue = lodValues.begin(); ivalue != ivalueend; ++ivalue)
-        {
-            // Record usage
-            MeshLodUsage& lod = *++ilod;
-            lod.userValue = (*ivalue);
-            lod.value = mLodStrategy->transformUserValue(lod.userValue);
-            lod.edgeData = 0;
-            lod.manualMesh.setNull();
-        }
-        mNumLods = static_cast<ushort>(lodValues.size() + 1);
-    }
-    //---------------------------------------------------------------------
     ushort Mesh::getNumLodLevels(void) const
     {
         return mNumLods;

OgreMain/src/OgreProgressiveMesh.cpp

 */
 
 #include "OgreProgressiveMesh.h"
+#include "OgreLodStrategyManager.h"
+#include "OgreMeshManager.h"
+#include "OgreSubMesh.h"
 #include "OgreString.h"
 #include "OgreHardwareBufferManager.h"
-#include <algorithm>
-
-#include <iostream>
+#include "OgreLogManager.h"
 
 #if OGRE_DEBUG_MODE 
+#define LOG_PROGRESSIVE_MESH_GENERATION 1
+#else
+#define LOG_PROGRESSIVE_MESH_GENERATION 0
+#endif
+
+#if LOG_PROGRESSIVE_MESH_GENERATION 
+#include <iostream>
 std::ofstream ofdebug;
 #endif 
 
 namespace Ogre {
-	#define NEVER_COLLAPSE_COST 99999.9f
 
-
-    /** Comparator for unique vertex list
-    */
-    struct vectorLess
+	const unsigned char BitArray::bit_count[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
+	const unsigned char BitArray::bit_mask[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
+	
+//#define IGNORE_UV_AND_NORMAL_COSTS
+//#define CHECK_CALCULATED_NORMALS
+	
+#define NEVER_COLLAPSE_COST 99999.9f
+	
+	// VertexDataVariant - Read-only cursor over one vertex element (position,
+	// normal, uv, ...) of a VertexData buffer.  Several variants can share one
+	// hardware buffer; only the first variant created for a given source locks it.
+	class VertexDataVariant
+	{
+	private:
+		unsigned char*	mBaseDataPointer;	// first element of this semantic in the locked buffer
+		unsigned char*	mCurDataPointer;	// current read cursor
+		size_t			mOffsetSize;		// stride between consecutive elements (0 => invalid/fake)
+		
+		// source dependent part, only first buffer of several with shared mSource holds lock on hardware vertex buffer
+		int mSource;
+		HardwareVertexBufferSharedPtr mBuffer;
+		std::auto_ptr<VertexBufferLockGuard> mLockGuard;	// only first by source takes lock
+		
+		friend class VertexDataVariantList;
+		
+		// Private ctor: created only through VertexDataVariantList::create.
+		// If vertexElement is NULL the variant stays in a safe "fake" state that
+		// reads zeroes and never advances (mOffsetSize == 0).
+		VertexDataVariant(const VertexData * vertexData, const VertexElement* vertexElement, VertexDataVariant* bufferOwner)
+			: mOffsetSize(0)
+			, mSource(-1)
+		{
+			static float fakeDataBuffer[3] = { 0.0f, 0.0f, 0.0f }; // 12 bytes, can be safely used with zero mOffsetSize
+			mBaseDataPointer = mCurDataPointer = (unsigned char*)fakeDataBuffer;
+			
+			if(NULL != vertexElement)
+			{
+				mSource = vertexElement->getSource();
+				mBuffer = vertexData->vertexBufferBinding->getBuffer(mSource);
+				
+				// only first VertexDataVariant really locks buffer and store pointer to raw data
+				if(NULL == bufferOwner)
+				{
+					// buffer is not locked yet, so lock it and became buffer owner
+					mLockGuard.reset(new VertexBufferLockGuard(mBuffer, HardwareBuffer::HBL_READ_ONLY));
+					bufferOwner = this;
+				}
+				
+				// adjust whole vertex pointer to vertex part pointer
+				vertexElement->baseVertexPointerToElement(bufferOwner->mLockGuard->pData, &mBaseDataPointer);
+				mCurDataPointer = mBaseDataPointer;
+				mOffsetSize = mBuffer->getVertexSize();
+			}
+		}
+		
+	public:
+		bool isValid() const						{ return mOffsetSize != 0; }
+		
+		int getSource() const						{ return mSource; }
+		unsigned char* getBaseDataPointer() const	{ return mBaseDataPointer; }
+		unsigned char* getCurDataPointer() const	{ return mCurDataPointer; }
+		size_t getOffsetSize() const				{ return mOffsetSize; }
+		
+		// Cursor manipulation; offsetToElement jumps straight to vertex itemIndex.
+		void reset()								{ mCurDataPointer = mBaseDataPointer; }
+		void offset()								{ mCurDataPointer += mOffsetSize; }
+		void offsetToElement(int itemIndex)			{ mCurDataPointer = mBaseDataPointer + itemIndex * mOffsetSize;	}
+		
+		// Read the element under the cursor as 3 floats and advance one vertex.
+		Vector3 getNextVector3() 
+		{	
+			float* v = (float*)mCurDataPointer;
+			mCurDataPointer += mOffsetSize;
+			return Vector3(v[0], v[1], v[2]);
+		}
+		
+		// Read the element under the cursor as 2 floats and advance one vertex.
+		Vector2 getNextVector2()
+		{
+			float* v = (float*)mCurDataPointer;
+			mCurDataPointer += mOffsetSize;
+			return Vector2(v[0], v[1]);
+		}		
+	};
+	
+	// VertexDataVariantList - Owns the VertexDataVariant cursors created for one
+	// VertexData, sharing a single hardware-buffer lock per vertex source.
+	class VertexDataVariantList
+	{
+	public:
+		// create - Make a cursor for the given semantic.  If another variant
+		// already uses the same source buffer, it is passed in as the lock owner
+		// so the buffer is locked only once.
+		VertexDataVariant* create(const VertexData * vertexData, VertexElementSemantic sem)
+		{
+			const VertexElement* vertexElement = vertexData->vertexDeclaration->findElementBySemantic(sem);
+			VertexDataVariant* bufferOwner = vertexElement ? getBySource(vertexElement->getSource()) : NULL;
+			mVdList.push_back(VertexDataVariantSharedPtr(new VertexDataVariant(vertexData, vertexElement, bufferOwner)));
+			return mVdList.back().get();
+		}
+		
+	private:
+		// getBySource - Linear search for an existing variant bound to 'source';
+		// returns NULL if this source has not been locked yet.
+		VertexDataVariant* getBySource(int source)
+		{
+			for(vd_list_t::const_iterator it = mVdList.begin(); it != mVdList.end(); ++it)
+				if((*it)->getSource() == source)
+					return it->get();
+			
+			return NULL;
+		}
+		
+	private:
+		typedef SharedPtr<VertexDataVariant> VertexDataVariantSharedPtr;
+		typedef vector<VertexDataVariantSharedPtr>::type vd_list_t;
+		vd_list_t mVdList;
+	};
+	
+	class IndexDataVariant;
+	typedef SharedPtr<IndexDataVariant> IndexDataVariantSharedPtr;
+	
+	// IndexDataVariant - Cursor over an IndexData's hardware index buffer that
+	// hides the 16/32-bit index width behind a uniform unsigned interface.
+	class IndexDataVariant
+	{
+	private:
+		HardwareIndexBufferSharedPtr mBuffer;
+		unsigned char* mBaseDataPointer;	// start of locked index data
+		unsigned char* mCurDataPointer;		// current read cursor
+		size_t mIndexCount;
+		size_t mOffsetSize;					// bytes per index (2 or 4); 0 => invalid
+		bool mUse32bitIndexes;
+		std::auto_ptr<IndexBufferLockGuard> mLockGuard;
+		
+		// Private ctor: use the static create() factory, which discards
+		// invalid (empty / unindexed) instances.
+		IndexDataVariant(const IndexData * indexData, HardwareBuffer::LockOptions lockOpt)
+			: mIndexCount(0)
+			, mOffsetSize(0)
+			, mUse32bitIndexes(false)
+		{
+			static int fakeIndexBuffer = 0;	// 4 bytes, can be safely used with zero mOffsetSize
+			mBaseDataPointer = mCurDataPointer = (unsigned char*)&fakeIndexBuffer;
+			
+			if(NULL == indexData) return;
+			
+			mBuffer = indexData->indexBuffer;
+			if(mBuffer.isNull()) return;
+			
+			mIndexCount = indexData->indexCount;
+			if(0 == mIndexCount) return;
+			
+			mUse32bitIndexes = (mBuffer->getType() == HardwareIndexBuffer::IT_32BIT);
+			mOffsetSize = mUse32bitIndexes ? sizeof(unsigned int) : sizeof(unsigned short);
+			
+			mLockGuard.reset(new IndexBufferLockGuard(mBuffer, lockOpt));
+			mBaseDataPointer = (unsigned char*)mLockGuard->pData;
+			
+			reset();
+		}
+		
+		bool isValid() const { return mOffsetSize != 0; }
+		
+	public:
+		
+		// create - Factory; returns a null SharedPtr when indexData is missing,
+		// has no buffer, or contains zero indices.
+		static IndexDataVariantSharedPtr create(const IndexData * indexData, HardwareBuffer::LockOptions lockOpt = HardwareBuffer::HBL_READ_ONLY)
+		{
+			IndexDataVariantSharedPtr p(new IndexDataVariant(indexData, lockOpt));
+			return p->isValid() ? p : IndexDataVariantSharedPtr();	
+		}
+		
+		unsigned char* getBaseDataPointer() const	{ return mBaseDataPointer; }
+		unsigned char* getCurDataPointer() const	{ return mCurDataPointer; }
+		size_t getOffsetSize() const				{ return mOffsetSize; }
+		size_t getIndexCount() const				{ return mIndexCount; }
+		bool is32bitIndexes() const					{ return mUse32bitIndexes; }
+		
+		void reset()								{ mCurDataPointer = mBaseDataPointer; }
+		void offsetToElement(int itemIndex)			{ mCurDataPointer = getBaseDataPointer() + itemIndex * getOffsetSize();	}
+		// getNextIndex - Read the index under the cursor (widened to unsigned) and advance.
+		unsigned getNextIndex()						{ unsigned idx = mUse32bitIndexes ? *(unsigned int*)mCurDataPointer : *(unsigned short*)mCurDataPointer; mCurDataPointer += mOffsetSize; return idx; }
+		
+		// markUsedVertices - Set a bit for every vertex referenced by this index buffer.
+		void markUsedVertices(BitArray& bitmask) const
+		{
+			if(mUse32bitIndexes)
+			{
+				for(const unsigned int *ptr = (const unsigned int*)mBaseDataPointer, *end_ptr = ptr + mIndexCount; ptr < end_ptr; ++ptr)
+					bitmask.setBit(*ptr);
+			}
+			else
+			{
+				for(const unsigned short *ptr = (const unsigned short*)mBaseDataPointer, *end_ptr = ptr + mIndexCount; ptr < end_ptr; ++ptr)
+					bitmask.setBit(*ptr);
+			}
+		}
+		
+		// createIndexData - Fill pIndexData with a new hardware buffer holding a
+		// copy of these indices, optionally remapped through indexMap (old->new
+		// vertex indices) and optionally narrowed to 16-bit.
+		void createIndexData(IndexData* pIndexData, bool use16bitIndexes, vector<unsigned>::type* indexMap)
+		{
+			size_t indexCount = getIndexCount();
+			reset();
+			
+			pIndexData->indexStart = 0;
+			pIndexData->indexCount = indexCount;
+			pIndexData->indexBuffer = HardwareBufferManager::getSingleton().createIndexBuffer(
+					  use16bitIndexes ? HardwareIndexBuffer::IT_16BIT : HardwareIndexBuffer::IT_32BIT,
+					  indexCount, HardwareBuffer::HBU_STATIC_WRITE_ONLY);
+			
+			IndexBufferLockGuard outIdataLock(pIndexData->indexBuffer, HardwareBuffer::HBL_DISCARD);
+			
+			unsigned short* pShortOut = use16bitIndexes ? (unsigned short*)outIdataLock.pData : NULL;
+			unsigned int* pIntOut = use16bitIndexes ? NULL : (unsigned int*)outIdataLock.pData;
+			
+			if(use16bitIndexes)
+			{
+				for(size_t n = 0; n < indexCount; ++n)
+				{
+					unsigned idx = getNextIndex();
+					*pShortOut++ = indexMap ? (*indexMap)[idx] : idx;
+				}
+			}
+			else
+			{
+				for(size_t n = 0; n < indexCount; ++n)
+				{
+					unsigned idx = getNextIndex();
+					*pIntOut++ = indexMap ? (*indexMap)[idx] : idx;
+				}
+			}
+		}
+	};	
+	
+    //---------------------------------------------------------------------
+	ProgressiveMesh::ProgressiveMesh(SubMesh* pSubMesh)
+		: m_pSubMesh(pSubMesh)
+		, mCurrNumIndexes(0)
+		, mVertexComponentFlags(0)
     {
-		_OgreExport bool operator()(const Vector3& v1, const Vector3& v2) const
-        {
-			if (v1.x < v2.x) return true;
-			if (v1.x == v2.x && v1.y < v2.y) return true;
-			if (v1.x == v2.x && v1.y == v2.y && v1.z < v2.z) return true;
-
-			return false;
+		// ignore un-indexed submeshes
+		if(pSubMesh->indexData->indexCount == 0)
+		{
+			m_pSubMesh = NULL;
+			return;
 		}
-	};
-    //---------------------------------------------------------------------
-    ProgressiveMesh::ProgressiveMesh(const VertexData* vertexData, 
-        const IndexData* indexData)
-    {
-        addWorkingData(vertexData, indexData);
-        mpVertexData = vertexData;
-        mpIndexData = indexData;
-        mWorstCosts.resize(vertexData->vertexCount);
-
-
-
+		
+		Ogre::Mesh* pMesh = pSubMesh->parent;
+		Real sqrDiag = pMesh->getBounds().getSize().squaredLength();
+		mInvSquaredBoundBoxDiagonal = (0.0 != sqrDiag) ? 1.0 / sqrDiag : 0.0;
+		
+		mNextWorstCostHint = 0;
+		mInvalidCostCount = 0;
+		mRemovedVertexDuplicatesCount = 0;
+		
+		mpVertexData	= pSubMesh->useSharedVertices ? pMesh->sharedVertexData : pSubMesh->vertexData;
+		mpIndexData		= pSubMesh->indexData;
+		
+		mInvalidCostMask.resize(mpVertexData->vertexCount);
+        addWorkingData(mpVertexData, mpIndexData);
     }
     //---------------------------------------------------------------------
     ProgressiveMesh::~ProgressiveMesh()
         addWorkingData(vertexData, mpIndexData);
     }
     //---------------------------------------------------------------------
-    void ProgressiveMesh::build(ushort numLevels, LODFaceList* outList, 
-			VertexReductionQuota quota, Real reductionValue)
-    {
-        IndexData* newLod;
+	// initializeProgressiveMeshList - Populate pmList with one heap-allocated
+	// ProgressiveMesh per submesh of pMesh (caller frees via freeProgressiveMeshList).
+	void ProgressiveMesh::initializeProgressiveMeshList(ProgressiveMeshList& pmList, Mesh* pMesh)
+	{
+		size_t subMeshCount = pMesh->getNumSubMeshes();
+		pmList.reserve(subMeshCount);
+		for(size_t i = 0; i < subMeshCount; ++i)
+		{
+			SubMesh* pSubMesh = pMesh->getSubMesh(i);
+			pmList.push_back(OGRE_NEW ProgressiveMesh(pSubMesh));
+		}		
+	}
+    //---------------------------------------------------------------------
+	void ProgressiveMesh::freeProgressiveMeshList(ProgressiveMeshList* pmList)
+	{
+		for(ProgressiveMeshList::iterator it = pmList->begin(); it != pmList->end(); ++it)
+		{
+			OGRE_DELETE *it;
+			*it = NULL;
+		}
+	}
+	//---------------------------------------------------------------------
+	bool ProgressiveMesh::generateLodLevels(Mesh* pMesh, const LodValueList& lodValues,
+								 VertexReductionQuota reductionMethod, Real reductionValue)
+	{
+#if OGRE_DEBUG_MODE
+		pMesh->getLodStrategy()->assertSorted(lodValues);
+#endif
+		
+		pMesh->removeLodLevels();
+		
+		LogManager::getSingleton().stream()	<< "Generating " << lodValues.size()	<< " lower LODs for mesh " << pMesh->getName();
+		
+		// Set up data for reduction
+		ProgressiveMeshList pmList;
+		initializeProgressiveMeshList(pmList, pMesh);
+		
+		bool generated = build(pmList, pMesh->getLodStrategy(), lodValues, reductionMethod, reductionValue);
+		
+		if(generated)
+		{
+			// transfer all LODs from ProgressiveMesh to the real one
+			size_t subMeshCount = pMesh->getNumSubMeshes();
+			for(size_t i = 0; i < subMeshCount; ++i)
+				pMesh->getSubMesh(i)->mLodFaceList.swap(pmList[i]->mLodFaceList);
+			
+			// register them
+			LodStrategy *lodStrategy = LodStrategyManager::getSingleton().getStrategy(pMesh->getLodStrategy()->getName());
+            bakeLodUsage(pMesh, lodStrategy, lodValues, false);
+		}
+		
+		freeProgressiveMeshList(&pmList);
+		
+		return generated;
+	}
+    //---------------------------------------------------------------------
+	MeshPtr ProgressiveMesh::generateSimplifiedMesh(const String& name, const String& groupName, Mesh* inMesh,
+													bool dropOriginalGeometry, const LodValueList& lodValues,
+													VertexReductionQuota reductionMethod, Real reductionValue,
+													size_t* removedVertexDuplicatesCount)
+	{
+#if OGRE_DEBUG_MODE
+		inMesh->getLodStrategy()->assertSorted(lodValues);
+#endif
+		LogManager::getSingleton().stream()	<< "Generating simplified mesh " << name << " for mesh " << inMesh->getName();
 
-        computeAllCosts();
+		// Set up data for reduction
+		ProgressiveMeshList pmList;
+		initializeProgressiveMeshList(pmList, inMesh);
+		
+		// Perform reduction
+		build(pmList, inMesh->getLodStrategy(), lodValues, reductionMethod, reductionValue);
 
-#if OGRE_DEBUG_MODE
-		dumpContents("pm_before.log");
+		// Bake new simplified mesh
+		MeshPtr simplifiedMesh = MeshManager::getSingleton().createManual(name, groupName);
+		bakeSimplifiedMesh(simplifiedMesh.get(), inMesh, pmList, dropOriginalGeometry);
+		LodStrategy *lodStrategy = LodStrategyManager::getSingleton().getStrategy(inMesh->getLodStrategy()->getName());
+        bakeLodUsage(simplifiedMesh.get(), lodStrategy, lodValues, dropOriginalGeometry);
+		
+		// Return some statistic
+		if(removedVertexDuplicatesCount)
+		{
+			size_t duplicatesCount = 0;
+			for(ProgressiveMeshList::iterator it = pmList.begin(); it != pmList.end(); ++it)
+				duplicatesCount += (*it)->mRemovedVertexDuplicatesCount;
+			*removedVertexDuplicatesCount = duplicatesCount;
+		}
+		
+		freeProgressiveMeshList(&pmList);
+		return simplifiedMesh;
+	}
+    //---------------------------------------------------------------------	
+	bool ProgressiveMesh::build(ProgressiveMeshList& pmInList,
+								const LodStrategy *lodStrategy, const LodValueList& lodValues,
+								VertexReductionQuota quota, Real reductionValue)
+	{		
+		assert(!pmInList.empty());
+		bool generated = false;
+		
+		size_t numVerts = 0;
+		
+		ProgressiveMeshList pmBuildList;
+		
+		for(ProgressiveMeshList::iterator i = pmInList.begin(); i != pmInList.end(); ++i)
+		{
+			ProgressiveMesh* p = *i;
+			if(NULL == p->m_pSubMesh)
+				continue; // dummy, skip it
+			
+			p->computeAllCosts();
+			
+			// Init
+			p->mCurrNumIndexes = (Ogre::RenderOperation::OT_TRIANGLE_LIST == p->m_pSubMesh->operationType) ?
+				p->mpIndexData->indexCount : (p->mpIndexData->indexCount - 2) * 3;
+			
+#if LOG_PROGRESSIVE_MESH_GENERATION			
+			StringUtil::StrStreamType logname;
+			logname << "pm_before_" << std::distance(pmInList.begin(), i) << ".log";
+			(*i)->dumpContents(logname.str());
 #endif
+			numVerts += p->mWorstCostsSize;
+			
+			pmBuildList.push_back(p);
+		}
+		
+		ProgressiveMeshList pmList(pmBuildList);
+		
+		// if either of these two checks fails - we complete one LOD generation
+		size_t numCollapses = numVerts;			// unlimited
+		Real costLimit = NEVER_COLLAPSE_COST;	// unlimited
+		
+		bool abandon = false;
+				
+		for (LodValueList::const_iterator lod = lodValues.begin(); lod != lodValues.end(); ++lod)
+		{
+			int level = std::distance(lodValues.begin(), lod);
+			
+			// adjust LOD target limits
+			switch(quota)
+			{
+				case VRQ_CONSTANT:		
+					numCollapses = static_cast<size_t>(reductionValue);
+					break;
+					
+				case VRQ_PROPORTIONAL:
+					numCollapses = static_cast<size_t>(numVerts * reductionValue);
+					numVerts -= numCollapses;
+					break;
+					
+				case VRQ_ERROR_COST:
+					// we must increase cost limit with each next lod level proportionally to squared distance ratio or inverted pixel area ratio
+					Real reductionValueMultiplier = lodStrategy->transformBias(lodStrategy->transformUserValue(lodValues[0]) / lodStrategy->transformUserValue(lodValues[level]));
+					assert(level == 0 || reductionValueMultiplier > 1.0);
+					costLimit = reductionValue * reductionValueMultiplier;
+					break;
+			}
+						
+			// NB if 'abandon' is set, we stop reducing 
+			// However, we still bake the number of LODs requested, even if it 
+			// means they are the same
+			while(numCollapses > 0 && !abandon)
+			{	
+				ProgressiveMesh* pmCur;			//out
+				CostIndexPair* collapseTarget;	// out
+				getNextCollapser(pmList, pmCur, collapseTarget);
 
-        // Init
-        mCurrNumIndexes = mpIndexData->indexCount;
-        size_t numVerts, numCollapses;
-        // Use COMMON vert count, not original vert count
-        // Since collapsing 1 common vert position is equivalent to collapsing them all
-        numVerts = mNumCommonVertices;
+				// we found a collapse target, but maybe we must not collapse it
+				if(collapseTarget != NULL)
+				{
+					assert(collapseTarget->first != NEVER_COLLAPSE_COST);
+					if(VRQ_ERROR_COST == quota)
+					{
+						Real cost = collapseTarget->first;
+						if(cost > costLimit)
+							collapseTarget = NULL;
+					}
+					else // VRQ_CONSTANT, VRQ_PROPORTIONAL
+					{
+						if(getInvalidCostCount(pmList) >= numCollapses)
+							collapseTarget = NULL; // need recalc
+					}
+				}
+				
+				// if we have no collapse target but do have invalid costs - recalc them
+				if(collapseTarget == NULL)
+				{
+					if(recomputeInvalidCosts(pmList))
+					{
+						// some invalid costs were recomputed and we should try to continue collapsing
+						// because the recomputed best cost can be less than the level limit;
+						continue;
+					}
+					else
+					{
+						abandon = pmList.empty();
+						break; // no invalid costs were found, so we complete collapsing for the current LOD
+					}
+				}
+				
+				// OK, we decide to collapse this target
+				assert(collapseTarget);				
+				assert(pmCur);
+				assert(numCollapses > 0);
+				
+				// Collapse on every buffer
+				WorkingDataList::iterator idata, idataend;
+				idataend = pmCur->mWorkingData.end();
+				
+				for (idata = pmCur->mWorkingData.begin(); idata != idataend; ++idata)
+				{
+					PMVertex* collapser = &(idata->mVertList[collapseTarget->second]);
+					
+					if(collapser->face.size() == pmCur->mCurrNumIndexes / 3
+					|| collapser->collapseTo == NULL)
+					{
+						// Must have run out of valid collapsables
+						pmList.erase(std::remove(pmList.begin(), pmList.end(), pmCur), pmList.end());
+						abandon = pmList.empty();						
+						break;
+					}
+										
+#if LOG_PROGRESSIVE_MESH_GENERATION 
+					ofdebug << "Collapsing index " << (unsigned int)collapser->index <<	"(border: "<< (collapser->mBorderStatus == PMVertex::BS_BORDER ? "yes" : "no") <<
+					") to " << (unsigned int)collapser->collapseTo->index << "(border: "<< (collapser->collapseTo->mBorderStatus == PMVertex::BS_BORDER ? "yes" : "no") <<
+					")" << std::endl;
+#endif
+					assert(collapser->collapseTo->removed == false);
+					assert(pmCur->mCurrNumIndexes > 0);
+					
+					pmCur->collapse(collapser);
+					
+					assert(pmCur->mCurrNumIndexes > 0);
+				}
+
+				// we must never return to it
+				collapseTarget->first = NEVER_COLLAPSE_COST;
+				--numCollapses;
+			}
+			// end of one LOD collapsing loop
+			
+			// Bake a new LOD and add it to the list
+			for(ProgressiveMeshList::iterator i = pmBuildList.begin(); i != pmBuildList.end(); ++i)
+			{
+				ProgressiveMesh* p = *i;
+				assert(NULL != p->m_pSubMesh); //dummy can't happen here
+				
+#if LOG_PROGRESSIVE_MESH_GENERATION
+				StringUtil::StrStreamType logname;
+				ProgressiveMeshList::iterator t = std::find(pmInList.begin(), pmInList.end(), p);
+				assert(t != pmInList.end());
+				logname << "pm_" << std::distance(pmInList.begin(), t) << "__level_" << level << ".log";
+				(*i)->dumpContents(logname.str());
+#endif								
+				IndexData* lodData = NULL;
+				
+				if(p->mCurrNumIndexes != p->mpIndexData->indexCount)
+				{
+					assert(p->mCurrNumIndexes > 0);
+					
+					lodData = OGRE_NEW IndexData();
+					p->bakeNewLOD(lodData);
+					generated = true;
+				}
+				else
+				{
+					p->mRemovedVertexDuplicatesCount = 0;
+					lodData = p->m_pSubMesh->indexData->clone();
+				}
+				
+				assert(NULL != lodData);
+				
+				p->mLodFaceList.push_back(lodData);
+			}
+		}
 		
-#if OGRE_DEBUG_MODE 
-		ofdebug.open("progressivemesh.log");
-#endif
-		numCollapses = 0;
-		bool abandon = false;
-		while (numLevels--)
-        {
-            // NB idf 'abandon' is set, we stop reducing 
-            // However, we still bake the number of LODs requested, even if it 
-            // means they are the same
-            if (!abandon)
-            {
-			    if (quota == VRQ_PROPORTIONAL)
-			    {
-				    numCollapses = static_cast<size_t>(numVerts * reductionValue);
-			    }
-			    else 
-			    {
-				    numCollapses = static_cast<size_t>(reductionValue);
-			    }
-                // Minimum 3 verts!
-                if ( (numVerts - numCollapses) < 3) 
-                    numCollapses = numVerts - 3;
-			    // Store new number of verts
-			    numVerts = numVerts - numCollapses;
-
-			    while(numCollapses-- && !abandon)
-                {
-                    size_t nextIndex = getNextCollapser();
-                    // Collapse on every buffer
-                    WorkingDataList::iterator idata, idataend;
-                    idataend = mWorkingData.end();
-                    for (idata = mWorkingData.begin(); idata != idataend; ++idata)
-                    {
-                        PMVertex* collapser = &( idata->mVertList.at( nextIndex ) );
-                        // This will reduce mCurrNumIndexes and recalc costs as required
-					    if (collapser->collapseTo == NULL)
-					    {
-						    // Must have run out of valid collapsables
-						    abandon = true;
-						    break;
-					    }
-#if OGRE_DEBUG_MODE 
-					    ofdebug << "Collapsing index " << (unsigned int)collapser->index << "(border: "<< collapser->isBorder() <<
-						    ") to " << (unsigned int)collapser->collapseTo->index << "(border: "<< collapser->collapseTo->isBorder() <<
-						    ")" << std::endl;
-#endif
-					    assert(collapser->collapseTo->removed == false);
-
-                        collapse(collapser);
-                    }
-
-                }
-            }
-#if OGRE_DEBUG_MODE
-			StringUtil::StrStreamType logname;
-			logname << "pm_level" << numLevels << ".log";
-			dumpContents(logname.str());
-#endif
-
-            // Bake a new LOD and add it to the list
-            newLod = OGRE_NEW IndexData();
-            bakeNewLOD(newLod);
-            outList->push_back(newLod);
-			
-        }
-
-
-
-    }
-    //---------------------------------------------------------------------
+		return generated;
+	}	
+	//---------------------------------------------------------------------
     void ProgressiveMesh::addWorkingData(const VertexData * vertexData, 
         const IndexData * indexData)
     {
+		if(0 == vertexData->vertexCount || 0 == indexData->indexCount)
+			return;
+		
         // Insert blank working data, then fill 
         mWorkingData.push_back(PMWorkingData());
-
         PMWorkingData& work = mWorkingData.back();
 
         // Build vertex list
 		work.mFaceVertList.resize(vertexData->vertexCount);
 		// Also resize common vert list to max, to avoid reallocations
 		work.mVertList.resize(vertexData->vertexCount);
+		
+		VertexDataVariantList vdVariantList;
+		
+		VertexDataVariant* vertexDataBuffer = vdVariantList.create(vertexData, VES_POSITION);
+		VertexDataVariant* normalDataBuffer = vdVariantList.create(vertexData, VES_NORMAL);
+		VertexDataVariant* uvDataBuffer = vdVariantList.create(vertexData, VES_TEXTURE_COORDINATES);
+		
+		mVertexComponentFlags |= (1 << VES_POSITION);
+		mVertexComponentFlags |= (1 << VES_NORMAL);
+		mVertexComponentFlags |= (1 << VES_TEXTURE_COORDINATES);
+		
+		IndexDataVariantSharedPtr indexDataVar(IndexDataVariant::create(indexData));
+		
+		if(indexDataVar.isNull())
+			return;
 
-		// locate position element & hte buffer to go with it
-		const VertexElement* posElem = vertexData->vertexDeclaration->findElementBySemantic(VES_POSITION);
-		HardwareVertexBufferSharedPtr vbuf = 
-			vertexData->vertexBufferBinding->getBuffer(posElem->getSource());
-		// lock the buffer for reading