Commits

Steve Borho committed e7a5780 Merge

Merge with stable

Comments (0)

Files changed (7)

source/CMakeLists.txt

 cmake_minimum_required (VERSION 2.8.8) # OBJECT libraries require 2.8.8
 include(CheckIncludeFiles)
 include(CheckFunctionExists)
+include(CheckCXXCompilerFlag)
 
 # X265_BUILD must be incremented each time the public API is changed
 set(X265_BUILD 3)
     set(GCC 1)
     add_definitions(-Wall -Wextra -Wshadow -ffast-math)
 elseif(CMAKE_COMPILER_IS_GNUCXX)
-    add_definitions(-Wall -Wextra -Wshadow -mstackrealign -ffast-math)
+    add_definitions(-Wall -Wextra -Wshadow -ffast-math)
+    check_cxx_compiler_flag(-Wno-narrowing GCC_HAS_NO_NARROWING) 
+    check_cxx_compiler_flag(-mstackrealign GCC_HAS_STACK_REALIGN) 
+    if (GCC_HAS_STACK_REALIGN)
+        add_definitions(-mstackrealign)
+    endif()
     execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
     if(NOT GCC_VERSION VERSION_LESS 4.7)
         # this is necessary to avoid name conflicts in vector class
 if(NOT MSVC)
     set_target_properties(x265-static PROPERTIES OUTPUT_NAME x265)
 endif()
-if(UNIX AND NOT CLANG)
+if(UNIX AND NOT APPLE)
     set_target_properties(x265-shared PROPERTIES LINK_FLAGS "-Wl,-Bsymbolic")
 endif()
 # WIN32 builds static: x265-static.lib  shared: x265.dll + x265.lib (shim loader)

source/common/CMakeLists.txt

         # force gcc to generate code for sync primitives
         set_source_files_properties(threadpool.cpp reference.cpp wavefront.cpp common.cpp PROPERTIES COMPILE_FLAGS -march=i686)
     endif()
-    if (NOT CLANG AND NOT INTEL_CXX)
+    if (GCC_HAS_NO_NARROWING)
         set_source_files_properties(cpu.cpp PROPERTIES COMPILE_FLAGS -Wno-narrowing)
     endif()
 endif(GCC)

source/common/common.cpp

     param->keyframeMin = 0;
     param->keyframeMax = 250;
     param->bOpenGOP = 0;
-    param->bframes = 3;
-    param->lookaheadDepth = 40;
-    param->bFrameAdaptive = X265_B_ADAPT_FAST;
-    param->bpyramid = 1;
+    param->bframes = 4;
+    param->lookaheadDepth = 20;
+    param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
+    param->bpyramid = 2;
     param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
 
     /* Intra Coding Tools */
     param->bEnableStrongIntraSmoothing = 1;
 
     /* Inter Coding tools */
-    param->searchMethod = X265_STAR_SEARCH;
-    param->subpelRefine = 5;
+    param->searchMethod = X265_HEX_SEARCH;
+    param->subpelRefine = 2;
     param->searchRange = 60;
-    param->maxNumMergeCand = 3;
+    param->maxNumMergeCand = 2;
     param->bEnableWeightedPred = 1;
     param->bEnableWeightedBiPred = 0;
     param->bEnableEarlySkip = 0;
     param->rdLevel = X265_NO_RDO_NO_RDOQ;
     param->bEnableRDO = 0;
     param->bEnableRDOQ = 0;
-    param->bEnableRDOQTS = 1;
+    param->bEnableRDOQTS = 0;
     param->bEnableSignHiding = 1;
     param->bEnableTransformSkip = 0;
-    param->bEnableTSkipFast = 1;
+    param->bEnableTSkipFast = 0;
     param->maxNumReferences = 3;
     
     /* Loop Filter */
         {
             param->lookaheadDepth = 10;
             param->maxCUSize = 32;
-            param->searchRange = 24;
+            param->searchRange = 28;
             param->bFrameAdaptive = 0;
-            param->bframes = 4;
-            param->tuQTMaxInterDepth = 1;
-            param->tuQTMaxIntraDepth = 1;
+            param->bpyramid = 1;
             param->subpelRefine = 0;
             param->maxNumMergeCand = 2;
             param->searchMethod = X265_DIA_SEARCH;
             param->bEnableRectInter = 0;
             param->bEnableAMP = 0;
-            param->bEnableTransformSkip = 0;
             param->bEnableEarlySkip = 1;
             param->bEnableCbfFastMode = 1;
-            param->bEnableLoopFilter = 0;
+            param->bEnableLoopFilter = 1;
             param->bEnableSAO = 0;
             param->bEnableSignHiding = 0;
             param->bEnableWeightedPred = 0;
         {
             param->lookaheadDepth = 10;
             param->maxCUSize = 32;
-            param->searchRange = 24;
+            param->searchRange = 44;
             param->bFrameAdaptive = 0;
-            param->bframes = 4;
-            param->tuQTMaxInterDepth = 1;
-            param->tuQTMaxIntraDepth = 1;
+            param->bpyramid = 1;
             param->subpelRefine = 1;
-            param->maxNumMergeCand = 2;
-            param->searchMethod = X265_HEX_SEARCH;
             param->bEnableRectInter = 0;
             param->bEnableAMP = 0;
-            param->bEnableTransformSkip = 0;
             param->bEnableEarlySkip = 1;
             param->bEnableCbfFastMode = 1;
-            param->bEnableSAO = 0;
-            param->bEnableSignHiding = 0;
+            param->bEnableWeightedPred = 0;
             param->maxNumReferences = 1;
         }
         else if (!strcmp(preset, "veryfast"))
         {
-            param->lookaheadDepth = 10;
-            param->bFrameAdaptive = 1;
-            param->tuQTMaxInterDepth = 1;
-            param->tuQTMaxIntraDepth = 1;
+            param->lookaheadDepth = 15;
+            param->maxCUSize = 32;
+            param->bFrameAdaptive = 0;
+            param->bpyramid = 1;
             param->subpelRefine = 1;
-            param->searchMethod = X265_HEX_SEARCH;
-            param->maxNumMergeCand = 2;
             param->bEnableRectInter = 0;
             param->bEnableAMP = 0;
-            param->bEnableTransformSkip = 0;
             param->bEnableEarlySkip = 1;
             param->bEnableCbfFastMode = 1;
             param->maxNumReferences = 1;            
         }
         else if (!strcmp(preset, "faster"))
         {
-            param->lookaheadDepth = 20;
-            param->bFrameAdaptive = 1;
-            param->tuQTMaxInterDepth = 1;
-            param->tuQTMaxIntraDepth = 1;
-            param->subpelRefine = 2;
-            param->searchMethod = X265_HEX_SEARCH;
-            param->maxNumMergeCand = 2;
+            param->lookaheadDepth = 15;
+            param->bFrameAdaptive = 0;
+            param->bpyramid = 1;
             param->bEnableRectInter = 0;
             param->bEnableAMP = 0;
-            param->bEnableTransformSkip = 0;
-            param->maxNumReferences = 2;
             param->bEnableEarlySkip = 1;
             param->bEnableCbfFastMode = 1;
+            param->maxNumReferences = 1;
         }
         else if (!strcmp(preset, "fast"))
         {
-            param->lookaheadDepth = 20;
-            param->bFrameAdaptive = 1;
-            param->tuQTMaxInterDepth = 1;
-            param->tuQTMaxIntraDepth = 1;
-            param->subpelRefine = 3;
-            param->searchMethod = X265_HEX_SEARCH;
-            param->maxNumMergeCand = 2;
+            param->lookaheadDepth = 15;
+            param->bpyramid = 1;
+            param->bEnableRectInter = 0;
             param->bEnableAMP = 0;
-            param->bEnableTransformSkip = 0;
-            param->maxNumReferences = 2;
         }
         else if (!strcmp(preset, "medium"))
         {
         }
         else if (!strcmp(preset, "slow"))
         {
-            param->bFrameAdaptive = 2;
-            param->lookaheadDepth = 50;
-            param->bframes = 4;
-            param->tuQTMaxInterDepth = 1;
-            param->tuQTMaxIntraDepth = 1;
+            param->lookaheadDepth = 25;
+            param->bframes = 8;
+            param->bpyramid = 1;
             param->rdLevel = 1;
-            param->maxNumMergeCand = 4;
-            param->bEnableTransformSkip = 0;
-            param->maxNumReferences = 3;
+            param->subpelRefine = 3;
+            param->maxNumMergeCand = 3;
+            param->searchMethod = X265_STAR_SEARCH;
         }
         else if (!strcmp(preset, "slower"))
         {
-            param->bFrameAdaptive = 2;
-            param->lookaheadDepth = 60;
-            param->bframes = 5;
+            param->lookaheadDepth = 30;
+            param->bframes = 8;
             param->tuQTMaxInterDepth = 2;
             param->tuQTMaxIntraDepth = 2;
             param->rdLevel = 2;
-            param->subpelRefine = 6;
-            param->maxNumMergeCand = 5;
-            param->bEnableTransformSkip = 0;
-            param->maxNumReferences = 3;
+            param->subpelRefine = 3;
+            param->maxNumMergeCand = 3;
+            param->searchMethod = X265_STAR_SEARCH;
         }
         else if (!strcmp(preset, "veryslow"))
         {
-            param->bFrameAdaptive = 2;
-            param->lookaheadDepth = 60;
+            param->lookaheadDepth = 40;
+            param->bframes = 8;
+            param->tuQTMaxInterDepth = 3;
+            param->tuQTMaxIntraDepth = 3;			
             param->rdLevel = 2;
-            param->bframes = 8;
-            param->subpelRefine = 6;
+            param->subpelRefine = 4;
+            param->maxNumMergeCand = 4;
+            param->searchMethod = X265_STAR_SEARCH;
             param->maxNumReferences = 5;
-            param->maxNumMergeCand = 5;
-            param->tuQTMaxInterDepth = 3;
-            param->tuQTMaxIntraDepth = 3;
         }
         else if (!strcmp(preset, "placebo"))
         {
-            param->bFrameAdaptive = 2;
             param->lookaheadDepth = 60;
-            param->bframes = 16;
-            param->rdLevel = 2;
-            param->subpelRefine = 7;
-            param->maxNumReferences = 16;
-            param->searchRange = 124;
-            param->bEnableTSkipFast = 0;
-            param->maxNumMergeCand = 5;
+            param->searchRange = 92;
+            param->bframes = 8;
             param->tuQTMaxInterDepth = 4;
             param->tuQTMaxIntraDepth = 4;
+            param->rdLevel = 2;
+            param->subpelRefine = 5;
+            param->maxNumMergeCand = 5;
+            param->searchMethod = X265_STAR_SEARCH;
+            param->bEnableTransformSkip = 1;
+            param->bEnableRDOQTS = 1;
+            param->maxNumReferences = 5;
             // TODO: optimized esa
         }
         else

source/common/vec/vec-primitives.cpp

         Setup_Vec_DCTPrimitives_sse3(p);
         Setup_Vec_BlockCopyPrimitives_sse3(p);
     }
+#else
+    if (cpuMask) p.sad[0] = p.sad[0]; // prevent compiler warnings
 #endif
 #ifdef HAVE_SSSE3
     if (cpuMask & X265_CPU_SSSE3)

source/encoder/encoder.cpp

 #include <time.h>
 
 #if HAVE_INT_TYPES_H
+#define __STDC_FORMAT_MACROS
 #include <inttypes.h>
-#define LL "%I64d"
+#define LL "%"PRIu64
 #else
 #define LL "%lld"
 #endif
             _param->frameNumThreads = 6;  // dual-socket 10-core IvyBridge or higher
         else if (poolThreadCount >= 16)
             _param->frameNumThreads = 5;  // 8 HT cores, or dual socket
-        else if (poolThreadCount >= 12)
-            _param->frameNumThreads = 3;  // 6 HT cores
+        else if (poolThreadCount >= 8)
+            _param->frameNumThreads = 3;  // 4 HT cores
         else if (poolThreadCount >= 4)
             _param->frameNumThreads = 2;  // Dual or Quad core
         else

source/encoder/frameencoder.cpp

     int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;
     for (int l = 0; l < numPredDir; l++)
     {
-        wpScalingParam *w = NULL;
         for (int ref = 0; ref < slice->getNumRefIdx(l); ref++)
         {
-            if ((slice->isInterP() && slice->getPPS()->getUseWP()))
+            wpScalingParam *w = NULL;
+            if ((slice->isInterP() && slice->getPPS()->getUseWP() && slice->m_weightPredTable[l][ref]->bPresentFlag))
             {
                 w = slice->m_weightPredTable[l][ref];
                 slice->m_numWPRefs++;
                         refpic->m_reconRowWait.wait();
                     }
 
-                    if (slice->getPPS()->getUseWP() && (slice->getSliceType() == P_SLICE))
+                    if (slice->getPPS()->getUseWP() && slice->getSliceType() == P_SLICE && m_mref[l][ref].isWeighted)
                     {
                         m_mref[l][ref].applyWeight(row + refLagRows, m_numRows);
                     }
                             refpic->m_reconRowWait.wait();
                         }
 
-                        if (slice->getPPS()->getUseWP() && (slice->getSliceType() == P_SLICE))
+                        if (slice->getPPS()->getUseWP() && slice->getSliceType() == P_SLICE && m_mref[l][ref].isWeighted)
                         {
                             m_mref[list][ref].applyWeight(i + refLagRows, m_numRows);
                         }
         double qp_offset = 0;
         int maxBlockCols = (pic->getPicYuvOrg()->getWidth() + (16 - 1)) / 16;
         int maxBlockRows = (pic->getPicYuvOrg()->getHeight() + (16 - 1)) / 16;
-        int block_y = (cuAddr / pic->getPicSym()->getFrameWidthInCU()) * 4;
-        int block_x = (cuAddr * 4) - block_y * pic->getPicSym()->getFrameWidthInCU();
+        int noOfBlocks = g_maxCUWidth / 16;
+        int block_y = (cuAddr / pic->getPicSym()->getFrameWidthInCU()) * noOfBlocks;
+        int block_x = (cuAddr * noOfBlocks) - block_y * pic->getPicSym()->getFrameWidthInCU();
         int cnt = 0;
-        for (int h = 0; h < 4 && block_y < maxBlockRows; h++, block_y++)
+
+        for (int h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)
         {
-            for (int w = 0; w < 4 && (block_x + w) < maxBlockCols; w++)
+            for (int w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++)
             {
                 qp_offset += pic->m_lowres.qpAqOffset[block_x + w + (block_y * maxBlockCols)];
                 cnt++;

source/encoder/framefilter.cpp

         int stride2 = m_pic->getPicYuvRec()->getStride();
         int bEnd = ((row + 1) == (this->m_numRows - 1));
         int bStart = (row == 0);
-        int minPixY = row * 64 - 4 * !bStart;
-        int maxPixY = (row + 1) * 64 - 4 * !bEnd;
+        int minPixY = row * g_maxCUHeight - 4 * !bStart;
+        int maxPixY = (row + 1) * g_maxCUHeight - 4 * !bEnd;
         int ssim_cnt;
         x265_emms();