Commits

Anonymous committed 5acfb12

Replace g_rasterToPelX[g_zscanToRaster[idx]] with a direct g_zscanToPelX[idx] lookup (and likewise g_rasterToPelY with g_zscanToPelY)

Comments (0)

Files changed (13)

source/Lib/TLibCommon/TComDataCU.cpp

 
 TComDataCU* TComDataCU::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx)
 {
+    if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)
+        return NULL;
+
     uint32_t absPartIdxRT    = g_zscanToRaster[curPartUnitIdx];
     uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
-    if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)
-        return NULL;
-
     if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, numPartInCUSize))
     {
         if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize))
 
 TComDataCU* TComDataCU::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx)
 {
-    uint32_t absPartIdxLB     = g_zscanToRaster[curPartUnitIdx];
+    if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
+        return NULL;
 
-    if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
-    {
-        return NULL;
-    }
-
-    uint32_t numPartInCUSize  = m_pic->getNumPartInCUSize();
+    uint32_t absPartIdxLB    = g_zscanToRaster[curPartUnitIdx];
+    uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
     if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, numPartInCUSize))
     {
 
 TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t& blPartUnitIdx,  uint32_t curPartUnitIdx, uint32_t partUnitOffset)
 {
-    uint32_t absPartIdxLB     = g_zscanToRaster[curPartUnitIdx];
-
-    if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
+    if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
         m_slice->m_sps->picHeightInLumaSamples)
     {
         return NULL;
     }
 
-    uint32_t numPartInCUSize  = m_pic->getNumPartInCUSize();
+    uint32_t absPartIdxLB    = g_zscanToRaster[curPartUnitIdx];
+    uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
     if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - partUnitOffset, numPartInCUSize))
     {
 
 TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset)
 {
-    uint32_t absPartIdxRT    = g_zscanToRaster[curPartUnitIdx];
-
-    if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
+    if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
         m_slice->m_sps->picWidthInLumaSamples)
     {
         return NULL;
     }
 
+    uint32_t absPartIdxRT    = g_zscanToRaster[curPartUnitIdx];
     uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
     if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - partUnitOffset, numPartInCUSize))
         {
             if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset])
             {
-                uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
+                uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
                 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset];
                 if (RasterAddress::isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, numPartInCUSize))
                 {
     }
     // TMVP always enabled
     {
-        //>> MTK colocated-RightBottom
+        MV colmv;
         uint32_t partIdxRB;
 
         deriveRightBottomIdx(puIdx, partIdxRB);
 
-        uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB];
-        uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
-
-        MV colmv;
-        int refIdx;
         int lcuIdx = -1;
 
-        if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxTmp] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)  // image boundary check
+        // image boundary check
+        if (m_pic->getCU(m_cuAddr)->getCUPelX() + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
+            m_pic->getCU(m_cuAddr)->getCUPelY() + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
         {
-        }
-        else if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxTmp] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
-        {
-        }
-        else
-        {
-            if ((absPartIdxTmp % numPartInCUSize < numPartInCUSize - 1) &&        // is not at the last column of LCU
-                (absPartIdxTmp / numPartInCUSize < numPartInCUSize - 1)) // is not at the last row    of LCU
+            uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
+            uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
+            bool bNotLastCol = RasterAddress::lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of LCU
+            bool bNotLastRow = RasterAddress::lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row    of LCU
+
+            if (bNotLastCol && bNotLastRow)
             {
-                absPartAddr = g_rasterToZscan[absPartIdxTmp + numPartInCUSize + 1];
+                absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
                 lcuIdx = getAddr();
             }
-            else if (absPartIdxTmp % numPartInCUSize < numPartInCUSize - 1)       // is not at the last column of LCU But is last row of LCU
-                absPartAddr = g_rasterToZscan[(absPartIdxTmp + numPartInCUSize + 1) % m_pic->getNumPartInCU()];
-            else if (absPartIdxTmp / numPartInCUSize < numPartInCUSize - 1) // is not at the last row of LCU But is last column of LCU
+            else if (bNotLastCol)
+                absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
+            else if (bNotLastRow)
             {
-                absPartAddr = g_rasterToZscan[absPartIdxTmp + 1];
+                absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
                 lcuIdx = getAddr() + 1;
             }
-            else //is the right bottom corner of LCU
+            else // is the right bottom corner of LCU
                 absPartAddr = 0;
         }
 
-        refIdx = 0;
+        int refIdx = 0;
         uint32_t partIdxCenter;
         uint32_t curLCUIdx = getAddr();
         int dir = 0;
-        uint32_t arrayAddr = count;
         xDeriveCenterIdx(puIdx, partIdxCenter);
         bool bExistMV = lcuIdx >= 0 && xGetColMVP(REF_PIC_LIST_0, lcuIdx, absPartAddr, colmv, refIdx);
         if (!bExistMV)
         if (bExistMV)
         {
             dir |= 1;
-            mvFieldNeighbours[arrayAddr][0].setMvField(colmv, refIdx);
+            mvFieldNeighbours[count][0].setMvField(colmv, refIdx);
         }
 
         if (isInterB)
             if (bExistMV)
             {
                 dir |= 2;
-                mvFieldNeighbours[arrayAddr][1].setMvField(colmv, refIdx);
+                mvFieldNeighbours[count][1].setMvField(colmv, refIdx);
             }
         }
 
         if (dir != 0)
         {
-            interDirNeighbours[arrayAddr] = dir;
+            interDirNeighbours[count] = dir;
 
             count++;
         
         }
     }
 
-    uint32_t arrayAddr = count;
-
     if (isInterB)
     {
         const uint32_t cutoff = count * (count - 1);
                 int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1];
                 if (!(refPOCL0 == refPOCL1 && mvFieldNeighbours[i][0].mv == mvFieldNeighbours[j][1].mv))
                 {
-                    mvFieldNeighbours[arrayAddr][0].setMvField(mvFieldNeighbours[i][0].mv, refIdxL0);
-                    mvFieldNeighbours[arrayAddr][1].setMvField(mvFieldNeighbours[j][1].mv, refIdxL1);
-                    interDirNeighbours[arrayAddr] = 3;
+                    mvFieldNeighbours[count][0].setMvField(mvFieldNeighbours[i][0].mv, refIdxL0);
+                    mvFieldNeighbours[count][1].setMvField(mvFieldNeighbours[j][1].mv, refIdxL1);
+                    interDirNeighbours[count] = 3;
 
-                    arrayAddr++;
+                    count++;
 
-                    if (arrayAddr == maxNumMergeCand)
+                    if (count == maxNumMergeCand)
                         return;
                 }
             }
     int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0];
     int r = 0;
     int refcnt = 0;
-    while (arrayAddr < maxNumMergeCand)
+    while (count < maxNumMergeCand)
     {
-        interDirNeighbours[arrayAddr] = 1;
-        mvFieldNeighbours[arrayAddr][0].setMvField(MV(0, 0), r);
+        interDirNeighbours[count] = 1;
+        mvFieldNeighbours[count][0].setMvField(MV(0, 0), r);
 
         if (isInterB)
         {
-            interDirNeighbours[arrayAddr] = 3;
-            mvFieldNeighbours[arrayAddr][1].setMvField(MV(0, 0), r);
+            interDirNeighbours[count] = 3;
+            mvFieldNeighbours[count][1].setMvField(MV(0, 0), r);
         }
 
-        arrayAddr++;
+        count++;
 
         if (refcnt == numRefIdx - 1)
             r = 0;
 
     // TMVP always enabled
     {
-        // Get Temporal Motion Predictor
-        int refIdxCol = refIdx;
-        MV  colmv;
+        uint32_t absPartAddr = m_absIdxInLCU + partAddr;
+        MV colmv;
         uint32_t partIdxRB;
-        uint32_t absPartIdx;
-        uint32_t absPartAddr;
 
         deriveRightBottomIdx(partIdx, partIdxRB);
-        absPartAddr = m_absIdxInLCU + partAddr;
 
         //----  co-located RightBottom Temporal Predictor (H) ---//
-        absPartIdx = g_zscanToRaster[partIdxRB];
         int lcuIdx = -1;
-        if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)  // image boundary check
+
+        // image boundary check
+        if (m_pic->getCU(m_cuAddr)->getCUPelX() + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
+            m_pic->getCU(m_cuAddr)->getCUPelY() + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
         {
-        }
-        else if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
-        {
-        }
-        else
-        {
+            uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
             uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
-            if ((absPartIdx % numPartInCUSize < numPartInCUSize - 1) && // is not at the last column of LCU
-                (absPartIdx / numPartInCUSize < numPartInCUSize - 1))   // is not at the last row    of LCU
+            bool bNotLastCol = RasterAddress::lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of LCU
+            bool bNotLastRow = RasterAddress::lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row    of LCU
+
+            if (bNotLastCol && bNotLastRow)
             {
-                absPartAddr = g_rasterToZscan[absPartIdx + numPartInCUSize + 1];
+                absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
                 lcuIdx = getAddr();
             }
-            else if (absPartIdx % numPartInCUSize < numPartInCUSize - 1) // is not at the last column of LCU But is last row of LCU
-                absPartAddr = g_rasterToZscan[(absPartIdx + numPartInCUSize + 1) % m_pic->getNumPartInCU()];
-            else if (absPartIdx / numPartInCUSize < numPartInCUSize - 1) // is not at the last row of LCU But is last column of LCU
+            else if (bNotLastCol)
+                absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
+            else if (bNotLastRow)
             {
-                absPartAddr = g_rasterToZscan[absPartIdx + 1];
+                absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
                 lcuIdx = getAddr() + 1;
             }
             else // is the right bottom corner of LCU
                 absPartAddr = 0;
         }
-        if (lcuIdx >= 0 && xGetColMVP(picList, lcuIdx, absPartAddr, colmv, refIdxCol))
+        if (lcuIdx >= 0 && xGetColMVP(picList, lcuIdx, absPartAddr, colmv, refIdx))
         {
             amvpCand[num++] = colmv;
             mvc[numMvc++] = colmv;
             uint32_t partIdxCenter;
             uint32_t curLCUIdx = getAddr();
             xDeriveCenterIdx(partIdx, partIdxCenter);
-            if (xGetColMVP(picList, curLCUIdx, partIdxCenter, colmv, refIdxCol))
+            if (xGetColMVP(picList, curLCUIdx, partIdxCenter, colmv, refIdx))
             {
                 amvpCand[num++] = colmv;
                 mvc[numMvc++] = colmv;

source/Lib/TLibCommon/TComRom.cpp

 uint32_t g_maxCUDepth    = NUM_CU_DEPTH - 1;
 uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
 uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
-uint32_t g_rasterToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
-uint32_t g_rasterToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
+
+const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = // X pel offset for a z-scan partition index; read directly where callers used g_rasterToPelX[g_zscanToRaster[idx]]. Entries are multiples of 4 up to 60 (presumably UNIT_SIZE == 4 -- confirm)
+{
+    0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
+    16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
+    0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
+    16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
+    32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44,
+    48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60,
+    32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44,
+    48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60,
+    0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
+    16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
+    0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
+    16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
+    32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44,
+    48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60,
+    32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44,
+    48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60
+};
+
+const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = // Y pel offset for a z-scan partition index; read directly where callers used g_rasterToPelY[g_zscanToRaster[idx]]. Entries are multiples of 4 up to 60 (presumably UNIT_SIZE == 4 -- confirm)
+{
+    0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
+    0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
+    16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28,
+    16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28,
+    0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
+    0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
+    16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28,
+    16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28,
+    32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44,
+    32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44,
+    48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60,
+    48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60,
+    32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44,
+    32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44,
+    48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60,
+    48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60
+};
 
 const uint32_t g_puOffset[8] = { 0, 8, 4, 4, 2, 10, 1, 5 };
 
     }
 }
 
-void initRasterToPelXY(uint32_t maxFullDepth)
-{
-    uint32_t i;
-
-    uint32_t* tempX = &g_rasterToPelX[0];
-    uint32_t* tempY = &g_rasterToPelY[0];
-
-    uint32_t numPartInCUSize = 1 << maxFullDepth;
-    uint32_t numPartitions   = 1 << maxFullDepth * 2;
-
-    tempX[0] = 0;
-    tempX++;
-    for (i = 1; i < numPartInCUSize; i++)
-    {
-        tempX[0] = tempX[-1] + UNIT_SIZE;
-        tempX++;
-    }
-
-    for (i = 1; i < numPartInCUSize; i++)
-    {
-        memcpy(tempX, tempX - numPartInCUSize, sizeof(uint32_t) * numPartInCUSize);
-        tempX += numPartInCUSize;
-    }
-
-    for (i = 1; i < numPartitions; i++)
-    {
-        tempY[i] = (i >> maxFullDepth) * UNIT_SIZE;
-    }
-}
-
 const int16_t g_lumaFilter[4][NTAPS_LUMA] =
 {
     {  0, 0,   0, 64,  0,   0, 0,  0 },

source/Lib/TLibCommon/TComRom.h

 void initRasterToZscan(uint32_t maxFullDepth);
 
 // conversion of partition index to picture pel position
-extern uint32_t g_rasterToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern uint32_t g_rasterToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-
-void initRasterToPelXY(uint32_t maxFullDepth);
+extern const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
+extern const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
 
 // global variable (LCU width/height, max. CU depth)
 extern uint32_t g_maxLog2CUSize;

source/Lib/TLibCommon/TComYuv.h

     int m_vChromaShift;
     int m_csp;
 
-    int getChromaAddrOffset(uint32_t partUnitIdx, uint32_t width)
+    int getChromaAddrOffset(uint32_t idx, uint32_t width)
     {
-        int blkX = g_rasterToPelX[g_zscanToRaster[partUnitIdx]] >> m_hChromaShift;
-        int blkY = g_rasterToPelY[g_zscanToRaster[partUnitIdx]] >> m_vChromaShift;
+        int blkX = g_zscanToPelX[idx] >> m_hChromaShift;
+        int blkY = g_zscanToPelY[idx] >> m_vChromaShift;
 
         return blkX + blkY * width;
     }
 
-    static int getAddrOffset(uint32_t partUnitIdx, uint32_t width)
+    static int getAddrOffset(uint32_t idx, uint32_t width)
     {
-        int blkX = g_rasterToPelX[g_zscanToRaster[partUnitIdx]];
-        int blkY = g_rasterToPelY[g_zscanToRaster[partUnitIdx]];
+        int blkX = g_zscanToPelX[idx];
+        int blkY = g_zscanToPelY[idx];
 
         return blkX + blkY * width;
     }

source/common/deblock.cpp

 
     Frame* pic = cu->m_pic;
     uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
-    uint32_t qNumParts   = curNumParts >> 2;
 
     if (cu->getDepth(absZOrderIdx) > depth)
     {
+        uint32_t qNumParts   = curNumParts >> 2;
+        uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples  - cu->getCUPelX();
+        uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
         {
-            uint32_t lpelx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absZOrderIdx]];
-            uint32_t tpely = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absZOrderIdx]];
-            if ((lpelx < cu->m_slice->m_sps->picWidthInLumaSamples) && (tpely < cu->m_slice->m_sps->picHeightInLumaSamples))
+            if (g_zscanToPelX[absZOrderIdx] < xmax && g_zscanToPelY[absZOrderIdx] < ymax)
                 deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
         }
         return;
 
 void Deblock::setLoopfilterParam(TComDataCU* cu, uint32_t absZOrderIdx, Param *params)
 {
-    uint32_t x = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absZOrderIdx]];
-    uint32_t y = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absZOrderIdx]];
+    uint32_t x = cu->getCUPelX() + g_zscanToPelX[absZOrderIdx];
+    uint32_t y = cu->getCUPelY() + g_zscanToPelY[absZOrderIdx];
 
     TComDataCU* tempCU;
     uint32_t    tempPartIdx;

source/common/param.cpp

         uint32_t* tmp = &g_zscanToRaster[0];
         initZscanToRaster(g_maxFullDepth, 1, 0, tmp);
         initRasterToZscan(g_maxFullDepth);
-
-        // initialize conversion matrix from partition index to pel
-        initRasterToPelXY(g_maxFullDepth);
     }
     return 0;
 }

source/common/shortyuv.h

     ShortYuv();
     ~ShortYuv();
 
-    int getChromaAddrOffset(uint32_t partUnitIdx, uint32_t width)
+    int getChromaAddrOffset(uint32_t idx, uint32_t width)
     {
-        int blkX = g_rasterToPelX[g_zscanToRaster[partUnitIdx]] >> m_hChromaShift;
-        int blkY = g_rasterToPelY[g_zscanToRaster[partUnitIdx]] >> m_vChromaShift;
+        int blkX = g_zscanToPelX[idx] >> m_hChromaShift;
+        int blkY = g_zscanToPelY[idx] >> m_vChromaShift;
 
         return blkX + blkY * width;
     }
 
     static int getAddrOffset(uint32_t idx, uint32_t width)
     {
-        int blkX = g_rasterToPelX[g_zscanToRaster[idx]];
-        int blkY = g_rasterToPelY[g_zscanToRaster[idx]];
+        int blkX = g_zscanToPelX[idx];
+        int blkY = g_zscanToPelY[idx];
 
         return blkX + blkY * width;
     }

source/common/slice.cpp

         bUsed[k] = used;
     }
 }
+
+uint32_t Slice::realEndAddress(uint32_t endCUAddr) // maps endCUAddr to the address one past the last partition, at or before endCUAddr - 1, whose top-left pel lies inside the picture
+{
+    // Calculate end address: split the last covered address (endCUAddr - 1) into a partition index within its CU and the index of that CU
+    uint32_t internalAddress = (endCUAddr - 1) % m_pic->getNumPartInCU(); // NOTE(review): endCUAddr == 0 would wrap in this unsigned subtraction -- confirm callers never pass 0
+    uint32_t externalAddress = (endCUAddr - 1) / m_pic->getNumPartInCU();
+    uint32_t xmax = m_sps->picWidthInLumaSamples  - (externalAddress % m_pic->getFrameWidthInCU()) * g_maxCUSize; // luma columns left between this CU's column origin and the picture's right edge
+    uint32_t ymax = m_sps->picHeightInLumaSamples - (externalAddress / m_pic->getFrameWidthInCU()) * g_maxCUSize; // luma rows left between this CU's row origin and the picture's bottom edge
+
+    while (g_zscanToPelX[internalAddress] >= xmax || g_zscanToPelY[internalAddress] >= ymax) // walk backwards past partitions whose offset falls at or beyond either picture edge
+        internalAddress--;
+
+    internalAddress++; // turn "last partition inside the picture" back into a one-past-the-end address
+    if (internalAddress == m_pic->getNumPartInCU()) // the whole CU is inside the picture: the end is the first partition of the next CU
+    {
+        internalAddress = 0;
+        externalAddress++;
+    }
+
+    return externalAddress * m_pic->getNumPartInCU() + internalAddress;
+}
+
+

source/common/slice.h

     bool isInterB() const { return m_sliceType == B_SLICE; }
 
     bool isInterP() const { return m_sliceType == P_SLICE; }
+
+    uint32_t realEndAddress(uint32_t endCUAddr);
 };
 
 #define IS_REFERENCED(slice) (slice->m_pic->m_lowres.sliceType != X265_TYPE_B) 

source/encoder/analysis.cpp

     Slice* slice = outTempCU->m_slice;
     if (!bInsidePicture)
     {
-        int cuSize = 1 << outTempCU->getLog2CUSize(0);
+        uint32_t cuSize = 1 << outTempCU->getLog2CUSize(0);
         uint32_t lpelx = outTempCU->getCUPelX();
         uint32_t tpely = outTempCU->getCUPelY();
         uint32_t rpelx = lpelx + cuSize;
         uint32_t nextDepth = depth + 1;
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
         uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
+        uint32_t xmax = slice->m_sps->picWidthInLumaSamples  - lcu->getCUPelX();
+        uint32_t ymax = slice->m_sps->picHeightInLumaSamples - lcu->getCUPelY();
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
         {
-            uint32_t lpelx = lcu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
-            uint32_t tpely = lcu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
-            if ((lpelx < slice->m_sps->picWidthInLumaSamples) &&
-                (tpely < slice->m_sps->picHeightInLumaSamples))
+            if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
             {
-                subTempPartCU->copyToSubCU(cu, partUnitIdx, depth + 1);
-                encodeResidue(lcu, subTempPartCU, absPartIdx, depth + 1);
+                subTempPartCU->copyToSubCU(cu, partUnitIdx, nextDepth);
+                encodeResidue(lcu, subTempPartCU, absPartIdx, nextDepth);
             }
         }
 

source/encoder/encoder.cpp

         else
         {
             fenc->allocPicSym(m_param);
-            fenc->m_picSym->m_slice->m_sps = &m_sps;
-            fenc->m_picSym->m_slice->m_pps = &m_pps;
-            fenc->m_picSym->m_slice->m_maxNumMergeCand = m_param->maxNumMergeCand;
-            fenc->m_picSym->m_slice->m_endCUAddr = fenc->getNumCUsInFrame() * fenc->getNumPartInCU();
+            Slice* slice = fenc->m_picSym->m_slice;
+            slice->m_pic = fenc;
+            slice->m_sps = &m_sps;
+            slice->m_pps = &m_pps;
+            slice->m_maxNumMergeCand = m_param->maxNumMergeCand;
+            slice->m_endCUAddr = slice->realEndAddress(fenc->getNumCUsInFrame() * fenc->getNumPartInCU());
         }
         curEncoder->m_rce.encodeOrder = m_encodedFrameNum++;
         if (m_bframeDelay)

source/encoder/entropy.cpp

     Frame* pic = cu->m_pic;
     Slice* slice = cu->m_slice;
 
+    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
+        bEncodeDQP = true;
+
     if (!bInsidePicture)
     {
-        uint32_t lpelx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
-        uint32_t tpely = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
-        uint32_t rpelx = lpelx + (g_maxCUSize >> depth);
-        uint32_t bpely = tpely + (g_maxCUSize >> depth);
-        bInsidePicture = (rpelx <= slice->m_sps->picWidthInLumaSamples &&
-                          bpely <= slice->m_sps->picHeightInLumaSamples);
+        uint32_t xmax = slice->m_sps->picWidthInLumaSamples  - cu->getCUPelX();
+        uint32_t ymax = slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
+        uint32_t cuSize = g_maxCUSize >> depth;
+
+        bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize <= xmax &&
+                          g_zscanToPelY[absPartIdx] + cuSize <= ymax);
+
+        if (!bInsidePicture)
+        {
+            uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
+            for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
+            {
+                if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
+                    encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP);
+            }
+
+            return;
+        }
     }
 
     // We need to split, so don't try these modes.
     if (bInsidePicture && depth < g_maxCUDepth)
         codeSplitFlag(cu, absPartIdx, depth);
 
-    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
-        bEncodeDQP = true;
-
-    if (!bInsidePicture)
-    {
-        uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
-
-        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
-        {
-            uint32_t lpelx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
-            uint32_t tpely = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
-            if ((lpelx < slice->m_sps->picWidthInLumaSamples) &&
-                (tpely < slice->m_sps->picHeightInLumaSamples))
-            {
-                encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP);
-            }
-        }
-
-        return;
-    }
-
     if (depth < cu->getDepth(absPartIdx) && depth < g_maxCUDepth)
     {
         uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
 /* finish encoding a cu and handle end-of-slice conditions */
 void Entropy::finishCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth)
 {
-    Frame* pic = cu->m_pic;
     Slice* slice = cu->m_slice;
 
     // Calculate end address
+    X265_CHECK(slice->m_endCUAddr == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
+    uint32_t realEndAddress = slice->m_endCUAddr;
     uint32_t cuAddr = cu->getSCUAddr() + absPartIdx;
 
-    uint32_t internalAddress = (slice->m_endCUAddr - 1) % pic->getNumPartInCU();
-    uint32_t externalAddress = (slice->m_endCUAddr - 1) / pic->getNumPartInCU();
-    uint32_t posx = (externalAddress % pic->getFrameWidthInCU()) * g_maxCUSize + g_rasterToPelX[g_zscanToRaster[internalAddress]];
-    uint32_t posy = (externalAddress / pic->getFrameWidthInCU()) * g_maxCUSize + g_rasterToPelY[g_zscanToRaster[internalAddress]];
-    uint32_t width = slice->m_sps->picWidthInLumaSamples;
-    uint32_t height = slice->m_sps->picHeightInLumaSamples;
-    uint32_t cuSize = 1 << cu->getLog2CUSize(absPartIdx);
-
-    while (posx >= width || posy >= height)
-    {
-        internalAddress--;
-        posx = (externalAddress % pic->getFrameWidthInCU()) * g_maxCUSize + g_rasterToPelX[g_zscanToRaster[internalAddress]];
-        posy = (externalAddress / pic->getFrameWidthInCU()) * g_maxCUSize + g_rasterToPelY[g_zscanToRaster[internalAddress]];
-    }
-
-    internalAddress++;
-    if (internalAddress == cu->m_pic->getNumPartInCU())
-    {
-        internalAddress = 0;
-        externalAddress = (externalAddress + 1);
-    }
-    uint32_t realEndAddress = (externalAddress * pic->getNumPartInCU() + internalAddress);
-
     // Encode slice finish
     bool bTerminateSlice = false;
     if (cuAddr + (cu->m_pic->getNumPartInCU() >> (depth << 1)) == realEndAddress)
         bTerminateSlice = true;
 
-    uint32_t granularityWidth = g_maxCUSize;
-    posx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
-    posy = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
-    bool granularityBoundary = ((posx + cuSize) % granularityWidth == 0 || (posx + cuSize == width))
-                            && ((posy + cuSize) % granularityWidth == 0 || (posy + cuSize == height));
+    uint32_t granularityMask = g_maxCUSize - 1;
+    uint32_t cuSize = 1 << cu->getLog2CUSize(absPartIdx);
+    uint32_t rpelx = cu->getCUPelX() + g_zscanToPelX[absPartIdx] + cuSize;
+    uint32_t bpely = cu->getCUPelY() + g_zscanToPelY[absPartIdx] + cuSize;
+    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
+                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
 
     if (granularityBoundary)
     {

source/encoder/sao.cpp

 /* Original YUV restoration for CU in lossless coding */
 void origCUSampleRestoration(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth)
 {
-    Frame* pic = cu->m_pic;
-    uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
-    uint32_t qNumParts   = curNumParts >> 2;
-
     // go to sub-CU
     if (cu->getDepth(absZOrderIdx) > depth)
     {
+        Frame* pic = cu->m_pic;
+        uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
+        uint32_t qNumParts   = curNumParts >> 2;
+        uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples  - cu->getCUPelX();
+        uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
         {
-            uint32_t lpelx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absZOrderIdx]];
-            uint32_t tpely = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absZOrderIdx]];
-            if ((lpelx < cu->m_slice->m_sps->picWidthInLumaSamples) && (tpely < cu->m_slice->m_sps->picHeightInLumaSamples))
+            if (g_zscanToPelX[absZOrderIdx] < xmax && g_zscanToPelY[absZOrderIdx] < ymax)
                 origCUSampleRestoration(cu, absZOrderIdx, depth + 1);
         }