VTM采用了跨分量线性模型(CCLM)的色度预测模式,即色度值可根据已重建亮度像素线性变换得到,变换函数为 $\mathrm{pred}_C(i,j)=\alpha\cdot\mathrm{rec}_L'(i,j)+\beta$,其中 $\mathrm{pred}_C(i,j)$ 为预测的色度值,$\mathrm{rec}_L'(i,j)$ 表示已重建的亮度下采样后的值。参数 $\alpha$ 和 $\beta$ 通过相邻CU的亮度和色度计算得到,计算公式为 $\alpha=\frac{X_a-X_b}{Y_a-Y_b}$,$\beta=X_b-\alpha\times Y_b$。其中,$Y_a$ 是相邻亮度像素下采样后最大亮度像素的亮度值,$X_a$ 是 $Y_a$ 对应的色度值;$Y_b$ 是相邻亮度像素下采样后最小亮度像素的亮度值,$X_b$ 是 $Y_b$ 对应的色度值(即 $\alpha$ 为色度差与亮度差之比,与后文代码中 diffC 与 diff 的用法一致)。下图给出一个CCLM预测的例子。VTM中计算 $\alpha$ 的除法操作通过查表获得,以降低计算复杂度。为了减少表的存储空间,最大最小的差值 diff 和 $\alpha$ 以指数形式存储和计算。例如,diff 近似为4位有效位和一个指数量级。因此,1/diff 的表格元素可减少为16个,如下:DivTable[] = { 0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0 }。总之,这种计算方式不仅减少了计算复杂度,还减少了存储空间。
除了使用所有的上方参考像素和左侧参考像素联合计算线性模型的参数外,模型参数还有另外两种计算方式,即CCLM还有另外两种模式,称之为LM_A和LM_L模式。
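LM_A模式仅使用已重建亮度像素的上方元素作为参考像素计算线性模型参数,为了获取足够参考像素,上方亮度参考像素扩展为W+H个。LM_L模式仅使用已重建像素左侧元素计算线性模型参数,同样,左侧参考像素扩展为H+W个。对于4:2:0色度采样的序列,需要对亮度信息进行下采样,VTM支持两种2:1采样方式。采样滤波器的选择由SPS指出,分别标识为“type-0”和“type-2”,依次对应式(1.1)和式(1.2),如下:
$$\mathrm{rec}_L'(i,j)=\big[\mathrm{rec}_L(2i-1,2j)+2\,\mathrm{rec}_L(2i,2j)+\mathrm{rec}_L(2i+1,2j)+\mathrm{rec}_L(2i-1,2j+1)+2\,\mathrm{rec}_L(2i,2j+1)+\mathrm{rec}_L(2i+1,2j+1)+4\big]\gg 3 \qquad (1.1)$$
$$\mathrm{rec}_L'(i,j)=\big[\mathrm{rec}_L(2i,2j-1)+\mathrm{rec}_L(2i-1,2j)+4\,\mathrm{rec}_L(2i,2j)+\mathrm{rec}_L(2i+1,2j)+\mathrm{rec}_L(2i,2j+1)+4\big]\gg 3 \qquad (1.2)$$
线性模型参数的计算过程不仅在编码端进行,解码端同样进行相同的操作,因此参数alpha和beta不用记录在码流中。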
帧内色度模式的编码依赖于相应的亮度块,因为VTM中I帧的亮度和色度分量可能会有不同的划分结构,一个色度块可能对应多个亮度块。因此,对于色度的DM模式,使用当前色度块中心像素对应的亮度块的帧内预测模式。
过程如下:
初始化:获取亮度重建像素并对其下采样,具体实现由函数xGetLumaRecPixels()完成,其代码及注释如下:
/**
 * Fills the temporary luma buffer used by the cross-component linear-model
 * (CCLM / MDLM) chroma modes with 2:1 downsampled reconstructed luma.
 *
 * Three regions are written, all relative to pDst0 (the sample that
 * corresponds to the top-left chroma position of the block):
 *   - the row above the block   (pDst0 - iDstStride), if the above neighbour is available,
 *   - the column left of it     (pDst0 - 1),          if the left neighbour is available,
 *   - the block interior        (uiCHeight rows of uiCWidth samples).
 *
 * The destination is m_pMdlmTemp (stride 2*MAX_CU_SIZE+1) for MDLM_L_IDX /
 * MDLM_T_IDX and m_piTemp (stride MAX_CU_SIZE+1) otherwise.
 *
 * @param pu          prediction unit; supplies the chroma mode (pu.intraDir[1]),
 *                    chroma format, coding structure and reconstructed picture.
 * @param chromaArea  chroma area for which the co-located luma is fetched.
 */
void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chromaArea)
{
  int iDstStride = 0;
  Pel* pDst0 = 0; // start address of the destination (downsampled luma) samples
  int curChromaMode = pu.intraDir[1];
  if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX))
  {
    // LM_L and LM_A use reference samples from one side only, extended to length W+H,
    // hence the wider buffer stride.
    iDstStride = 2 * MAX_CU_SIZE + 1;
    pDst0 = m_pMdlmTemp + iDstStride + 1;
  }
  else
  {
    // CCLM uses reference samples from both sides.
    iDstStride = MAX_CU_SIZE + 1;
    pDst0 = m_piTemp + iDstStride + 1; //MMLM_SAMPLE_NEIGHBOR_LINES;
  }
  //assert 420 chroma subsampling
  // Luma area co-located with the chroma area.
  // (Original annotation: when the chroma area maps onto several luma CUs, the luma CU at
  //  the centre is taken -- NOTE(review): not evident from this code, which looks the CU up
  //  at lumaArea.pos(); confirm.)
  CompArea lumaArea = CompArea( COMPONENT_Y, pu.chromaFormat, chromaArea.lumaPos(), recalcSize( pu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, chromaArea.size() ) );//needed for correct pos/size (4x4 Tus)
  // The luma area is expected to differ in size from the chroma area (subsampled chroma).
  CHECK( lumaArea.width == chromaArea.width, "" );
  CHECK( lumaArea.height == chromaArea.height, "" );
  const SizeType uiCWidth = chromaArea.width;
  const SizeType uiCHeight = chromaArea.height;
  const CPelBuf Src = pu.cs->picture->getRecoBuf( lumaArea ); // reconstructed luma buffer
  Pel const* pRecSrc0 = Src.bufAt( 0, 0 ); // first reconstructed luma sample of the block
  int iRecStride = Src.stride;
  int iRecStride2 = iRecStride << 1; // two luma rows per downsampled row
  const CodingUnit& lumaCU = isChroma( pu.chType ) ? *pu.cs->picture->cs->getCU( lumaArea.pos(), CH_L ) : *pu.cu; // luma CU
  const CodingUnit& cu = *pu.cu; // CU that owns the chroma PU
  const CompArea& area = isChroma( pu.chType ) ? chromaArea : lumaArea;
  const uint32_t uiTuWidth = area.width; // chroma size
  const uint32_t uiTuHeight = area.height;
  int iBaseUnitSize = ( 1 << MIN_CU_LOG2 ); // 4 according to the original annotation
  const int iUnitWidth = iBaseUnitSize >> getComponentScaleX( area.compID, area.chromaFormat ); // 2 (original annotation)
  // NOTE(review): the vertical unit size is derived with getComponentScaleX as well; this
  // presumably gives the same value for 4:2:0 -- confirm whether a vertical scale was intended.
  const int iUnitHeight = iBaseUnitSize >> getComponentScaleX( area.compID, area.chromaFormat ); // 2 (original annotation)
  const int iTUWidthInUnits = uiTuWidth / iUnitWidth;
  const int iTUHeightInUnits = uiTuHeight / iUnitHeight;
  const int iAboveUnits = iTUWidthInUnits;
  const int iLeftUnits = iTUHeightInUnits;
  const int chromaUnitWidth = iBaseUnitSize >> getComponentScaleX(COMPONENT_Cb, area.chromaFormat); // 2 (original annotation)
  const int chromaUnitHeight = iBaseUnitSize >> getComponentScaleX(COMPONENT_Cb, area.chromaFormat); // 2 (original annotation)
  const int topTemplateSampNum = 2 * uiCWidth; // for MDLM, the number of template samples is 2W or 2H.
  const int leftTemplateSampNum = 2 * uiCHeight;
  assert(m_topRefLength >= topTemplateSampNum);
  assert(m_leftRefLength >= leftTemplateSampNum);
  // Unit counts, rounded up, for the extended above / left templates.
  const int totalAboveUnits = (topTemplateSampNum + (chromaUnitWidth - 1)) / chromaUnitWidth;
  const int totalLeftUnits = (leftTemplateSampNum + (chromaUnitHeight - 1)) / chromaUnitHeight;
  const int totalUnits = totalLeftUnits + totalAboveUnits + 1;
  const int aboveRightUnits = totalAboveUnits - iAboveUnits;
  const int leftBelowUnits = totalLeftUnits - iLeftUnits;
  /*********** availability of neighbouring units and their flags ***************/
  int avaiAboveRightUnits = 0;
  int avaiLeftBelowUnits = 0;
  bool bNeighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
  memset(bNeighborFlags, 0, totalUnits);
  bool bAboveAvaillable, bLeftAvaillable;
  // A side counts as available only if every unit along that side is available.
  int availlableUnit = isLeftAvailable( isChroma( pu.chType ) ? cu : lumaCU, toChannelType( area.compID ), area.pos(), iLeftUnits, iUnitHeight, ( bNeighborFlags + iLeftUnits + leftBelowUnits - 1 ) );
  bLeftAvaillable = availlableUnit == iTUHeightInUnits;
  availlableUnit = isAboveAvailable( isChroma( pu.chType ) ? cu : lumaCU, toChannelType( area.compID ), area.pos(), iAboveUnits, iUnitWidth, ( bNeighborFlags + iLeftUnits + leftBelowUnits + 1 ) );
  bAboveAvaillable = availlableUnit == iTUWidthInUnits;
  if (bLeftAvaillable) // if left is not available, then the below left is not available
  {
    avaiLeftBelowUnits = isBelowLeftAvailable(isChroma(pu.chType) ? cu : lumaCU, toChannelType(area.compID), area.bottomLeftComp(area.compID), leftBelowUnits, iUnitHeight, (bNeighborFlags + leftBelowUnits - 1));
  }
  if (bAboveAvaillable) // if above is not available, then the above right is not available.
  {
    avaiAboveRightUnits = isAboveRightAvailable(isChroma(pu.chType) ? cu : lumaCU, toChannelType(area.compID), area.topRightComp(area.compID), aboveRightUnits, iUnitWidth, (bNeighborFlags + iLeftUnits + leftBelowUnits + iAboveUnits + 1));
  }
  Pel* pDst = nullptr;
  Pel const* piSrc = nullptr;
  bool isFirstRowOfCtu = ((pu.block(COMPONENT_Cb).y)&(((pu.cs->sps)->getMaxCUWidth() >> 1) - 1)) == 0; // whether the PU lies in the first row of the CTU
  if( bAboveAvaillable ) // downsample the above reference samples
  {
    pDst = pDst0 - iDstStride; // first sample of the row above the block in the destination buffer
    int addedAboveRight = 0; // number of available above-right samples
    if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX))
    {
      addedAboveRight = avaiAboveRightUnits*chromaUnitWidth;
    }
    for (int i = 0; i < uiCWidth + addedAboveRight; i++)
    {
      if (isFirstRowOfCtu) // PU in the first CTU row: only one luma line is read, [1,2,1] filter with 2:1 downsampling
      {
        piSrc = pRecSrc0 - iRecStride;
        if (i == 0 && !bLeftAvaillable)
        {
          // No sample to the left: copy without filtering.
          pDst[i] = piSrc[2 * i];
        }
        else
        {
          pDst[i] = ( piSrc[2 * i] * 2 + piSrc[2 * i - 1] + piSrc[2 * i + 1] + 2 ) >> 2;
        }
      }
#if JVET_M0142_CCLM_COLLOCATED_CHROMA
      else if( pu.cs->sps->getCclmCollocatedChromaFlag() )
      {
        // Downsampling per formula (1.2): 5-tap cross centred on the co-located luma sample.
        piSrc = pRecSrc0 - iRecStride2;
        if( i == 0 && !bLeftAvaillable )
        {
          // No sample to the left: vertical [1,2,1] only.
          pDst[i] = ( piSrc[2 * i] * 2 + piSrc[2 * i - iRecStride] + piSrc[2 * i + iRecStride] + 2 ) >> 2;
        }
        else
        {
          pDst[i] = ( piSrc[2 * i - iRecStride] + piSrc[2 * i ] * 4 + piSrc[2 * i - 1] + piSrc[2 * i + 1] + piSrc[2 * i + iRecStride] + 4 ) >> 3;
        }
      }
#endif
      else
      {
        // Downsampling per formula (1.1): 6-tap filter over two luma rows.
        piSrc = pRecSrc0 - iRecStride2;
        if (i == 0 && !bLeftAvaillable)
        {
          // No sample to the left: average the two vertically adjacent samples.
          pDst[i] = ( piSrc[2 * i] + piSrc[2 * i + iRecStride] + 1 ) >> 1;
        }
        else
        {
          pDst[i] = ( ( ( piSrc[2 * i ] * 2 ) + piSrc[2 * i - 1 ] + piSrc[2 * i + 1 ] ) + ( ( piSrc[2 * i + iRecStride] * 2 ) + piSrc[2 * i - 1 + iRecStride] + piSrc[2 * i + 1 + iRecStride] ) + 4 ) >> 3;
        }
      }
    }
  }
  if( bLeftAvaillable ) // downsample the left reference samples
  {
    pDst = pDst0 - 1;
    // Three luma samples left of the block start, so piSrc[0..2] are the filter taps
    // with piSrc[1] at the centre.
    piSrc = pRecSrc0 - 3;
    int addedLeftBelow = 0;
    if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX))
    {
      addedLeftBelow = avaiLeftBelowUnits*chromaUnitHeight;
    }
    for (int j = 0; j < uiCHeight + addedLeftBelow; j++)
    {
#if JVET_M0142_CCLM_COLLOCATED_CHROMA
      if( pu.cs->sps->getCclmCollocatedChromaFlag() )
      {
        if( j == 0 && !bAboveAvaillable )
        {
          // No row above: horizontal [1,2,1] only.
          pDst[0] = ( piSrc[1] * 2 + piSrc[0] + piSrc[2] + 2 ) >> 2;
        }
        else // formula (1.2)
        {
          pDst[0] = ( piSrc[1 - iRecStride] + piSrc[1 ] * 4 + piSrc[0] + piSrc[2] + piSrc[1 + iRecStride] + 4 ) >> 3;
        }
      }
      else
      {
        // formula (1.1)
#endif
        pDst[0] = ( ( piSrc[1 ] * 2 + piSrc[0 ] + piSrc[2 ] ) + ( piSrc[1 + iRecStride] * 2 + piSrc[iRecStride] + piSrc[2 + iRecStride] ) + 4 ) >> 3;
#if JVET_M0142_CCLM_COLLOCATED_CHROMA
      }
#endif
      // Advance two luma rows per output row.
      piSrc += iRecStride2;
      pDst += iDstStride;
    }
  }
  // inner part from reconstructed picture buffer
  for( int j = 0; j < uiCHeight; j++ ) // downsampled luma values for the block interior
  {
    for( int i = 0; i < uiCWidth; i++ )
    {
#if JVET_M0142_CCLM_COLLOCATED_CHROMA
      if( pu.cs->sps->getCclmCollocatedChromaFlag() )
      {
        // Formula (1.2), with the taps that would fall outside an unavailable
        // left column / above row dropped.
        if( i == 0 && !bLeftAvaillable )
        {
          if( j == 0 && !bAboveAvaillable )
          {
            pDst0[i] = pRecSrc0[2 * i];
          }
          else
          {
            pDst0[i] = ( pRecSrc0[2 * i] * 2 + pRecSrc0[2 * i - iRecStride] + pRecSrc0[2 * i + iRecStride] + 2 ) >> 2;
          }
        }
        else if( j == 0 && !bAboveAvaillable )
        {
          pDst0[i] = ( pRecSrc0[2 * i] * 2 + pRecSrc0[2 * i - 1] + pRecSrc0[2 * i + 1] + 2 ) >> 2;
        }
        else
        {
          pDst0[i] = ( pRecSrc0[2 * i - iRecStride] + pRecSrc0[2 * i ] * 4 + pRecSrc0[2 * i - 1] + pRecSrc0[2 * i + 1] + pRecSrc0[2 * i + iRecStride] + 4 ) >> 3;
        }
      }
      else
      {
#endif
        // Formula (1.1); the first column falls back to a 2-tap vertical average
        // when there is no left neighbour.
        if( i == 0 && !bLeftAvaillable )
        {
          pDst0[i] = ( pRecSrc0[2 * i] + pRecSrc0[2 * i + iRecStride] + 1 ) >> 1;
        }
        else
        {
          pDst0[i] = ( pRecSrc0[2 * i ] * 2 + pRecSrc0[2 * i + 1 ] + pRecSrc0[2 * i - 1 ] + pRecSrc0[2 * i + iRecStride] * 2 + pRecSrc0[2 * i + 1 + iRecStride] + pRecSrc0[2 * i - 1 + iRecStride] + 4 ) >> 3;
        }
#if JVET_M0142_CCLM_COLLOCATED_CHROMA
      }
#endif
    }
    // Next output row; the source advances by two luma rows.
    pDst0 += iDstStride;
    pRecSrc0 += iRecStride2;
  }
}
通过函数predIntraChromaLM()实现,predIntraChromaLM() 通过调用xGetLMParameters获取线性模型参数,通过linearTransform()计算预测值。 代码及注释如下:
void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf &piPred, const PredictionUnit &pu, const CompArea& chromaArea, int intraDir) { int iLumaStride = 0; PelBuf Temp; // 亮度重建buffer(下采样之后) if ((intraDir == MDLM_L_IDX) || (intraDir == MDLM_T_IDX)) { iLumaStride = 2 * MAX_CU_SIZE + 1; Temp = PelBuf(m_pMdlmTemp + iLumaStride + 1, iLumaStride, Size(chromaArea)); } else { iLumaStride = MAX_CU_SIZE + 1; Temp = PelBuf(m_piTemp + iLumaStride + 1, iLumaStride, Size(chromaArea)); } int a, b, iShift; xGetLMParameters(pu, compID, chromaArea, a, b, iShift); // 计算线性模型尺度因子和偏移 // final prediction piPred.copyFrom(Temp); // 预测buffer初始化 piPred.linearTransform(a, iShift, b, true, pu.cs->slice->clpRng(compID)); // 计算预测值 }其中xGetLMParameters()主要功能是找到亮度重建像素中最大的亮度、最小亮度及其各自对应的色度值,然后根据查表计算出pred_C (i,j)=α•rec_L'(i,j)+β中的α和β。注释如下:(代码部分和xGetLumaRecPixels()重复度挺高,参照阅读)
/**
 * Derives the integer linear-model parameters used for cross-component chroma
 * prediction, i.e. pred = ((a * luma) >> iShift) + b.
 *
 * The neighbouring downsampled luma samples (row above / column left of the
 * block in m_pMdlmTemp or m_piTemp) are scanned at evenly spaced positions for
 * the minimum and maximum luma value; the chroma sample at the same position
 * is recorded with each. The slope is the chroma difference divided by the
 * luma difference, computed with a small table instead of a division.
 *
 * @param pu          prediction unit; supplies the chroma mode (pu.intraDir[1]), CU and SPS
 * @param compID      chroma component (must not be COMPONENT_Y)
 * @param chromaArea  chroma area being predicted
 * @param a           [out] scale factor
 * @param b           [out] offset
 * @param iShift      [out] right shift applied to a * luma
 */
void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const ComponentID compID, const CompArea &chromaArea, int &a, int &b, int &iShift)
{
  CHECK(compID == COMPONENT_Y, "");
  const SizeType cWidth = chromaArea.width;
  const SizeType cHeight = chromaArea.height;
  const Position posLT = chromaArea;
  CodingStructure & cs = *(pu.cs);
  const CodingUnit &cu = *(pu.cu);
  const SPS & sps = *cs.sps;
  const uint32_t tuWidth = chromaArea.width;
  const uint32_t tuHeight = chromaArea.height;
  const ChromaFormat nChromaFormat = sps.getChromaFormatIdc();
  const int baseUnitSize = 1 << MIN_CU_LOG2;
  const int unitWidth = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat);
  // NOTE(review): the vertical unit size is derived with getComponentScaleX as well; this
  // presumably gives the same value for 4:2:0 -- confirm whether a vertical scale was intended.
  const int unitHeight = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat);
  const int tuWidthInUnits = tuWidth / unitWidth;
  const int tuHeightInUnits = tuHeight / unitHeight;
  const int aboveUnits = tuWidthInUnits;
  const int leftUnits = tuHeightInUnits;
  int topTemplateSampNum = 2 * cWidth; // for MDLM, the template sample number is 2W or 2H;
  int leftTemplateSampNum = 2 * cHeight;
  assert(m_topRefLength >= topTemplateSampNum);
  assert(m_leftRefLength >= leftTemplateSampNum);
  // Unit counts, rounded up, for the extended above / left templates.
  int totalAboveUnits = (topTemplateSampNum + (unitWidth - 1)) / unitWidth;
  int totalLeftUnits = (leftTemplateSampNum + (unitHeight - 1)) / unitHeight;
  int totalUnits = totalLeftUnits + totalAboveUnits + 1;
  int aboveRightUnits = totalAboveUnits - aboveUnits;
  int leftBelowUnits = totalLeftUnits - leftUnits;
  int avaiAboveRightUnits = 0;
  int avaiLeftBelowUnits = 0;
  int avaiAboveUnits = 0;
  int avaiLeftUnits = 0;
  int curChromaMode = pu.intraDir[1];
  bool neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
  memset(neighborFlags, 0, totalUnits);
  bool aboveAvailable, leftAvailable;
  // A side counts as available only if every unit along that side is available.
  int availableUnit = isAboveAvailable(cu, CHANNEL_TYPE_CHROMA, posLT, aboveUnits, unitWidth, (neighborFlags + leftUnits + leftBelowUnits + 1));
  aboveAvailable = availableUnit == tuWidthInUnits;
  availableUnit = isLeftAvailable(cu, CHANNEL_TYPE_CHROMA, posLT, leftUnits, unitHeight, (neighborFlags + leftUnits + leftBelowUnits - 1));
  leftAvailable = availableUnit == tuHeightInUnits;
  if (leftAvailable) // if left is not available, then the below left is not available
  {
    avaiLeftUnits = tuHeightInUnits;
    avaiLeftBelowUnits = isBelowLeftAvailable(cu, CHANNEL_TYPE_CHROMA, chromaArea.bottomLeftComp(chromaArea.compID), leftBelowUnits, unitHeight, (neighborFlags + leftBelowUnits - 1));
  }
  if (aboveAvailable) // if above is not available, then the above right is not available.
  {
    avaiAboveUnits = tuWidthInUnits;
    avaiAboveRightUnits = isAboveRightAvailable(cu, CHANNEL_TYPE_CHROMA, chromaArea.topRightComp(chromaArea.compID), aboveRightUnits, unitWidth, (neighborFlags + leftUnits + leftBelowUnits + aboveUnits + 1));
  }
  Pel *srcColor0, *curChroma0;
  int srcStride, curStride;
  PelBuf temp;
  // Same buffer selection as in xGetLumaRecPixels: the MDLM modes use the wider buffer.
  if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX))
  {
    srcStride = 2 * MAX_CU_SIZE + 1;
    temp = PelBuf(m_pMdlmTemp + srcStride + 1, srcStride, Size(chromaArea));
  }
  else
  {
    srcStride = MAX_CU_SIZE + 1;
    temp = PelBuf(m_piTemp + srcStride + 1, srcStride, Size(chromaArea));
  }
  srcColor0 = temp.bufAt(0, 0);
  // Chroma reference samples; presumably laid out with one border row and column
  // of stride m_topRefLength + 1 -- verify against getPredictorPtr().
  curChroma0 = getPredictorPtr(compID);
  curStride = m_topRefLength + 1;
  curChroma0 += curStride + 1;
  unsigned internalBitDepth = sps.getBitDepth(CHANNEL_TYPE_CHROMA);
  int minLuma[2] = { MAX_INT, 0 }; // minimum luma [0] and the chroma at the same position [1]
  int maxLuma[2] = { -MAX_INT, 0 }; // maximum luma [0] and the chroma at the same position [1]
  Pel *src = srcColor0 - srcStride; // luma: first sample of the row directly above the block
  Pel *cur = curChroma0 - curStride; // chroma: first sample of the row directly above the block
  int minDim = 1;
  int actualTopTemplateSampNum = 0;
  int actualLeftTemplateSampNum = 0;
  if (curChromaMode == MDLM_T_IDX)
  {
    // Above-only mode: ignore the left column, use above plus above-right samples.
    leftAvailable = 0;
    actualTopTemplateSampNum = unitWidth*(avaiAboveUnits + avaiAboveRightUnits);
    minDim = actualTopTemplateSampNum;
  }
  else if (curChromaMode == MDLM_L_IDX)
  {
    // Left-only mode: ignore the above row, use left plus below-left samples.
    aboveAvailable = 0;
    actualLeftTemplateSampNum = unitHeight*(avaiLeftUnits + avaiLeftBelowUnits);
    minDim = actualLeftTemplateSampNum;
  }
  else if (curChromaMode == LM_CHROMA_IDX)
  {
    // Both sides: the number of samples visited per side is derived from the shorter
    // available side through g_aucPrevLog2.
    actualTopTemplateSampNum = cWidth;
    actualLeftTemplateSampNum = cHeight;
    minDim = leftAvailable && aboveAvailable ? 1 << g_aucPrevLog2[std::min(actualLeftTemplateSampNum, actualTopTemplateSampNum)] : 1 << g_aucPrevLog2[leftAvailable ? actualLeftTemplateSampNum : actualTopTemplateSampNum];
  }
  int numSteps = minDim;
  /*************** scan the above samples **************/
  if (aboveAvailable)
  {
    for (int j = 0; j < numSteps; j++)
    {
      // numSteps evenly spaced positions across the template.
      int idx = (j * actualTopTemplateSampNum) / minDim;
      if (minLuma[0] > src[idx])
      {
        minLuma[0] = src[idx];
        minLuma[1] = cur[idx];
      }
      if (maxLuma[0] < src[idx])
      {
        maxLuma[0] = src[idx];
        maxLuma[1] = cur[idx];
      }
    }
  }
  /*************** scan the left samples **************/
  if (leftAvailable)
  {
    src = srcColor0 - 1;
    cur = curChroma0 - 1;
    for (int i = 0; i < numSteps; i++)
    {
      int idx = (i * actualLeftTemplateSampNum) / minDim;
      if (minLuma[0] > src[srcStride * idx])
      {
        minLuma[0] = src[srcStride * idx];
        minLuma[1] = cur[curStride * idx];
      }
      if (maxLuma[0] < src[srcStride * idx])
      {
        maxLuma[0] = src[srcStride * idx];
        maxLuma[1] = cur[curStride * idx];
      }
    }
  }
  // Table-based division (integer arithmetic) to compute a and b.
  if (leftAvailable || aboveAvailable)
  {
#if JVET_M0064_CCLM_SIMPLIFICATION
    int diff = maxLuma[0] - minLuma[0]; // (Ya - Yb): luma difference
    if (diff > 0)
    {
      int diffC = maxLuma[1] - minLuma[1]; // (Xa-Xb): chroma difference
      int x = floorLog2( diff );
      static const uint8_t DivSigTable[1 << 4] = {
        // 4bit significands - 8 ( MSB is omitted )
        0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0 };
      // The four bits just below the leading one of diff select the table entry.
      int normDiff = (diff << 4 >> x) & 15;
      int v = DivSigTable[normDiff] | 8; // restore the omitted MSB
      x += normDiff != 0;
      int y = floorLog2( abs( diffC ) ) + 1;
      int add = 1 << y >> 1; // rounding offset for the shift by y
      a = (diffC * v + add) >> y;
      iShift = 3 + x - y;
      if ( iShift < 1 )
      {
        iShift = 1;
        a = ( (a == 0)? 0: (a < 0)? -15 : 15 ); // a=Sign(a)*15
      }
      b = minLuma[1] - ((a * minLuma[0]) >> iShift);
    }
    else
    {
      // Flat luma neighbourhood: constant prediction equal to the recorded chroma.
      a = 0;
      b = minLuma[1];
      iShift = 0;
    }
#else // original
    a = 0;
    iShift = 16;
    int shift = (internalBitDepth > 8) ? internalBitDepth - 9 : 0;
    int add = shift ? 1 << (shift - 1) : 0;
    int diff = (maxLuma[0] - minLuma[0] + add) >> shift;
    if (diff > 0)
    {
      int div = ((maxLuma[1] - minLuma[1]) * g_aiLMDivTableLow[diff - 1] + 32768) >> 16;
      a = (((maxLuma[1] - minLuma[1]) * g_aiLMDivTableHigh[diff - 1] + div + add) >> shift);
    }
    b = minLuma[1] - ((a * minLuma[0]) >> iShift);
#endif
  }
  else // neither left nor above reference is usable: default a = 0, b = 2^(bitDepth-1)
  {
    a = 0;
    b = 1 << (internalBitDepth - 1);
    iShift = 0;
  }
}
预测信号通过linearTransform()产生:
template<> void AreaBuf<Pel>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng ) { const Pel* src = buf; Pel* dst = buf; if( width == 1 ) { THROW( "Blocks of width = 1 not supported" ); } #if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86) else if( ( width & 7 ) == 0 ) { g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip ); } else if( ( width & 3 ) == 0 ) { g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip ); } #endif else { #define LINTF_OP( ADDR ) dst[ADDR] = ( Pel ) bClip ? ClipPel( rightShift( scale * src[ADDR], shift ) + offset, clpRng ) : ( rightShift( scale * src[ADDR], shift ) + offset ) #define LINTF_INC \ src += stride; \ dst += stride; \ SIZE_AWARE_PER_EL_OP( LINTF_OP, LINTF_INC ); #undef RECO_OP #undef RECO_INC } }