[MPlayer-dev-eng] [PATCH] Improve NV12/NV21 support

Mon Feb 14 08:51:17 CET 2005

On Mon, Feb 14, 2005 at 12:22:42AM +0100, Michael Niedermayer wrote:
> Hi
> 
> On Sunday 13 February 2005 02:50, Ville Syrjälä wrote:
> > The attached patch tries to make NV12/NV21 support a bit more functional.
> >
> > My aim was just to get it working with DirectFB + Matrox BES so I'm not
> > really interested in software scaling.
> >
<snip> 
> > - Added vDest != NULL tests to swscale since it can't actually handle
> >   NV12/NV21. So only luma will be scaled but at least it won't crash.
> 
> rejected, fix the code so it handles chroma or leave it as it is

OK. I have attached a new version that handles chroma too (C implementation
only; there is no MMX/optimized version yet).

> >+       case IMGFMT_NV12:
> >+       case IMGFMT_NV21:
> >+               *h=1;
> >+               *v=1;
> >+               break;
> 
> ugh, add these to the already existing case for h/v=1

Fixed.

-- 
Ville Syrjälä
syrjala at sci.fi
http://www.sci.fi/~syrjala/
-------------- next part --------------
diff -urN main/libmpcodecs/vf.c main/libmpcodecs/vf.c

--- main/libmpcodecs/vf.c	2005-01-26 00:53:46.000000000 +0200
+++ main/libmpcodecs/vf.c	2005-02-13 01:35:45.000000000 +0200
@@ -356,6 +356,7 @@
 	      //if(!mpi->stride[0]) 
 	      mpi->stride[0]=mpi->width;
 	      //if(!mpi->stride[1]) 
+	      if(mpi->num_planes > 2){
 	      mpi->stride[1]=mpi->stride[2]=mpi->chroma_width;
 	      if(mpi->flags&MP_IMGFLAG_SWAPPED){
 	          // I420/IYUV  (Y,U,V)
@@ -366,6 +367,11 @@
 	          mpi->planes[2]=mpi->planes[0]+mpi->width*mpi->height;
 	          mpi->planes[1]=mpi->planes[2]+mpi->chroma_width*mpi->chroma_height;
 	      }
+	      } else {
+	          // NV12/NV21
+	          mpi->stride[1]=mpi->chroma_width;
+	          mpi->planes[1]=mpi->planes[0]+mpi->width*mpi->height;
+	      }
 	  } else {
 	      //if(!mpi->stride[0]) 
 	      mpi->stride[0]=mpi->width*mpi->bpp/8;
diff -urN main/libmpcodecs/vf_eq.c main/libmpcodecs/vf_eq.c
--- main/libmpcodecs/vf_eq.c	2004-10-25 20:05:55.000000000 +0300
+++ main/libmpcodecs/vf_eq.c	2005-02-13 01:36:02.000000000 +0200
@@ -188,6 +188,7 @@
 	case IMGFMT_Y800:
 	case IMGFMT_Y8:
 	case IMGFMT_NV12:
+	case IMGFMT_NV21:
 	case IMGFMT_444P:
 	case IMGFMT_422P:
 	case IMGFMT_411P:
diff -urN main/libmpcodecs/vf_hue.c main/libmpcodecs/vf_hue.c
--- main/libmpcodecs/vf_hue.c	2004-10-05 23:19:47.000000000 +0300
+++ main/libmpcodecs/vf_hue.c	2005-02-13 01:36:32.000000000 +0200
@@ -130,7 +130,6 @@
 	case IMGFMT_I420:
 	case IMGFMT_IYUV:
 	case IMGFMT_CLPL:
-	case IMGFMT_NV12:
 	case IMGFMT_444P:
 	case IMGFMT_422P:
 	case IMGFMT_411P:
diff -urN main/libmpcodecs/vf_scale.c main/libmpcodecs/vf_scale.c
--- main/libmpcodecs/vf_scale.c	2004-11-15 23:25:47.000000000 +0200
+++ main/libmpcodecs/vf_scale.c	2005-02-13 01:37:37.000000000 +0200
@@ -56,6 +56,8 @@
     IMGFMT_YVU9,
     IMGFMT_IF09,
     IMGFMT_411P,
+    IMGFMT_NV12,
+    IMGFMT_NV21,
     IMGFMT_YUY2,
     IMGFMT_UYVY,
 // RGB and grayscale (Y8 and Y800):
@@ -175,6 +177,8 @@
     case IMGFMT_YV12:		/* YV12 needs w & h rounded to 2 */
     case IMGFMT_I420:
     case IMGFMT_IYUV:
+    case IMGFMT_NV12:
+    case IMGFMT_NV21:
       vf->priv->h = (vf->priv->h + 1) & ~1;
     case IMGFMT_YUY2:		/* YUY2 needs w rounded to 2 */
     case IMGFMT_UYVY:
diff -urN main/libmpcodecs/vf_spp.c main/libmpcodecs/vf_spp.c
--- main/libmpcodecs/vf_spp.c	2004-11-26 11:30:00.000000000 +0200
+++ main/libmpcodecs/vf_spp.c	2005-02-13 01:38:19.000000000 +0200
@@ -528,7 +528,6 @@
 	case IMGFMT_CLPL:
 	case IMGFMT_Y800:
 	case IMGFMT_Y8:
-	case IMGFMT_NV12:
 	case IMGFMT_444P:
 	case IMGFMT_422P:
 	case IMGFMT_411P:
@@ -546,7 +545,6 @@
 	IMGFMT_CLPL,
 	IMGFMT_Y800,
 	IMGFMT_Y8,
-	IMGFMT_NV12,
 	IMGFMT_444P,
 	IMGFMT_422P,
 	IMGFMT_411P,
diff -urN main/postproc/swscale.c main/postproc/swscale.c
--- main/postproc/swscale.c	2005-02-13 01:25:41.000000000 +0200
+++ main/postproc/swscale.c	2005-02-14 09:18:24.000000000 +0200
@@ -100,6 +100,7 @@
 
 //FIXME replace this with something faster
 #define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
+			|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21 \
 			|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
 #define isYUV(x)       ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || isPlanarYUV(x))
 #define isGray(x)      ((x)==IMGFMT_Y800)
@@ -113,6 +114,7 @@
 #define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
 			|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
 			|| isRGB(x) || isBGR(x)\
+			|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21\
 			|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
 #define isPacked(x)    ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||isRGB(x) || isBGR(x))
 
@@ -250,6 +252,56 @@
 		}
 }
 
+static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+				int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+				uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
+{
+	//FIXME Optimize (just quickly written, not optimized)
+	int i;
+	for(i=0; i<dstW; i++)
+	{
+		int val=1<<18;
+		int j;
+		for(j=0; j<lumFilterSize; j++)
+			val += lumSrc[j][i] * lumFilter[j];
+
+		dest[i]= MIN(MAX(val>>19, 0), 255);
+	}
+
+	if(uDest == NULL)
+		return;
+
+	if(dstFormat == IMGFMT_NV12)
+		for(i=0; i<chrDstW; i++)
+		{
+			int u=1<<18;
+			int v=1<<18;
+			int j;
+			for(j=0; j<chrFilterSize; j++)
+			{
+				u += chrSrc[j][i] * chrFilter[j];
+				v += chrSrc[j][i + 2048] * chrFilter[j];
+			}
+
+			uDest[2*i]= MIN(MAX(u>>19, 0), 255);
+			uDest[2*i+1]= MIN(MAX(v>>19, 0), 255);
+		}
+	else
+		for(i=0; i<chrDstW; i++)
+		{
+			int u=1<<18;
+			int v=1<<18;
+			int j;
+			for(j=0; j<chrFilterSize; j++)
+			{
+				u += chrSrc[j][i] * chrFilter[j];
+				v += chrSrc[j][i + 2048] * chrFilter[j];
+			}
+
+			uDest[2*i]= MIN(MAX(v>>19, 0), 255);
+			uDest[2*i+1]= MIN(MAX(u>>19, 0), 255);
+		}
+}
 
 #define YSCALE_YUV_2_PACKEDX_C(type) \
 		for(i=0; i<(dstW>>1); i++){\
@@ -1378,13 +1430,16 @@
 		uint8_t *dstPtr= dst;
 		for(i=0; i<srcSliceH; i++)
 		{
-			memcpy(dstPtr, srcPtr, srcStride[0]);
+			memcpy(dstPtr, srcPtr, c->srcW);
 			srcPtr+= srcStride[0];
 			dstPtr+= dstStride[0];
 		}
 	}
-	dst = dstParam[1] + dstStride[1]*srcSliceY;
-	interleaveBytes( src[1],src[2],dst,c->srcW,srcSliceH,srcStride[1],srcStride[2],dstStride[0] );
+	dst = dstParam[1] + dstStride[1]*srcSliceY/2;
+	if (c->dstFormat == IMGFMT_NV12)
+		interleaveBytes( src[1],src[2],dst,c->srcW/2,srcSliceH/2,srcStride[1],srcStride[2],dstStride[0] );
+	else
+		interleaveBytes( src[2],src[1],dst,c->srcW/2,srcSliceH/2,srcStride[2],srcStride[1],dstStride[0] );
 
 	return srcSliceH;
 }
@@ -1554,6 +1609,15 @@
 		sortedStride[0]= stride[0];
 		sortedStride[1]= stride[1];
 		sortedStride[2]= stride[2];
+	}
+	else if(format == IMGFMT_NV12 || format == IMGFMT_NV21)
+	{
+		sortedP[0]= p[0];
+		sortedP[1]= p[1];
+		sortedP[2]= NULL;
+		sortedStride[0]= stride[0];
+		sortedStride[1]= stride[1];
+		sortedStride[2]= 0;
 	}else{
 		MSG_ERR("internal error in orderYUV\n");
 	}
@@ -1644,6 +1708,8 @@
 		break;
 	case IMGFMT_YV12:
 	case IMGFMT_Y800: //FIXME remove after different subsamplings are fully implemented
+	case IMGFMT_NV12:
+	case IMGFMT_NV21:
 		*h=1;
 		*v=1;
 		break;
@@ -1872,7 +1938,7 @@
 	if(unscaled && !usesHFilter && !usesVFilter)
 	{
 		/* yv12_to_nv12 */
-		if(srcFormat == IMGFMT_YV12 && dstFormat == IMGFMT_NV12)
+		if(srcFormat == IMGFMT_YV12 && (dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21))
 		{
 			c->swScale= PlanarToNV12Wrapper;
 		}
diff -urN main/postproc/swscale_template.c main/postproc/swscale_template.c
--- main/postproc/swscale_template.c	2005-01-26 00:53:50.000000000 +0200
+++ main/postproc/swscale_template.c	2005-02-14 09:43:39.000000000 +0200
@@ -796,6 +796,15 @@
 #endif
 }
 
+static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+				     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+				     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
+{
+yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
+	     chrFilter, chrSrc, chrFilterSize,
+	     dest, uDest, dstW, chrDstW, dstFormat);
+}
+
 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
 				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
 {
@@ -2792,7 +2801,15 @@
 				((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
 		}
 #endif
-		if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like
+		if(dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21){
+			const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+			if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+			RENAME(yuv2nv12X)(c,
+				vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+				vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+				dest, uDest, dstW, chrDstW, dstFormat);
+		}
+		else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like
 		{
 			const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
 			if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
@@ -2840,7 +2857,15 @@
 	    {
 		int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
 		int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-		if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12
+		if(dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21){
+			const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+			if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+			yuv2nv12XinC(
+				vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+				vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+				dest, uDest, dstW, chrDstW, dstFormat);
+		}
+		else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12
 		{
 			const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
 			if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi