/* 
 *  Deinterlacer.cpp - Original code by Donald Graft
 *                                      neuron2@home.com.
 *
 *	Copyright (C) Alberto Vigata - January 2000
 *
 *  This file is part of FlasKMPEG, a free MPEG to MPEG/AVI converter
 *	
 *  FlasKMPEG is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *   
 *  FlasKMPEG is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *   
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *
 */

#include "Deinterlacer.h"
#include "debug.h"
 

/*
 * ASM_COPY(o, i, runsize)
 *
 * Copies `runsize` 32-bit words from `i` (input / source) to `o`
 * (output / destination) with a single `rep movsd`.
 *
 * NOTE: the argument order is (destination, source, count) - the same
 * order as memcpy(), but the count is in dwords, not bytes.
 *
 * The three arguments are pasted directly as `mov` operands, so they must
 * be plain variables the inline assembler can address, not expressions.
 * The block overwrites ECX, ESI and EDI.
 * NOTE(review): `rep movsd` walks upward only while the CPU direction flag
 * is clear; this macro does not clear it itself - confirm callers never
 * run with DF set.
 */
#define ASM_COPY(o,i,runsize)  __asm             \
{									\
__asm			mov ecx, runsize    \
__asm			mov esi, i          \
__asm           mov edi, o          \
__asm			rep movsd           \
}

/* Type used for frame dimensions (width and height, in pixels / lines) in
   DeInterlaceRGB32.
   NOTE(review): ui32 is presumably an unsigned 32-bit integer; if so,
   expressions such as `h - 1` wrap around when the dimension is 0 - confirm
   against the ui32 declaration. */
typedef ui32 PixDim;


/*
 * Deinterlaces one RGB32 frame from `source` into `dest`.
 *
 * The mode is selected by m_mfd.fieldShift:
 *
 *  - Field-shift mode (fieldShift != 0): the even lines of the current
 *    frame are copied to the even lines of `dest`; the odd lines of `dest`
 *    are filled from the odd field buffered in m_mfd.prevFrame (or, for the
 *    very first frame, from the current even lines). The current odd field
 *    is then buffered for the next call. No motion processing is done.
 *
 *  - Motion-adaptive mode: every pixel of lines 1..h-2 is compared, per
 *    colour channel, with the pixel stored in m_mfd.prevFrame. Pixels whose
 *    difference exceeds m_mfd.threshold are flagged in the bit array
 *    m_mfd.moving. The frame is then rendered: the first and last lines are
 *    copied unchanged, and on the lines in between flagged pixels are either
 *    blended with (m_mfd.Blend) or interpolated from the lines above and
 *    below. With m_mfd.motionOnly set, unflagged pixels are written as 0.
 *
 * source  frame to read; its width, height and pitch drive the processing.
 * dest    frame to write; only its buffer and pitch are read here.
 *
 * Returns 0 in every case, including when a frame pointer is NULL or the
 * frame is too small to process.
 */
int FlDeinterlacer::DeInterlaceRGB32(CFrame *source, CFrame *dest) {

	/* Validate the frames before anything is read from them. */
	if(!source || !dest)
		return 0;

	const int		bitpitch = m_mfd.bitpitch;
	const long		src_pitch = source->GetPitch();
	const long		dst_pitch = dest->GetPitch();
	const PixDim	w = source->GetWidth();
	const PixDim	h = source->GetHeight();
	Pixel32 *		src = (Pixel32 *)source->GetBuffer();
	Pixel32 *		dst = (Pixel32 *)dest->GetBuffer();

	int x, y;
	long prevValue;
	Pixel32 p0, p1, p2;
	long r, g, b, r0, g0, b0;
	int *lumptr;

	/* If we are shifting field phase by one field... */
	if (m_mfd.fieldShift)
	{
		/* This mode is used typically to clean up PAL video which
		   has erroneously been digitized with the field phase off by
		   one field. The result is that the frames look interlaced,
		   but really if we can phase shift by one field, we'll get back
		   the original progressive frames. This code does that and then
		   skips motion processing. */

		/* Copy the even field of the current frame to the output.
		   ASM_COPY takes (destination, source, dword count). */
		for (y = 0; y < h/2; y++)
		{
			ASM_COPY(dst, src, w);

			/* Each buffer is stepped with its own pitch. */
			src = (Pixel32 *)((char *)src + 2 * src_pitch);
			dst = (Pixel32 *)((char *)dst + 2 * dst_pitch);
		}

		/* If this is not the first frame, copy the buffered odd field
		   of the last frame to the output. This creates a correct progressive
		   output frame. If this is the first frame, a buffered field is not
		   available, so fill the odd lines from the current even field. */
		if (m_mfd.first == TRUE)
		{
			src = (Pixel32 *)source->GetBuffer();
			dst = (Pixel32 *)((char *)dest->GetBuffer() + dst_pitch);
			for (y = 0; y < h/2; y++)
			{
				ASM_COPY(dst, src, w);

				src = (Pixel32 *)((char *)src + 2 * src_pitch);
				dst = (Pixel32 *)((char *)dst + 2 * dst_pitch);
			}
			m_mfd.first = FALSE;
		}
		else
		{
			/* prevFrame is stored with a stride of w ints per line;
			   start at line 1 (the first odd line). */
			lumptr = m_mfd.prevFrame + w;
			dst = (Pixel32 *)((char *)dest->GetBuffer() + dst_pitch);
			for (y = 0; y < h/2; y++)
			{
				ASM_COPY(dst, lumptr, w);

				lumptr += 2 * w;
				dst = (Pixel32 *)((char *)dst + 2 * dst_pitch);
			}
		}

		/* Finally, save the odd field of the current frame in the buffer.
		   It will be used to create the next frame. */
		src = (Pixel32 *)((char *)source->GetBuffer() + src_pitch);
		lumptr = m_mfd.prevFrame + w;
		for (y = 0; y < h/2; y++)
		{
			ASM_COPY(lumptr, src, w);

			lumptr += 2 * w;
			src = (Pixel32 *)((char *)src + 2 * src_pitch);
		}
		return 0;
	}

	/* End special mode code. Doing full motion-adaptive deinterlacing. */

	/* The loops below use `h - 1` on an unsigned height and do/while pixel
	   loops that always run once, so frames with fewer than two lines or
	   with no pixels per line must be rejected here. */
	if (h < 2 || w == 0) return 0;

	// Compute differences for all pixels by comparing each pixel
	// to its corresponding pixel in the previous frame. Then do a threshold
	// test of the difference to decide if the pixel is 'moving'.
	// Then create an array of flags indicating pixels that have moved since
	// the last frame.

	lumptr = m_mfd.prevFrame;
	unsigned long *maskptr = m_mfd.moving + m_mfd.bitpitch;

	for (y = 1; y < h - 1; y++)
	{
		long pixmask = 0;

		src = (Pixel32 *)((char *)src + src_pitch);

		x = 0;
		do
		{
			// One flag bit per pixel; the first pixel of every group of 32
			// ends up in the most significant bit of the stored word.
			pixmask <<= 1;

			// Fetch the stored pixel and replace it with the current one.
			prevValue = *lumptr;
			*lumptr++ = src[x];

			b  = (src[x] & 0xff);
			b0 = (prevValue & 0xff);
			r  = (src[x] & 0xff0000) >> 16;
			r0 = (prevValue & 0xff0000) >> 16;
			g  = (src[x] & 0xff00)   >> 8;
			g0 = (prevValue & 0xff00)   >> 8;

			// Set the moving flag if the diff of any channel exceeds the
			// configured threshold.
			if (abs(b - b0) > m_mfd.threshold ||
				abs(r - r0) > m_mfd.threshold ||
				abs(g - g0) > m_mfd.threshold)
				pixmask |= 1;

			// Flush a completed group of 32 flags.
			if (!(++x & 31))
				*maskptr++ = pixmask;
		} while(x<w);

		// Left-align and flush the flags of a final, partial group.
		if (x & 31)
			*maskptr++ = pixmask << (-x&31);
	}

	// Remove combing from the motion areas and render.
	// The first line gets a free ride.
	src = (Pixel32 *)source->GetBuffer();
	dst = (Pixel32 *)dest->GetBuffer();

	ASM_COPY(dst, src, w);

	// maskptr now walks the flag rows from row 0: for line y, maskptr[0] is
	// the row above, maskptr[bitpitch] the current row and
	// maskptr[bitpitch*2] the row below. A pixel is treated as moving when
	// it is flagged in any of the three.
	// The flag test `(signed long)mask >= 0` checks the top bit of the word.
	// NOTE(review): this relies on `long` being 32 bits wide - confirm for
	// the target compiler.
	maskptr = m_mfd.moving;

	for (y = 1; y < h - 1; y++)
	{
		unsigned long mask;

		src = (Pixel32 *)((char *)src + src_pitch);
		dst = (Pixel32 *)((char *)dst + dst_pitch);

		if (m_mfd.motionOnly) {
			if (m_mfd.Blend) {
				x = 0;
				do {
					if (!(x & 31)) {
						mask = maskptr[bitpitch] | maskptr[0] | maskptr[bitpitch*2];
						++maskptr;
					}

					if ((signed long)mask >= 0)
						dst[x] = 0;
					else {

						/* Blend fields: 1/2 current + 1/4 above + 1/4 below.
						   The low bits are masked off first so the shifts
						   cannot leak bits between colour channels. */
						p0 = src[x];
						p0 &= 0x00fefefe;

						p1 = ((Pixel32 *)(((char *)src)-src_pitch))[x];
						p1 &= 0x00fcfcfc;

						p2 = ((Pixel32 *)(((char *)src)+src_pitch))[x];
						p2 &= 0x00fcfcfc;

						dst[x] = (p0>>1) + (p1>>2) + (p2>>2);
					}
					mask <<= 1;

				} while(++x<w);
			} else {
				x = 0;
				do {
					if (!(x & 31)) {
						mask = maskptr[bitpitch] | maskptr[0] | maskptr[bitpitch*2];
						++maskptr;
					}

					if ((signed long)mask >= 0)
						dst[x] = 0x0;
					else if (y&1) {
						/* Odd line: average of the lines above and below. */
						p1 = ((Pixel32 *)(((char *)src)-src_pitch))[x];
						p1 &= 0x00fefefe;

						p2 = ((Pixel32 *)(((char *)src)+src_pitch))[x];
						p2 &= 0x00fefefe;
						dst[x] = (p1>>1) + (p2>>1);
					} else
						dst[x] = src[x];

					mask <<= 1;

				} while(++x<w);
			}
		} else {
			if (m_mfd.Blend) {
				x = 0;
				do {
					if (!(x & 31)) {
						mask = maskptr[bitpitch] | maskptr[0] | maskptr[bitpitch*2];
						++maskptr;
					}

					if ((signed long)mask >= 0)
						dst[x] = src[x];
					else {

						/* Blend fields: 1/2 current + 1/4 above + 1/4 below. */
						p0 = src[x];
						p0 &= 0x00fefefe;

						p1 = ((Pixel32 *)(((char *)src)-src_pitch))[x];
						p1 &= 0x00fcfcfc;

						p2 = ((Pixel32 *)(((char *)src)+src_pitch))[x];
						p2 &= 0x00fcfcfc;

						dst[x] = (p0>>1) + (p1>>2) + (p2>>2);
					}
					mask <<= 1;

				} while(++x<w);
			} else {
				// Doing line interpolate. Thus, even lines are going through
				// for moving and non-moving mode. Odd line pixels will be subject
				// to the motion test.
				if (y&1) {
					x = 0;
					do {
						if (!(x & 31)) {
							mask = maskptr[bitpitch] | maskptr[0] | maskptr[bitpitch*2];
							++maskptr;
						}

						if ((signed long)mask >= 0)
							dst[x] = src[x];
						else {
							p1 = ((Pixel32 *)(((char *)src)-src_pitch))[x];
							p1 &= 0x00fefefe;

							p2 = ((Pixel32 *)(((char *)src)+src_pitch))[x];
							p2 &= 0x00fefefe;

							dst[x] = (p1>>1) + (p2>>1);
						}

						mask <<= 1;

					} while(++x<w);
				} else {
					// Even line; pass it through.
					ASM_COPY(dst, src, w);

					// Skip this line's flag row so maskptr stays in step.
					maskptr += bitpitch;
				}
			}
		}

	}

	// The last line gets a free ride.
	src = (Pixel32 *)((char *)src + src_pitch);
	dst = (Pixel32 *)((char *)dst + dst_pitch);
	memcpy(dst, src, w*4);

	return 0;
}

/*
 * Blends every line of an 8-bit plane with the line below it.
 *
 * For each of the first h-1 lines, dst = (line + next line) / 2, byte by
 * byte; the last line has no successor and is copied unchanged.
 *
 * src    first byte of the input plane
 * dst    first byte of the output plane
 * w      bytes to process per line
 * h      number of lines
 * pitch  distance in bytes between the starts of consecutive lines, used
 *        for both src and dst
 *
 * The MMX loop handles four bytes per iteration and walks both buffers
 * contiguously, so it is only correct for tightly packed planes
 * (pitch == w) whose width is a multiple of 4 and that have at least two
 * lines. Its count-down loops would also wrap around for w < 4 or h < 2.
 * Every other geometry is handled by the plain C++ path below; empty or
 * NULL input is ignored.
 */
void FlDeinterlacer::BlendFrame(Pixel8 *src, Pixel8 *dst, int w, int h, int pitch)
{
  if (!src || !dst || w <= 0 || h <= 0)
    return;

  if (h >= 2 && w >= 4 && (w & 3) == 0 && pitch == w)
  {
    __asm{

      mov esi, src
      mov edi, dst
      mov ebx, pitch
      mov eax, h

      dec eax ;             h-1 lines are blended, the last one is copied

row_loop:
      mov ecx, w
      shr ecx, 2 ;          four bytes per iteration

      pxor mm2, mm2 ;       zero registers used to widen bytes to words
      pxor mm3, mm3

col_loop:

      movd mm0, [esi]      ;grab 4 pixels first line
      movd mm1, [esi + ebx] ;grab 4 pixels next line

      punpcklbw mm0, mm2     ;first 4 pels
      punpcklbw mm1, mm3

      paddw mm0, mm1
      add esi, 4

      psrlw mm0, 1 ;        (a + b) / 2
      packuswb mm0, mm0

      movd [edi], mm0
      add edi, 4

      dec ecx
      jnz col_loop

      dec eax
      jnz row_loop

; copy the last line
      mov ecx, w
      shr ecx, 2
last_line:

      movd mm0, [esi]
      movd [edi], mm0

      add esi, 4
      add edi, 4

      dec ecx
      jnz last_line

      emms ;                leave MMX state so x87 code can run again

    }
    return;
  }

  // Portable path: same arithmetic, but honours pitch and any width.
  for (int row = 0; row < h - 1; ++row)
  {
    const Pixel8 *cur  = src + row * pitch;
    const Pixel8 *next = cur + pitch;
    Pixel8 *out        = dst + row * pitch;

    for (int col = 0; col < w; ++col)
    {
      // Mask to 8 bits so the sum is unsigned, as in the MMX path.
      out[col] = (Pixel8)(((cur[col] & 0xff) + (next[col] & 0xff)) >> 1);
    }
  }

  // The last line is copied unchanged.
  memcpy(dst + (h - 1) * pitch, src + (h - 1) * pitch, w);
}

/*
 * Deinterlaces a YV12 frame by running BlendFrame over each of its three
 * planes. The planes are read as tightly packed and stored back to back in
 * the frame buffer: a full-size Y plane followed by two planes of half
 * width and half height (V, then U). Always returns 0.
 */
int FlDeinterlacer::DeInterlaceYV12(CFrame *source, CFrame *dest) 
{
  const int lumaW   = source->GetWidth();
  const int lumaH   = source->GetHeight();
  const int chromaW = lumaW >> 1;
  const int chromaH = lumaH >> 1;

  Pixel8 *in  = (Pixel8 *)source->GetBuffer();
  Pixel8 *out = (Pixel8 *)dest->GetBuffer();

  // Y plane
  BlendFrame( in, out, lumaW, lumaH, lumaW );

  // V plane starts right after the Y plane
  in  += lumaW * lumaH;
  out += lumaW * lumaH;
  BlendFrame( in, out, chromaW, chromaH, chromaW );

  // U plane starts right after the V plane
  in  += chromaH * chromaW;
  out += chromaH * chromaW;
  BlendFrame( in, out, chromaW, chromaH, chromaW );

  return 0;
}

/*
 * Dispatches deinterlacing according to the format of `source`.
 *
 * Returns 0 when either frame is NULL, the result of the format-specific
 * routine for RGB32 and YV12 frames, and 1 for any other format, in which
 * case dest->SetFrame(source) is called instead of deinterlacing.
 */
int FlDeinterlacer::DeInterlace(CFrame *source, CFrame *dest) 
{
  if( source == NULL || dest == NULL )
    return 0;

  switch( source->GetFormat() )
  {
  case FRAME_RGB32:
    return DeInterlaceRGB32( source, dest );

  case FRAME_YV12:
    return DeInterlaceYV12( source, dest );

  default:
    // No deinterlacer for this format.
    dest->SetFrame( source );
    break;
  }

  return 1;
}


/*
 * Creates an unconfigured deinterlacer.
 *
 * The work buffers are set to NULL because StopSimple() tests these
 * pointers before calling delete[] on them; they must never hold an
 * indeterminate value.
 */
FlDeinterlacer::FlDeinterlacer()
{
  m_bConfigured   = false;
  m_mfd.prevFrame = NULL;
  m_mfd.moving    = NULL;
}

int FlDeinterlacer::Configure(void *conf, int confsize )
{
  FLASSERT( confsize==sizeof(TDeinterlacerConfig) )
  TDeinterlacerConfig *rc = (TDeinterlacerConfig *)conf;

  m_cfg = *rc;
  m_bConfigured = true;
  return flfil_ok;
}

/*
 * Validates an input description and fills in the matching output fields.
 *
 * fc  in/out: iw, ih and id are read; od, ow, oh, op, olag, oprovided and
 *     ocanmodify are written. The result is also stored in m_fc.
 *
 * Returns 1 on success, or flfil_error when the filter has not been
 * configured, `fc` is NULL, or the input width or height is 0.
 */
int FlDeinterlacer::ValFilterConf(flfilter_conf *fc )
{
  if(!m_bConfigured)
  {
    DBG_STR((str, "FlDeinterlacer::Validate - You need to configure first\n"))
      return flfil_error;
  }

  // nothing to validate without a descriptor
  if(!fc)
    return flfil_error;

  // if input is 0,0 is not valid
  if( fc->iw==0 ||
      fc->ih==0 )
      return flfil_error;

  // the same delay
  fc->od = fc->id;

  // output res is the same as the input res
  fc->ow = fc->iw;
  fc->oh = fc->ih;

  // no on-place, no lag
  fc->op = 0;
  fc->olag = 0;
  fc->oprovided = 0;
  fc->ocanmodify = 1;

  // store this validation data for start
  m_fc = *fc;

  return 1;
}

/*
 * Copies the stored filter description (m_fc) into *fc.
 *
 * Returns 1 on success, 0 when the filter has not been configured, and
 * flfil_error when the filter is configured but `fc` is NULL.
 */
int FlDeinterlacer::GetFilterConf( flfilter_conf *fc )
{
  if( m_bConfigured )
  {
    if( fc == NULL )
      return flfil_error;

    *fc = m_fc;
    return 1;
  }

  DBG_STR((str, "FlDeinterlacer::GetConf - You need to configure first\n"))
  return 0;
}

/*
 * Prepares the filter for processing frames of the size and format held
 * in m_fc.
 *
 * For RGB32 input this allocates and zeroes the previous-frame buffer
 * (one int per pixel) and the motion flag array (one bit per pixel, each
 * line padded to whole 32-bit words), and resets the processing state.
 * YV12 input needs no state.
 *
 * Returns 0 when the filter has not been configured, flfil_ok otherwise.
 */
int FlDeinterlacer::StartSimple()
{
  if( !m_bConfigured )
    return 0;

  const int width  = m_fc.iw;
  const int height = m_fc.ih;

  // Only RGB32 needs working buffers; nothing to do for YV12.
  if( m_fc.iformat == FRAME_RGB32 )
  {
    m_mfd.prevFrame  = new int[ width*height ];
    m_mfd.fieldShift = 0;
    m_mfd.Blend      = m_cfg.blend;
    m_mfd.threshold  = m_cfg.threshold;
    m_mfd.motionOnly = 0;

    memset( m_mfd.prevFrame, 0, width*height*sizeof(int) );

    // 32 flags per word, rounded up to a whole word per line
    m_mfd.bitpitch = (width + 31) / 32;
    m_mfd.moving   = new unsigned long[ m_mfd.bitpitch * height ];
    memset( m_mfd.moving, 0, sizeof(unsigned long)*m_mfd.bitpitch * height );

    m_mfd.skip  = FALSE;
    m_mfd.first = TRUE;
  }

  return flfil_ok;
}

/*
 * Deinterlaces frame `in` into frame `out`.
 * The result of DeInterlace() is not inspected; flfil_ok is always
 * returned.
 */
int FlDeinterlacer::ProcessSimple(CFrame *in, CFrame *out)
{
  (void)DeInterlace( in, out );

  return flfil_ok;
}

/*
 * Releases the working buffers used for RGB32 input and resets their
 * pointers to NULL. YV12 input has nothing to release.
 * Always returns flfil_ok.
 */
int FlDeinterlacer::StopSimple()
{
  if( m_fc.iformat == FRAME_RGB32 )
  {
    // delete[] on a NULL pointer does nothing, so no test is needed.
    delete[] m_mfd.prevFrame;
    m_mfd.prevFrame = NULL;

    delete[] m_mfd.moving;
    m_mfd.moving = NULL;
  }

  return flfil_ok;
}