/* 
 *  FrameSource.h
 *
 *	Copyright (C) Alberto Vigata - July 2000 - ultraflask@yahoo.com
 *
 *  This file is part of FlasKMPEG, a free MPEG to MPEG/AVI converter
 *	
 *  FlasKMPEG is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *   
 *  FlasKMPEG is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *   
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *
 */


#ifndef FRAMESOURCE_H
#define FRAMESOURCE_H


// Frame flag bits; combined with bitwise OR and stored in CFrame::m_nFrameFlags.
#define FRAME_INTERLACED  0x01  // Full frame but interlaced
#define FRAME_PROGRESSIVE 0x02  // Full progressive frame
#define FRAME_TOPFIELD    0x04  // Just top field present
#define FRAME_BOTTOMFIELD 0x08  // Just bottom field present
#define FRAME_ISLASTFRAME 0x10  // Last frame of the stream

// Alignment, in bytes, requested for buffers created by CFrame::aligned_new().
#define FRAME_MEM_ALIGN   16

// Largest frame buffer, in bytes, that CFrame will allocate or clear.
// Parenthesized so the macro stays one value inside larger expressions
// (for example "x / MAX_BUF").
#define MAX_BUF (2048*2048*32)

#include <windows.h>
#include "flasktypes.h"
#include "thread.h"
#include <list>
using namespace std;

// Pixel format identifiers stored in CFrame::m_nFormat, listed by value.
// Within this header only FRAME_RGB gets a buffer from CFrame::Alloc().
#define FRAME_RGB    0
#define FRAME_YUY2   1
#define FRAME_YUV420 2
#define FRAME_YUV422 3
#define FRAME_YUV444 4

// 64-bit constants loaded into MMX registers by the inline-assembly
// converters below.
//
// The mmmask_NNNN values repeat one 16-bit word four times. The name
// suffix is the word's value in decimal for 0016 (0x0010) and 0128
// (0x0080); mmmask_0040 holds 0x0040 (decimal 64) and is not referenced
// anywhere in this header.
static const __int64 mmmask_0001 = 0x0001000100010001;
static const __int64 mmmask_0002 = 0x0002000200020002;
static const __int64 mmmask_0003 = 0x0003000300030003;
static const __int64 mmmask_0004 = 0x0004000400040004;
static const __int64 mmmask_0005 = 0x0005000500050005;
static const __int64 mmmask_0007 = 0x0007000700070007;
static const __int64 mmmask_0016 = 0x0010001000100010;
static const __int64 mmmask_0040 = 0x0040004000400040;
static const __int64 mmmask_0128 = 0x0080008000800080;
// Multipliers used with pmaddwd by From444toRGB32odd/even; results are
// shifted right by 13 afterwards, i.e. the words are scaled by 8192.
//   mmmask_cbu      : 0x408D applied to (u - 128) for blue
//   mmmask_cgu_cgv  : 0xF377 applied to (u - 128) and 0xE5FC applied to
//                     (v - 128) for green (both negative as signed words)
//   mmmask_crv      : 0x3313 applied to (v - 128) for red
//   YUVRGB_Scale    : 0x2543 applied to (y - 16) plus 0x1000 applied to
//                     the constant 1 (rounding term)
static const __int64 mmmask_cbu = 0x0000408D0000408D;
static const __int64 mmmask_cgu_cgv = 0xF377E5FCF377E5FC;
static const __int64 mmmask_crv = 0x0000331300003313;
static const __int64 YUVRGB_Scale = 0x1000254310002543;

        
// Descriptor of a planar YUV picture: three plane pointers plus
// dimensions. SetField()/SetFrame() read Y, U and V from the buffer of
// a FRAME_YUV444 frame through this structure.
typedef struct YVImage
{
  unsigned char *Y;         // luma plane
  int            nLumaX;    // presumably luma width  - TODO confirm
  int            nLumaY;    // presumably luma height - TODO confirm
  unsigned char *U;         // first chroma plane
  unsigned char *V;         // second chroma plane
  int            nChromaX;  // presumably chroma width  - TODO confirm
  int            nChromaY;  // presumably chroma height - TODO confirm
} TYUVImage;



class CFrame;

// Abstract owner of a set of frames.
// CFrame::Release() hands a frame back through AddFreeFrame() once its
// reference count drops to zero.
class CFrameBuffer
{
public:
    virtual ~CFrameBuffer() {}

    // Obtain a frame from the implementer (exact semantics are defined
    // by the concrete class).
    virtual CFrame *GetFreeFrame() = 0;

    // Give pFrame back to the implementer.
    virtual void AddFreeFrame(CFrame *pFrame) = 0;
};

class CFrame
{
public:
  // Creates an empty frame: no pixel buffer, zero size, format 0
  // (FRAME_RGB), reference count 0.
  //
  // pFrameBuffer - buffer that Release() notifies when the reference
  //                count reaches zero; may be NULL.
  //
  // Both info structures are zeroed so that GetYuvInfo() and
  // GetBmpInfo() never expose uninitialized memory.
  CFrame( CFrameBuffer *pFrameBuffer )
    : m_pFrameBuffer( pFrameBuffer ),
      m_bOwnBuffer( false ),
      m_nPresTime( 0 ),
      m_nWidth( 0 ),
      m_nHeigth( 0 ),
      m_nFormat( 0 ),
      m_nDepth( 0 ),
      m_nRef( 0 ),
      m_pData( 0 ),
      m_nFrameFlags( 0 ),
      m_nAllocatedOffset( 0 ),
      m_nDataOffset( 0 )
  {
    memset( &m_sYuvInfo, 0, sizeof( m_sYuvInfo ) );
    memset( &m_sBitmapInfo, 0, sizeof( m_sBitmapInfo ) );
  }
  // Releases the pixel buffer through DeAlloc(), which only frees a
  // buffer this frame owns.
  ~CFrame()
  {
    DeAlloc();
  }
  

  // Deep-copies size, presentation time, format, depth, flags and pixel
  // data from oFrame into a freshly allocated buffer owned by this
  // frame.
  //
  // Nothing happens when oFrame is this very object or when oFrame is
  // not valid (see IsValid()). The reference count, the frame buffer
  // pointer and m_nDataOffset of this frame are not touched.
  //
  // NOTE(review): the pixels are read from oFrame.GetBuffer(), i.e.
  // after oFrame's data offset, for GetBufferSize() bytes - confirm that
  // sources with a non-zero offset own enough memory for that read.
  CFrame &operator =(CFrame &oFrame)
  {
    // Self-assignment would free the buffer and then copy from it.
    if( this == &oFrame )
      return *this;
    if(!oFrame.IsValid())
      return *this;

    // Drop whatever buffer this frame currently owns.
    DeAlloc();

    m_nWidth      = oFrame.GetWidth();
    m_nHeigth     = oFrame.GetHeigth();
    m_nPresTime   = oFrame.GetPresTime();
    m_nFormat     = oFrame.GetFormat();
    m_nDepth      = oFrame.GetDepth();
    m_nFrameFlags = oFrame.GetFlags();
    m_bOwnBuffer  = true;

    // Alloc() only creates a buffer for FRAME_RGB and can fail, so copy
    // only when there really is a destination.
    const ui32 nBytes = oFrame.GetBufferSize();
    if( Alloc() && m_pData && nBytes )
      memcpy( m_pData, oFrame.GetBuffer(), nBytes );
    return *this;
  }
  // Drops one reference while holding the object lock. When the count
  // reaches zero the frame is handed back to its frame buffer, if it
  // has one. Calling Release() with a count of zero does nothing.
  void Release()
  {
    CFlAutoLock lockObject(&m_csObject);

    // Unbalanced release: ignore instead of wrapping the counter.
    if( m_nRef == 0 )
      return;

    if( --m_nRef != 0 )
      return;

    if( m_pFrameBuffer )
      m_pFrameBuffer->AddFreeFrame(this);
  }
  // Adds one reference while holding the object lock.
  void AddRef()
  {
    CFlAutoLock lockObject(&m_csObject);
    ++m_nRef;
  }
  // Stores the presentation time of this frame.
  void SetPresTime( ui64 nPresTime )
  {
    m_nPresTime = nPresTime;
  }
  // Records new dimensions only; the pixel buffer is not reallocated.
  void SetSize( ui32 nWidth, ui32 nHeigth )
  {
    m_nHeigth = nHeigth;
    m_nWidth  = nWidth;
  }
  // Re-describes the frame and allocates a matching buffer.
  // The previous buffer is released first (if owned); the result of
  // Alloc() is not reported to the caller.
  void Set( ui32 nWidth, ui32 nHeight, ui32 nFormat, ui32 nDepth, ui32 nFlags )
  {
    DeAlloc();

    SetSize( nWidth, nHeight );
    SetFormat( nFormat );
    SetDepth( nDepth );
    SetFlags( nFlags );

    Alloc();
  }

  // --- Plain setters: each one stores a single property, nothing else ---

  // Byte offset from the data pointer at which the image starts.
  void SetOffset (ui32 nOffset)
  {
    m_nDataOffset = nOffset;
  }

  // Pixel format, one of the FRAME_* format values.
  void SetFormat ( ui32 nFormat )
  {
    m_nFormat = nFormat;
  }

  // Bits per pixel.
  void SetDepth ( ui32 nDepth )
  {
    m_nDepth = nDepth;
  }

  // Replaces the data pointer. Neither the previous buffer nor the
  // ownership flag is touched.
  void SetBuffer ( ui8 *pData )
  {
    m_pData = pData;
  }

  // Replaces all frame flags.
  void SetFlags ( ui32 nFlags )
  {
    m_nFrameFlags = nFlags;
  }

  // Marks this frame as the last one of the stream.
  void SetLastFrame()
  {
    m_nFrameFlags |= FRAME_ISLASTFRAME;
  }
  // --- Plain getters ---

  // Start of the image: the data pointer advanced by the data offset.
  ui8* GetBuffer()
  {
    return m_pData + m_nDataOffset;
  }

  // Raw data pointer, without the data offset applied.
  ui8* GetData()
  {
    return m_pData;
  }

  ui32 GetWidth()
  {
    return m_nWidth;
  }

  ui32 GetHeigth()
  {
    return m_nHeigth;
  }

  ui64 GetPresTime()
  {
    return m_nPresTime;
  }

  ui32 GetFormat()
  {
    return m_nFormat;
  }

  ui32 GetDepth()
  {
    return m_nDepth;
  }

  ui32 GetFlags()
  {
    return m_nFrameFlags;
  }

  // True when the top-field or the bottom-field flag is set.
  bool IsField()
  {
    return (m_nFrameFlags & (FRAME_TOPFIELD | FRAME_BOTTOMFIELD)) != 0;
  }

  // True when the progressive or the interlaced flag is set.
  bool IsFull()
  {
    return (m_nFrameFlags & (FRAME_PROGRESSIVE | FRAME_INTERLACED)) != 0;
  }

  // True when the last-frame flag is set.
  bool IsLastFrame()
  {
    return (m_nFrameFlags & FRAME_ISLASTFRAME) != 0;
  }

  // Converts every other line of pFrame into this frame's buffer without
  // changing this frame's format.
  //
  // pFrame    - source frame. Only FRAME_YUV444 sources are converted;
  //             their buffer is read as a TYUVImage holding the three
  //             plane pointers. Any other source format is ignored.
  // bTopField - true selects From444toRGB32odd(), false selects
  //             From444toRGB32even().
  //
  // The call is a no-op when pFrame is NULL or not valid, when this
  // frame has no buffer, when this frame's format is neither FRAME_RGB
  // nor FRAME_YUY2, or when the two resolutions differ.
  //
  // The converters write 4 bytes per pixel, four pixels at a time.
  // NOTE(review): From444toRGB32odd() starts at the second source line;
  // confirm this is the line set callers mean by "top field".
  void SetField( CFrame *pFrame, bool bTopField )
  {
    if( !pFrame || !pFrame->IsValid() || !m_pData )
      return;

    // The original test used "||", which is always true and also
    // swallowed the resolution check below.
    if( m_nFormat != FRAME_RGB &&
        m_nFormat != FRAME_YUY2 )
      return;

    // Resolutions must match
    if( (m_nWidth  != pFrame->GetWidth()) ||
        (m_nHeigth != pFrame->GetHeigth()) )
      return;

    switch( pFrame->GetFormat() )
    {
      case FRAME_YUV444:
      {
        TYUVImage *YUVImage = (TYUVImage *)pFrame->GetBuffer();
        if(bTopField)
          From444toRGB32odd(YUVImage->Y, YUVImage->U, YUVImage->V, (unsigned char *)m_pData, m_nWidth, m_nHeigth);
        else
          From444toRGB32even(YUVImage->Y, YUVImage->U, YUVImage->V, (unsigned char *)m_pData, m_nWidth, m_nHeigth);
        break;
      }
      default:
        break;
    }
  }
  // Converts all lines of pFrame into this frame's buffer without
  // changing this frame's format, by running From444toRGB32odd() and
  // then From444toRGB32even().
  //
  // pFrame - source frame. Only FRAME_YUV444 sources are converted;
  //          their buffer is read as a TYUVImage holding the three plane
  //          pointers. Any other source format is ignored.
  //
  // The call is a no-op when pFrame is NULL or not valid, when this
  // frame has no buffer, when this frame's format is neither FRAME_RGB
  // nor FRAME_YUY2, or when the two resolutions differ.
  // The converters write 4 bytes per pixel, four pixels at a time.
  void SetFrame( CFrame *pFrame )
  {
    if( !pFrame || !pFrame->IsValid() || !m_pData )
      return;

    // The original test used "||", which is always true and also
    // swallowed the resolution check below.
    if( m_nFormat != FRAME_RGB &&
        m_nFormat != FRAME_YUY2 )
      return;

    // Resolutions must match
    if( (m_nWidth  != pFrame->GetWidth()) ||
        (m_nHeigth != pFrame->GetHeigth()) )
      return;

    switch( pFrame->GetFormat() )
    {
      case FRAME_YUV444:
      {
        TYUVImage *YUVImage = (TYUVImage *)pFrame->GetBuffer();
        From444toRGB32odd(YUVImage->Y, YUVImage->U, YUVImage->V, (unsigned char *)m_pData, m_nWidth, m_nHeigth);
        From444toRGB32even(YUVImage->Y, YUVImage->U, YUVImage->V, (unsigned char *)m_pData, m_nWidth, m_nHeigth);
        break;
      }
      default:
        break;
    }
  }
  
  void Erase()
  {
    int bufSize;
    switch(m_nFormat)
    {
    case FRAME_RGB:
      bufSize = m_nWidth * m_nHeigth * (m_nDepth/8);
      if(bufSize>MAX_BUF)
        return;
      memset( m_pData, 0, bufSize) ;
      break;
    }
    return;
  }

  bool Alloc()
  {
    m_bOwnBuffer = true;
    bool bSuccess = true;
    int bufSize;
    switch(m_nFormat)
    {
      case FRAME_RGB:
        bufSize = m_nWidth * m_nHeigth * (m_nDepth/8);
        if(bufSize>MAX_BUF)
          return false;
        m_pData = aligned_new(bufSize);
        break;
    }
    return true;
  }
  void DeAlloc()
  {
    if(m_pData && m_bOwnBuffer)
      aligned_delete(m_pData);
    m_pData = NULL;
  }
  // Allocates nSize usable bytes whose start address is a multiple of
  // FRAME_MEM_ALIGN.
  //
  // The raw block is over-allocated by FRAME_MEM_ALIGN bytes; the
  // distance from the raw block to the aligned address is remembered in
  // m_nAllocatedOffset so that aligned_delete() can recover the pointer
  // returned by new[]. m_pData is updated as a side effect.
  //
  // Returns the aligned pointer, or NULL when nSize is 0, when adding
  // the padding would wrap, or when new[] yields NULL.
  ui8 *aligned_new(ui32 nSize)
  {
    if(!nSize)
      return NULL;

    // Room for the alignment padding; reject sizes that would wrap.
    const ui32 nPadded = nSize + FRAME_MEM_ALIGN;
    if( nPadded < nSize )
      return NULL;

    m_pData = new ui8[nPadded];
    if(!m_pData)
      return NULL;

    // Distance up to the next aligned address. The previous code added
    // "address % align", which only aligns when that remainder is 0 or
    // half the alignment.
    const ui32 nMisalign = (ui32)( ((size_t)m_pData) % FRAME_MEM_ALIGN );
    m_nAllocatedOffset = nMisalign ? (FRAME_MEM_ALIGN - nMisalign) : 0;
    m_pData += m_nAllocatedOffset;
    return m_pData;
  }
  // Frees a buffer obtained from aligned_new().
  // pAddress is the aligned pointer; stepping back by the recorded
  // alignment offset yields the pointer new[] originally returned.
  void aligned_delete(ui8 *pAddress)
  {
    ui8 *pRawBlock = pAddress - m_nAllocatedOffset;
    delete [] pRawBlock;
  }

  // Size in bytes of the pixel data implied by the current description.
  // Returns width * height * (depth / 8) for FRAME_RGB frames and 0 for
  // every other format, for empty frames, and for sizes above MAX_BUF.
  ui32 GetBufferSize()
  {
    if( m_nFormat != FRAME_RGB )
      return 0;

    const ui32 nMaxBytes      = MAX_BUF;
    const ui32 nBytesPerPixel = m_nDepth / 8;
    if( !m_nWidth || !m_nHeigth || !nBytesPerPixel )
      return 0;

    // Divide instead of multiplying so the limit check cannot wrap.
    if( nBytesPerPixel > nMaxBytes / m_nWidth )
      return 0;
    const ui32 nRowBytes = m_nWidth * nBytesPerPixel;
    if( m_nHeigth > nMaxBytes / nRowBytes )
      return 0;

    return nRowBytes * m_nHeigth;
  }
  // Refreshes the cached BITMAPINFO header from the frame description
  // and returns a pointer to it. The pointer refers to a member of this
  // frame.
  // biCompression is BI_RGB for FRAME_RGB and the raw format value
  // otherwise; biHeight receives the height as a positive number.
  BITMAPINFO *GetBmpInfo()
  {
    BITMAPINFOHEADER &rHeader = m_sBitmapInfo.bmiHeader;

    rHeader.biSize     = sizeof (BITMAPINFOHEADER);
    rHeader.biPlanes   = 1;
    rHeader.biBitCount = (WORD)m_nDepth;
    rHeader.biWidth    = m_nWidth;
    rHeader.biHeight   = m_nHeigth;

    if( m_nFormat == FRAME_RGB )
      rHeader.biCompression = BI_RGB;
    else
      rHeader.biCompression = m_nFormat;

    return &m_sBitmapInfo;
  }
  // Replaces the frame contents with a 32-bit RGB copy of a GDI bitmap.
  //
  // hBitmap - bitmap to read; it is queried with GetObject() for its
  //           size and with GetDIBits() for its pixels.
  //
  // Returns true when the pixels were copied. Returns false when the
  // bitmap cannot be queried, when no buffer could be allocated, when
  // no screen DC is available, or when GetDIBits() fails. On the failure
  // paths after GetObject() succeeded, the previous buffer has already
  // been released.
  bool GetFromBmp(HBITMAP hBitmap)
  {
    // Get info of the bitmap
    BITMAP sBitmap;
    if(!GetObject( hBitmap, sizeof(BITMAP),&sBitmap ))
      return false;

    DeAlloc();
    // Set properties for our bitmap
    // and allocate the space for it
    m_nWidth  = sBitmap.bmWidth;
    m_nHeigth = sBitmap.bmHeight;
    m_nDepth  = 32;
    m_nFormat = FRAME_RGB;
    // With a NULL bits pointer GetDIBits() only fills in the header and
    // still reports success, so insist on a real buffer.
    if( !Alloc() || !m_pData )
      return false;

    // Get the bitmap data
    HDC hDCScreen = GetDC( NULL );
    if( !hDCScreen )
      return false;

    const int nScanLines = GetDIBits( hDCScreen, hBitmap, 0, m_nHeigth, m_pData, GetBmpInfo(), DIB_RGB_COLORS);

    // Every GetDC() must be paired with ReleaseDC().
    ReleaseDC( NULL, hDCScreen );

    return nScanLines != 0;
  }

  // Gives access to the YUV plane descriptor stored inside this frame.
  // The returned pointer refers to a member of this frame.
  TYUVImage *GetYuvInfo()
  {
    TYUVImage *pYuvInfo = &m_sYuvInfo;
    return pYuvInfo;
  }

  // A frame is valid when it has a data pointer and a non-zero width
  // and height.
  bool IsValid()
  {
    if( m_pData == NULL )
      return false;
    return (m_nWidth != 0) && (m_nHeigth != 0);
  }
  
  // Doubles the number of rows of a plane whose rows are width/2 bytes
  // long (presumably one chroma plane of a 4:2:0 picture - TODO
  // confirm), using MMX. Four bytes are processed per inner iteration.
  //
  // src        - source plane, height/2 rows of width/2 bytes are read
  // dst        - destination plane, rows of width/2 bytes are written
  // width      - twice the row length in bytes
  // height     - twice the number of source rows
  // frame_type - non-zero selects the first code path (labels ending in
  //              "p"), zero selects the second (labels ending in "i");
  //              presumably progressive versus interlaced - TODO confirm
  //
  // First path: every source row k produces two output rows,
  //   (3*row[k] + row[k-1] + 2) >> 2   and
  //   (3*row[k] + row[k+1] + 2) >> 2,
  // with the very first and very last output rows being plain copies.
  // Second path: rows are blended with the weight pairs 7:1 and 5:3,
  // adding 4 and shifting right by 3; it advances two source rows and
  // four destination rows per outer iteration.
  //
  // Fix: in the middle loop of the first path both blends used to load
  // the row above; the address of the row below was computed only after
  // the loads. The row below is now loaded for the second output row.
  //
  // NOTE(review): every loop body runs at least once, so very small
  // heights and widths that are not a multiple of 8 look unsupported -
  // confirm against callers.
  void From420to422(unsigned char *src, unsigned char *dst, int width, int height, int frame_type)
  {

    int hwidth    = width / 2;
    int dwidth    = width * 2;
    int hheightd2 = height / 2 - 2;
    int qheightd2 = height / 4 - 2;

    if (frame_type)
    {
      __asm
      {
        mov         eax, [src]
        mov         ebx, [dst]
        mov         ecx, ebx
        add         ecx, [hwidth]
        mov         esi, 0x00
        movq        mm3, [mmmask_0003]
        pxor        mm0, mm0
        movq        mm4, [mmmask_0002]

        mov         edx, eax
        add         edx, [hwidth]
convyuv422topp:
        // first source row: copy it, then blend with the row below
        movd        mm1, [eax+esi]
        movd        mm2, [edx+esi]
        movd        [ebx+esi], mm1
        punpcklbw   mm1, mm0
        pmullw      mm1, mm3
        paddusw     mm1, mm4
        punpcklbw   mm2, mm0
        paddusw     mm2, mm1
        psrlw       mm2, 0x02
        packuswb    mm2, mm0

        add         esi, 0x04
        cmp         esi, [hwidth]
        movd        [ecx+esi-4], mm2
        jl          convyuv422topp

        add         eax, [hwidth]
        add         ebx, [width]
        add         ecx, [width]
        mov         esi, 0x00

        mov         edi, [hheightd2]
convyuv422p:
        // middle source rows: blend with the row above and the row below
        movd        mm1, [eax+esi]

        punpcklbw   mm1, mm0
        mov         edx, eax

        pmullw      mm1, mm3
        sub         edx, [hwidth]

        movd        mm5, [edx+esi]
        // edx = row below the current one
        mov         edx, eax
        add         edx, [hwidth]
        movd        mm2, [edx+esi]

        punpcklbw   mm5, mm0
        punpcklbw   mm2, mm0
        paddusw     mm5, mm1
        paddusw     mm2, mm1
        paddusw     mm5, mm4
        paddusw     mm2, mm4
        psrlw       mm5, 0x02
        psrlw       mm2, 0x02
        packuswb    mm5, mm0
        packuswb    mm2, mm0

        add         esi, 0x04
        cmp         esi, [hwidth]
        movd        [ebx+esi-4], mm5
        movd        [ecx+esi-4], mm2

        jl          convyuv422p

        add         eax, [hwidth]
        add         ebx, [width]
        add         ecx, [width]
        mov         esi, 0x00
        sub         edi, 0x01
        cmp         edi, 0x00
        jg          convyuv422p

        mov         edx, eax
        sub         edx, [hwidth]
convyuv422bottomp:
        // last source row: blend with the row above, then copy it
        movd        mm1, [eax+esi]
        movd        mm5, [edx+esi]
        punpcklbw   mm5, mm0
        movd        [ecx+esi], mm1

        punpcklbw   mm1, mm0
        pmullw      mm1, mm3
        paddusw     mm5, mm1
        paddusw     mm5, mm4
        psrlw       mm5, 0x02
        packuswb    mm5, mm0

        add         esi, 0x04
        cmp         esi, [hwidth]
        movd        [ebx+esi-4], mm5
        jl          convyuv422bottomp

        emms
      }
    }
    else
    {
      __asm
      {
        mov         eax, [src]
        mov         ecx, [dst]
        mov         esi, 0x00
        pxor        mm0, mm0
        movq        mm3, [mmmask_0003]
        movq        mm4, [mmmask_0004]
        movq        mm5, [mmmask_0005]

convyuv422topi:
        movd        mm1, [eax+esi]
        mov         ebx, eax
        add         ebx, [hwidth]
        movd        mm2, [ebx+esi]
        movd        [ecx+esi], mm1
        punpcklbw   mm1, mm0
        movq        mm6, mm1
        pmullw      mm1, mm3

        punpcklbw   mm2, mm0
        movq        mm7, mm2
        pmullw      mm2, mm5
        paddusw     mm2, mm1
        paddusw     mm2, mm4
        psrlw       mm2, 0x03
        packuswb    mm2, mm0

        mov         edx, ecx
        add         edx, [hwidth]
        pmullw      mm6, mm5
        movd        [edx+esi], mm2

        add         ebx, [hwidth]
        movd        mm2, [ebx+esi]
        punpcklbw   mm2, mm0
        pmullw      mm2, mm3
        paddusw     mm2, mm6
        paddusw     mm2, mm4
        psrlw       mm2, 0x03
        packuswb    mm2, mm0

        add         edx, [hwidth]
        add         ebx, [hwidth]
        pmullw      mm7, [mmmask_0007]
        movd        [edx+esi], mm2

        movd        mm2, [ebx+esi]
        punpcklbw   mm2, mm0
        paddusw     mm2, mm7
        paddusw     mm2, mm4
        psrlw       mm2, 0x03
        packuswb    mm2, mm0

        add         edx, [hwidth]
        add         esi, 0x04
        cmp         esi, [hwidth]
        movd        [edx+esi-4], mm2

        jl          convyuv422topi

        add         eax, [width]
        add         ecx, [dwidth]
        mov         esi, 0x00

        mov         edi, [qheightd2]
convyuv422i:
        movd        mm1, [eax+esi]
        punpcklbw   mm1, mm0
        movq        mm6, mm1
        mov         ebx, eax
        sub         ebx, [width]
        movd        mm3, [ebx+esi]
        pmullw      mm1, [mmmask_0007]
        punpcklbw   mm3, mm0
        paddusw     mm3, mm1
        paddusw     mm3, mm4
        psrlw       mm3, 0x03
        packuswb    mm3, mm0

        add         ebx, [hwidth]
        movq        mm1, [ebx+esi]
        add         ebx, [width]
        movd        [ecx+esi], mm3

        movq        mm3, [mmmask_0003]
        movd        mm2, [ebx+esi]

        punpcklbw   mm1, mm0
        pmullw      mm1, mm3
        punpcklbw   mm2, mm0
        movq        mm7, mm2
        pmullw      mm2, mm5
        paddusw     mm2, mm1
        paddusw     mm2, mm4
        psrlw       mm2, 0x03
        packuswb    mm2, mm0

        pmullw      mm6, mm5
        mov         edx, ecx
        add         edx, [hwidth]
        movd        [edx+esi], mm2

        add         ebx, [hwidth]
        movd        mm2, [ebx+esi]
        punpcklbw   mm2, mm0
        pmullw      mm2, mm3
        paddusw     mm2, mm6
        paddusw     mm2, mm4
        psrlw       mm2, 0x03
        packuswb    mm2, mm0

        pmullw      mm7, [mmmask_0007]
        add         edx, [hwidth]
        add         ebx, [hwidth]
        movd        [edx+esi], mm2

        movd        mm2, [ebx+esi]
        punpcklbw   mm2, mm0
        paddusw     mm2, mm7
        paddusw     mm2, mm4
        psrlw       mm2, 0x03
        packuswb    mm2, mm0

        add         edx, [hwidth]
        add         esi, 0x04
        cmp         esi, [hwidth]
        movd        [edx+esi-4], mm2

        jl          convyuv422i
        add         eax, [width]
        add         ecx, [dwidth]
        mov         esi, 0x00
        sub         edi, 0x01
        cmp         edi, 0x00
        jg          convyuv422i

convyuv422bottomi:
        movd        mm1, [eax+esi]
        movq        mm6, mm1
        punpcklbw   mm1, mm0
        mov         ebx, eax
        sub         ebx, [width]
        movd        mm3, [ebx+esi]
        punpcklbw   mm3, mm0
        pmullw      mm1, [mmmask_0007]
        paddusw     mm3, mm1
        paddusw     mm3, mm4
        psrlw       mm3, 0x03
        packuswb    mm3, mm0

        add         ebx, [hwidth]
        movq        mm1, [ebx+esi]
        punpcklbw   mm1, mm0
        movd        [ecx+esi], mm3

        pmullw      mm1, [mmmask_0003]
        add         ebx, [width]
        movd        mm2, [ebx+esi]
        punpcklbw   mm2, mm0
        movq        mm7, mm2
        pmullw      mm2, mm5
        paddusw     mm2, mm1
        paddusw     mm2, mm4
        psrlw       mm2, 0x03
        packuswb    mm2, mm0

        mov         edx, ecx
        add         edx, [hwidth]
        pmullw      mm7, [mmmask_0007]
        movd        [edx+esi], mm2

        add         edx, [hwidth]
        movd        [edx+esi], mm6

        punpcklbw   mm6, mm0
        paddusw     mm6, mm7
        paddusw     mm6, mm4
        psrlw       mm6, 0x03
        packuswb    mm6, mm0

        add         edx, [hwidth]
        add         esi, 0x04
        cmp         esi, [hwidth]
        movd        [edx+esi-4], mm6

        jl          convyuv422bottomi

        emms
      }
    }
  }

  // Doubles the length of every row of a plane using MMX: each source
  // byte is written out followed by the rounded average of itself and
  // its right-hand neighbour, (a + b + 1) >> 1. The last eight source
  // bytes of a row are simply written twice each.
  //
  // src    - source plane, rows of width/2 bytes
  // dst    - destination plane, rows of width bytes
  // width  - destination row length in bytes
  // height - number of rows
  //
  // NOTE(review): the inner loop runs at least once and is followed by
  // a 16-byte tail store, so this appears to need width/2 to be a
  // multiple of 8 and at least 16 - confirm against callers.
  void From422to444(unsigned char *src, unsigned char *dst, int width, int height)
  {
    int hwidthd8 = width / 2 - 8;
    int hwidth    = width / 2;

    __asm
    {
      mov			eax, [src]
        mov			ebx, [dst]
        mov			edi, [height]
        
        movq		mm1, [mmmask_0001]
        pxor		mm0, mm0
        
convyuv444init:
      movq		mm7, [eax]
        mov			esi, 0x00
        
convyuv444:
      movq		mm2, mm7
        movq		mm7, [eax+esi+8]
        movq		mm3, mm2
        movq		mm4, mm7
        
        psrlq		mm3, 8
        psllq		mm4, 56
        por			mm3, mm4
        
        movq		mm4, mm2
        movq		mm5, mm3
        
        punpcklbw	mm4, mm0
        punpcklbw	mm5, mm0
        
        movq		mm6, mm4
        paddusw		mm4, mm1
        paddusw		mm4, mm5
        psrlw		mm4, 1
        psllq		mm4, 8
        por			mm4, mm6
        
        punpckhbw	mm2, mm0
        punpckhbw	mm3, mm0
        
        movq		mm6, mm2
        paddusw		mm2, mm1
        paddusw		mm2, mm3
        
        movq		[ebx+esi*2], mm4
        
        psrlw		mm2, 1
        psllq		mm2, 8
        por			mm2, mm6
        
        add			esi, 0x08
        cmp			esi, [hwidthd8]
        movq		[ebx+esi*2-8], mm2
        jl			convyuv444
        
        movq		mm2, mm7
        punpcklbw	mm2, mm0
        movq		mm3, mm2
        
        psllq		mm2, 8
        por			mm2, mm3
        
        movq		[ebx+esi*2], mm2
        
        punpckhbw	mm7, mm0
        movq		mm6, mm7
        
        psllq		mm6, 8
        por			mm6, mm7
        
        movq		[ebx+esi*2+8], mm6
        
        add			eax, [hwidth]		
        add			ebx, [width]
        sub			edi, 0x01
        cmp			edi, 0x00
        jg			convyuv444init
        
        emms
    }
  }
  // Packs every other row of three planes into interleaved
  // Y U Y V bytes using MMX, starting with the first row.
  //
  // py, pu, pv - source planes; py rows are width bytes, pu and pv rows
  //              are width/2 bytes
  // dst        - destination, rows of 2*width bytes
  // width      - luma row length in bytes
  // height     - number of rows; the row counter is decremented by 2
  //              per processed row
  //
  // After each processed row all pointers skip one row, so only rows
  // 0, 2, 4, ... are converted. Eight luma bytes are handled per inner
  // iteration.
  // NOTE(review): here "odd" starts at row 0, whereas
  // From444toRGB32odd() starts at row 1 - confirm the intended naming.
  void From422toYUY2odd(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int width, int height)
  {
    int dwidth    = width * 2;
    int qwidth    = width * 4;
    int hwidth    = width / 2;
    __asm
    {
      mov			eax, [py]
        mov			ebx, [pu]
        mov			ecx, [pv]
        mov			edx, [dst]
        mov			esi, 0x00
        mov			edi, [height]
        
yuy2conv:
      movd		mm2, [ebx+esi]
        movd		mm3, [ecx+esi]
        punpcklbw	mm2, mm3
        movq		mm1, [eax+esi*2]
        movq		mm4, mm1
        punpcklbw	mm1, mm2
        punpckhbw	mm4, mm2
        
        add			esi, 0x04
        cmp			esi, [hwidth]
        movq		[edx+esi*4-16], mm1
        movq		[edx+esi*4-8], mm4
        jl			yuy2conv
        
        add			eax, [dwidth]
        add			ebx, [width]
        add			ecx, [width]
        add			edx, [qwidth]
        sub			edi, 0x02
        mov			esi, 0x00
        cmp			edi, 0x00
        jg			yuy2conv
        
        emms
    }    
  }
  
  // Packs every other row of three planes into interleaved
  // Y U Y V bytes using MMX, starting with the second row.
  //
  // py, pu, pv - source planes; py rows are width bytes, pu and pv rows
  //              are width/2 bytes
  // dst        - destination, rows of 2*width bytes
  // width      - luma row length in bytes
  // height     - number of rows; the row counter is decremented by 2
  //              per processed row
  //
  // All four pointers are first advanced by one row, then the same loop
  // as From422toYUY2odd() converts rows 1, 3, 5, ...
  void From422toYUY2even(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int width, int height)
  {
    int hwidth    = width / 2;
    int dwidth    = width * 2;
    int qwidth    = width * 4;
    // skip the first row of every plane and of the destination
    py += width; pu += hwidth; pv += hwidth; dst += dwidth;
    
    __asm
    {
      mov			eax, [py]
        mov			ebx, [pu]
        mov			ecx, [pv]
        mov			edx, [dst]
        mov			esi, 0x00
        mov			edi, [height]
        
yuy2conv:
      movd		mm2, [ebx+esi]
        movd		mm3, [ecx+esi]
        punpcklbw	mm2, mm3
        movq		mm1, [eax+esi*2]
        movq		mm4, mm1
        punpcklbw	mm1, mm2
        punpckhbw	mm4, mm2
        
        add			esi, 0x04
        cmp			esi, [hwidth]
        movq		[edx+esi*4-16], mm1
        movq		[edx+esi*4-8], mm4
        jl			yuy2conv
        
        add			eax, [dwidth]
        add			ebx, [width]
        add			ecx, [width]
        add			edx, [qwidth]
        sub			edi, 0x02
        mov			esi, 0x00
        cmp			edi, 0x00
        jg			yuy2conv
        
        emms
    }
  }
// Converts every other row of three full-resolution planes to 32-bit
// pixels using MMX, starting with the second source row.
//
// py, pu, pv - source planes, rows of width bytes each
// dst        - destination, rows of 4*width bytes
// width      - pixels per row; four pixels are handled per iteration
// height     - number of rows; the row counter is decremented by 2 per
//              processed row
//
// The first converted row is written to destination row height-2 and
// each following one two destination rows earlier, so the picture is
// stored bottom-up. Each pixel is stored as the bytes b, g, r, 0.
//
// Arithmetic per pixel (all products are 32-bit, then shifted right by
// 13 with a logical shift):
//   Y' = (y - 16) * 0x2543 + 0x1000
//   b  = (Y' + (u - 128) * 0x408D) >> 13
//   r  = (Y' + (v - 128) * 0x3313) >> 13
//   g  = (Y' + (u - 128) * 0xF377 + (v - 128) * 0xE5FC) >> 13
//        (both green factors are negative as signed 16-bit words)
void From444toRGB32odd(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst,int width, int height)
{
	// start at the second source row / second-to-last destination row
	dst += width * (height-2) * 4;
	py += width; pu += width; pv += width;
  // a row advances dst by 4*width; stepping back 12*width lands two rows earlier
  int nwidth = width * 12;
  int dwidth    = width * 2;

	__asm
	{
		mov			eax, [py]
		mov			ebx, [pu]
		mov			ecx, [pv]
		mov			edx, [dst]
		mov			edi, [height]
		mov			esi, 0x00
		pxor		mm0, mm0

convRGB24:
		movd		mm1, [eax+esi]       ;mm1: [00][00][00][00][ y][ y][ y][ y]
		movd		mm3, [ebx+esi]       ;mm3: [00][00][00][00][ u][ u][ u][ u]
    punpcklbw	mm1, mm0           ;mm1: [00][ y][00][ y][00][ y][00][ y]
    punpcklbw	mm3, mm0           ;mm3: [00][ u][00][ u][00][ u][00][ u]
    movd		mm5, [ecx+esi]       ;mm5: [00][00][00][00][ v][ v][ v][ v]
		punpcklbw	mm5, mm0           ;mm5: [00][ v][00][ v][00][ v][00][ v]
    movq		mm7, [mmmask_0128]   ;mm7: [   128][   128][   128][   128]
    psubw		mm3, mm7             ;mm3: [ u-128][ u-128][ u-128][ u-128]
    psubw		mm5, mm7             ;mm5: [ v-128][ v-128][ v-128][ v-128]

    psubw		mm1, [mmmask_0016]   ;mm1: [ y-16 ][ y-16 ][ y-16 ][ y-16 ]
    movq		mm2, mm1             ;mm2: [ y-16 ][ y-16 ][ y-16 ][ y-16 ]
    movq		mm7, [mmmask_0001]   ;mm7: [0001][0001][0001][0001]
    punpcklwd	mm1, mm7           ;mm1: [0001][ y-16 ][0001][ y-16 ]
    punpckhwd	mm2, mm7           ;mm2: [0001][ y-16 ][0001][ y-16 ]
    movq		mm7, [YUVRGB_Scale]  ;mm7: [1000][2543][1000][2543]
    pmaddwd		mm1, mm7           ;mm1: [         y][         y]
		pmaddwd		mm2, mm7           ;mm2: [         y][         y]

    movq		mm4, mm3             ;mm4: [ u-128][ u-128][ u-128][ u-128]
    punpcklwd	mm3, mm0           ;mm3: [ 0000 ][ u-128][ 0000 ][ u-128]
    punpckhwd	mm4, mm0           ;mm4: [ 0000 ][ u-128][ 0000 ][ u-128]
    movq		mm7, [mmmask_cbu]    ;mm7: [0000][408D][0000][408D]
    pmaddwd		mm3, mm7           ;mm3: [ mult u       ][ mult u       ]
		pmaddwd		mm4, mm7           ;mm4: [ mult u       ][ mult u       ]
    paddd		mm3, mm1             ;mm3: [ mult u +y    ][ mult u +y    ]
    paddd		mm4, mm2             ;mm4: [ mult u +y    ][ mult u +y    ]
		psrld		mm3, 13
		psrld		mm4, 13
    packuswb	mm3, mm0           ;mm3: [00][00][00][00][00][ b][00][ b]
    packuswb	mm4, mm0           ;mm4: [00][00][00][00][00][ b][00][ b]

    movq		mm6, mm5             ;mm6: [ v-128][ v-128][ v-128][ v-128]
		punpcklwd	mm5, mm0
		punpckhwd	mm6, mm0
		movq		mm7, [mmmask_crv]
		pmaddwd		mm5, mm7
		pmaddwd		mm6, mm7
		paddd		mm5, mm1
		paddd		mm6, mm2
		psrld		mm5, 13
		psrld		mm6, 13
		packuswb	mm5, mm0         ; mm5: [00][00][00][00][00][ r][00][ r]
		packuswb	mm6, mm0         ; mm6: [00][00][00][00][00][ r][00][ r]

		punpcklbw	mm3, mm5         ; mm3: [00][00][ r][ b][ 0][ 0][ r][ b]
		punpcklbw	mm4, mm6         ; mm4: [00][00][ r][ b][ 0][ 0][ r][ b]
    movq		mm5, mm3           ; mm5: [00][00][ r][ b][ 0][ 0][ r][ b]
    movq		mm6, mm4           ; mm6: [00][00][ r][ b][ 0][ 0][ r][ b]
    psrlq		mm5, 16            ; mm5: [ 0][ 0][00][00][ r][ b][ 0][ 0]
		psrlq		mm6, 16            ; mm6: [ 0][ 0][00][00][ r][ b][ 0][ 0]
		por			mm3, mm5           ; mm3: low dword now holds [ r][ b][ r][ b]
		por			mm4, mm6           ; mm4: low dword now holds [ r][ b][ r][ b]

    movd		mm5, [ebx+esi]     ; mm5: [00][00][00][00][ u][ u][ u][ u]
		movd		mm6, [ecx+esi]     ; mm6: [00][00][00][00][ v][ v][ v][ v]
    punpcklbw	mm5, mm0         ; mm5: [00][ u][00][ u][00][ u][00][ u]
		punpcklbw	mm6, mm0         ; mm6: [00][ v][00][ v][00][ v][00][ v]
    movq		mm7, [mmmask_0128] ; mm7: [00  80][00  80][00  80][00  80]
    psubw		mm5, mm7           ; mm5: [u - 80][u - 80][u - 80][u - 80]
    psubw		mm6, mm7           ; mm6: [v - 80][v - 80][v - 80][v - 80]

    movq		mm7, mm6           ; mm7: [v - 80][v - 80][v - 80][v - 80]
    punpcklwd	mm6, mm5         ; mm6: [u - 80][v - 80][u - 80][v - 80]
    punpckhwd	mm7, mm5		     ; mm7: [u - 80][v - 80][u - 80][v - 80]
    movq		mm5, [mmmask_cgu_cgv] ;mm5: [F377][E5FC][F377][E5FC]
    pmaddwd		mm6, mm5         ; mm6: [ madd uv      ][  madd uv     ]
		pmaddwd		mm7, mm5         ; mm7: [ madd uv      ][  madd uv     ]
    paddd		mm6, mm1           ; mm6: [ madd uv+y    ][ madd uv+y    ]
		paddd		mm7, mm2           ; mm7: [ madd uv+y    ][ madd uv+y    ]

		psrld		mm6, 13            ; mm6: [0000][   g][0000][   g]
		psrld		mm7, 13            ; mm7: [0000][   g][0000][   g]
    packuswb	mm6, mm0         ; mm6: [00][00][00][00][ 0][ g][ 0][ g]
		packuswb	mm7, mm0         ; mm7: [00][00][00][00][ 0][ g][ 0][ g]

    punpcklbw	mm3, mm6         ; mm3: [ 0][ r][ g][ b][ 0][ r][ g][ b]
		punpcklbw	mm4, mm7         ; mm4: [ 0][ r][ g][ b][ 0][ r][ g][ b]

    movq [edx],   mm3
    movq [edx+8], mm4

		add			edx, 0x10
		add			esi, 0x04
		cmp			esi, [width]

		jl			convRGB24

		add			eax, [dwidth]
		add			ebx, [dwidth]
		add			ecx, [dwidth]
		sub			edx, [nwidth]
		mov			esi, 0x00
		sub			edi, 0x02
		cmp			edi, 0x00
		jg			convRGB24

		emms
	}
}

// Converts every other row of three full-resolution planes to 32-bit
// pixels using MMX, starting with the first source row.
//
// py, pu, pv - source planes, rows of width bytes each
// dst        - destination, rows of 4*width bytes
// width      - pixels per row; four pixels are handled per iteration
// height     - number of rows; the row counter is decremented by 2 per
//              processed row
//
// The first converted row is written to destination row height-1 and
// each following one two destination rows earlier, so the picture is
// stored bottom-up. Each pixel is stored as the bytes b, g, r, 0.
//
// Arithmetic per pixel (all products are 32-bit, then shifted right by
// 13 with a logical shift):
//   Y' = (y - 16) * 0x2543 + 0x1000
//   b  = (Y' + (u - 128) * 0x408D) >> 13
//   r  = (Y' + (v - 128) * 0x3313) >> 13
//   g  = (Y' + (u - 128) * 0xF377 + (v - 128) * 0xE5FC) >> 13
//        (both green factors are negative as signed 16-bit words)
static void From444toRGB32even(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int width, int height)
{
	// start at the last destination row
	dst += width * (height-1) * 4;
  // a row advances dst by 4*width; stepping back 12*width lands two rows earlier
  int nwidth  = width * 12;
  int dwidth  = width * 2;
	__asm
	{
		mov			eax, [py]
		mov			ebx, [pu]
		mov			ecx, [pv]
		mov			edx, [dst]
		mov			edi, [height]
		mov			esi, 0x00
		pxor		mm0, mm0

convRGB24:
		// load 4 y, u and v bytes, widen to words, remove the 16 / 128 offsets
		movd		mm1, [eax+esi]
		movd		mm3, [ebx+esi]
		punpcklbw	mm1, mm0
		punpcklbw	mm3, mm0
		movd		mm5, [ecx+esi]
		punpcklbw	mm5, mm0
		movq		mm7, [mmmask_0128]
		psubw		mm3, mm7
		psubw		mm5, mm7

		// mm1/mm2 = (y - 16) * 0x2543 + 0x1000 for pixels 0-1 / 2-3
		psubw		mm1, [mmmask_0016]
		movq		mm2, mm1
		movq		mm7, [mmmask_0001]
		punpcklwd	mm1, mm7
		punpckhwd	mm2, mm7
		movq		mm7, [YUVRGB_Scale]
		pmaddwd		mm1, mm7
		pmaddwd		mm2, mm7

		// blue
		movq		mm4, mm3
		punpcklwd	mm3, mm0
		punpckhwd	mm4, mm0
		movq		mm7, [mmmask_cbu]
		pmaddwd		mm3, mm7
		pmaddwd		mm4, mm7
		paddd		mm3, mm1
		paddd		mm4, mm2
		psrld		mm3, 13
		psrld		mm4, 13
		packuswb	mm3, mm0
		packuswb	mm4, mm0

		// red
		movq		mm6, mm5
		punpcklwd	mm5, mm0
		punpckhwd	mm6, mm0
		movq		mm7, [mmmask_crv]
		pmaddwd		mm5, mm7
		pmaddwd		mm6, mm7
		paddd		mm5, mm1
		paddd		mm6, mm2

		psrld		mm5, 13
		psrld		mm6, 13
		packuswb	mm5, mm0
		packuswb	mm6, mm0

		// interleave blue and red bytes
		punpcklbw	mm3, mm5
		punpcklbw	mm4, mm6
		movq		mm5, mm3
		movq		mm6, mm4
		psrlq		mm5, 16
		psrlq		mm6, 16
		por			mm3, mm5
		por			mm4, mm6

		// green, from u and v together
		movd		mm5, [ebx+esi]
		movd		mm6, [ecx+esi]
		punpcklbw	mm5, mm0
		punpcklbw	mm6, mm0
		movq		mm7, [mmmask_0128]
		psubw		mm5, mm7
		psubw		mm6, mm7

		movq		mm7, mm6
		punpcklwd	mm6, mm5
		punpckhwd	mm7, mm5		
		movq		mm5, [mmmask_cgu_cgv]
		pmaddwd		mm6, mm5
		pmaddwd		mm7, mm5
		paddd		mm6, mm1
		paddd		mm7, mm2

		psrld		mm6, 13
		psrld		mm7, 13
		packuswb	mm6, mm0
		packuswb	mm7, mm0

		// merge green in and store four pixels
		punpcklbw	mm3, mm6
		punpcklbw	mm4, mm7

    movq [edx],   mm3
    movq [edx+8], mm4

		add			edx, 0x10
		add			esi, 0x04
		cmp			esi, [width]

		jl			convRGB24

		// next source row pair, two destination rows earlier
		add			eax, [dwidth]
		add			ebx, [dwidth]
		add			ecx, [dwidth]
		sub			edx, [nwidth]
		mov			esi, 0x00
		sub			edi, 0x02
		cmp			edi, 0x00
		jg			convRGB24

		emms
	}
}

private:
  // Buffer notified by Release() when the reference count reaches zero;
  // may be NULL.
  CFrameBuffer *m_pFrameBuffer;
  
  // True when DeAlloc() is allowed to free m_pData.
  bool   m_bOwnBuffer;
  // Presentation time of the frame.
  ui64   m_nPresTime;
  // Frame dimensions.
  ui32   m_nWidth, m_nHeigth;
  // One of the FRAME_* format values.
  ui32   m_nFormat;
  // Bits per pixel.
  ui32   m_nDepth;
  // Reference count changed by AddRef()/Release().
  ui32   m_nRef;
  // Pixel data; for buffers from aligned_new() this is the aligned address.
  ui8   *m_pData;
  // Combination of the FRAME_* flag bits.
  ui32   m_nFrameFlags;

  // memory alignment: distance from the pointer returned by new[] to m_pData
  ui32 m_nAllocatedOffset;
  // the image starts at this offset
  ui32 m_nDataOffset;

  // Plane descriptor handed out by GetYuvInfo().
  TYUVImage m_sYuvInfo;
  // Header refreshed and handed out by GetBmpInfo().
  BITMAPINFO m_sBitmapInfo;
  // Lock taken by AddRef() and Release().
  CFlCritSec m_csObject;
};

// Abstract producer of frames.
class CFrameSource
{
public:
  // Virtual so that implementations can be destroyed safely through a
  // CFrameSource pointer, matching CFrameBuffer.
  virtual ~CFrameSource() {}

  // Delivers a frame through *ppFrame. The meaning of the return value
  // and the ownership of the frame are defined by the implementer.
  virtual bool GetFrame(CFrame **ppFrame)=0;
};



#endif 