
#include <stdio.h>
//#include <stdlib.h>
//#include <string.h>
#include <setjmp.h>

#pragma pack(1)

typedef unsigned int   uint;
typedef unsigned short word;
typedef unsigned char  byte;
typedef unsigned long long int qword;

#ifndef __INTEL_COMPILER
#define restrict __restrict
#endif

#ifdef __GNUC__
 #define NOINLINE __attribute__((noinline))
 #define ALIGN(n) __attribute__((aligned(n)))
#else
 #define NOINLINE __declspec(noinline)
 #define ALIGN(n) __declspec(align(n))
#endif

template <typename T1, typename T2> T1 Min( T1 t1, T2 t2 ) { return t1<t2?t1:t2; }
template <typename T1, typename T2> T1 Max( T1 t1, T2 t2 ) { return t1>t2?t1:t2; }

uint flen( FILE* f ) {
  fseek( f, 0, SEEK_END );
  uint len = ftell(f);
  fseek( f, 0, SEEK_SET );
  return len;
}

enum {
  SCALElog = 15,
  SCALE    = 1<<SCALElog,
  eSCALE = 16*SCALE,
  hSCALE = SCALE/2,
  mSCALE = SCALE-1
};

static const int M_mwr = (eSCALE/(31+16));
static const int M_mW1 = (31616+0) * (1);
static const int M_mmw = (1+0) * (1);
static const int M_mlim = (0+0) * (1);
static const int M_mW0 = (13109+0) * (1);
static const int M_f0wr = (eSCALE/(72+16));
static const int M_f0mw = (84+1) * (1);
static const int M_f1wr = (eSCALE/(296+16));
static const int M_f1mw = (24+1) * (1);

struct LinearMixer {
  word w;
  LinearMixer( int W=SCALE/2 ) : w(W) {}
  int Mixup( int p1, int p2 ) {
    return p1+(((p2-p1)*w)>>SCALElog);
  }
};

struct Counter {
  word P; 
  void Update( const int c, const int wr, const int Mw ) {
    int dp = P + Mw-SCALE + ((SCALE-2*Mw)&(-c));
    dp = ((dp*wr)>>SCALElog);
    int q = P - dp;
    P = (q<mSCALE ) ? q : mSCALE;
  }
};

enum{ 
  STKPAD=1<<16
};

void yield( void* p, int value );

struct coroutine {

  volatile uint state;

  volatile byte* inpptr;
  volatile byte* inpbeg;
  volatile byte* inpend;
  volatile byte* outptr;
  volatile byte* outbeg;
  volatile byte* outend;

  ALIGN(32) jmp_buf PointA;
  ALIGN(32) jmp_buf PointB;

  template <typename T> 
  NOINLINE 
  void call_do_process( T* that ) {
    that->do_process();
  }

  template <typename T> 
  NOINLINE 
  void call_do_process0( T* that ) {
    byte stktmp[STKPAD]; 
    state = stktmp-((byte*)0);
    call_do_process(that);
  }

  template <typename T> 
  uint coro_call( T* that ) {
    if( setjmp(PointA)==0 ) {
      if( state ) longjmp(PointB,1); 
      call_do_process0(that);
    }
    return state;
  }

  void chkinp( void ) { if( inpptr>=inpend ) yield(this,1); }

  void chkout( void ) { if( outptr>=outend ) yield(this,2); }

  template <int f_limit>
    byte get( void ) { if( f_limit ) chkinp(); return *inpptr++; }

  template <int f_limit>
    void put( uint c ) { *outptr++ = c; if( f_limit ) chkout(); }

  void coro_init( void ) {
    inpptr=inpbeg=inpend = 0;
    outptr=outbeg=outend = 0;
    state = 0;
  }

  uint getinplen() { return inpend-inpptr; }
  uint getoutlen() { return outend-outptr; }

  void addinp( byte* inp,uint inplen ) {
    inpbeg = inpptr = inp;
    inpend = &inp[inplen];
  }

  void addout( byte* out,uint outlen ) {
    outbeg = outptr = out;
    outend = &out[outlen];
  }

};

NOINLINE
void yield( void* p, int value ) { 
  coroutine& q = *(coroutine*)p;
  if( setjmp(q.PointB)==0 ) { q.state=value; longjmp(q.PointA,1); }
}


template< int mode >
struct Rangecoder : coroutine {

  enum {
    NUM   = 4,
    sTOP  = 0x01000000U,
    gTOP  = 0x00010000U,
    Thres = 0xFF000000U,
    Threg = 0x00FF0000U
  };

  uint  range;
  uint  rprec;
  qword lowc;
  uint  code; 
  uint  FFNum;
  uint  Cache;

  void rc_Renorm( void ) {
    if( mode ) {
      while( range<sTOP ) range<<=8, (code<<=8)+=get<1>();
    } else {
      while( range<sTOP ) { range<<=8; ShiftLow(); }
    }
  }

  void rc_BProcess( uint freq, int& b ) { 

    uint rnew = rprec*freq;

    if( mode ) b = (code>=rnew);

    range = ((range-rnew-rnew)&(-b)) + rnew;
    rnew &= -b;

    if( mode ) code -= rnew; else lowc += rnew;

    rc_Renorm();
    rprec = range>>SCALElog;
  }

  void ShiftLow( void ) {
    uint Carry = uint(lowc>>32);
    uint low = uint(lowc);
    if( low<Thres || Carry ) {
      put<1>( Cache+Carry );
      for (;FFNum != 0;FFNum--) put<1>( Carry-1 );
      Cache = low>>24;
      Carry = 0;
    } else FFNum++;
    lowc=(low<<8);
  }

  void rc_Init0( void ) { 
    range = 0xFFFFFFFF;
    rprec = range>>SCALElog;
    lowc  = 0;
    FFNum = 0;
    Cache = 0;
  }
  
  void rc_Init( void ) {
    rc_Init0();
    if( mode==1 ) {
      for(int _=0; _<NUM+1; _++) (code<<=8)+=get<1>(); 
    }
  }

  void rc_Quit( void ) {
    if( mode==0 ) {
      for(int _=0; _<NUM+1; _++) ShiftLow(); 
    }
  }

};


template< int mode >
struct Model : public Rangecoder<mode> {

  uint f_len;

  enum{ CNUM=256 };
  ALIGN(32)
  Counter f0[CNUM];       
  ALIGN(32)
  Counter f1[CNUM][CNUM]; 

  enum {
    inpbufsize = 1<<20,
    outbufsize = 1<<16
  };
  ALIGN(4096)
  byte inpbuf[inpbufsize];
  byte outbuf[outbufsize];

  void do_process( void ) {
    int b,bit,c,i,j,p;

    for( i=0; i<CNUM; i++ ) {
      f0[i].P=hSCALE;
      for( j=0; j<CNUM; j++ ) f1[i][j].P=hSCALE; 
    }

    rc_Init();

    LinearMixer mix( M_mW0 );

    p = 0;
    for( i=0; i<f_len; i++ ) {

      if( mode==0 ) c = get<1>();

      uint ctx=1;
      for( b=7; b>=0; b-- ) {

        if( mode==0 ) bit = (c>>b)&1;

        int p1 = mix.Mixup(f0[ctx].P,f1[p][ctx].P);

        rc_BProcess( p1, bit );

        f0[ctx].Update( bit, M_f0wr, M_f0mw );   
        f1[p][ctx].Update( bit, M_f1wr, M_f1mw ); 

        ctx += ctx + bit;
      }
      c = byte(ctx);

      if( mode==1 ) put<1>(c);
      p = c;
    }

    rc_Quit();

    yield(this,0);

  }

  void processfile( FILE* f, FILE* g, uint _f_len ) {
    f_len = _f_len;
    coro_init();
    addout( outbuf, outbufsize );
    while( 1 ) {
      int r = coro_call(this); 
      if( r==0 ) break;
      if( r==1 ) {
        uint l = fread( inpbuf, 1, inpbufsize, f );
        if( l==0 ) break; 
        addinp( inpbuf, l ); 
      } else if( r==2 ) {
        fwrite( outbuf, 1, outbufsize, g );
        addout( outbuf, outbufsize );
      }
    }
    fwrite( outbuf, 1,outptr-outbuf, g ); 
  }

};

ALIGN(4096) 
static union {
  Model<0> M0;
  Model<1> M1;
};

int main( int argc, char** argv ) {

  if( argc<4 ) return 1;
  FILE* f = fopen( argv[2], "rb" ); if( f==0 ) return 1;
  FILE* g = fopen( argv[3], "wb" ); if( g==0 ) return 2;
  uint f_len = 0;

  if( argv[1][0]=='c' ) {
    f_len = flen( f );
    fwrite( &f_len, 1,4, g );
    M0.processfile( f, g, f_len );
  } else {
    fread( &f_len, 1,4, f );
    M1.processfile( f, g, f_len );
  }

  fclose( f );
  fclose( g );

  return 0;
}

