theacetoace/0001-factoring-obmc-out-of-snow.patch

## 0001-factoring-obmc-out-of-snow.patch
From 0d90ea4ec79409199d6d8b82ac02043dce919ad5 Mon Sep 17 00:00:00 2001
From: Stanislav Dolganov <dolganov@qst.hk>
Date: Thu, 18 Aug 2016 14:07:35 +0300
Subject: [PATCH 1/4] factoring obmc out of snow

---
 libavcodec/Makefile      |    8 +-
 libavcodec/obmc.c        |   61 ++
 libavcodec/obmc.h        |   45 ++
 libavcodec/obme.c        | 1135 +++++++++++++++++++++++++++++++++++++
 libavcodec/obme.h        |   58 ++
 libavcodec/obmemc.c      |  651 +++++++++++++++++++++
 libavcodec/obmemc.h      |  522 +++++++++++++++++
 libavcodec/obmemc_data.h |  132 +++++
 libavcodec/snow.c        |  571 +------------------
 libavcodec/snow.h        |  356 +-----------
 libavcodec/snowdata.h    |  132 -----
 libavcodec/snowdec.c     |  234 +++-----
 libavcodec/snowenc.c     | 1409 ++++++++--------------------------------------
 13 files changed, 2939 insertions(+), 2375 deletions(-)
 create mode 100644 libavcodec/obmc.c
 create mode 100644 libavcodec/obmc.h
 create mode 100644 libavcodec/obme.c
 create mode 100644 libavcodec/obme.h
 create mode 100644 libavcodec/obmemc.c
 create mode 100644 libavcodec/obmemc.h
 create mode 100644 libavcodec/obmemc_data.h
 delete mode 100644 libavcodec/snowdata.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index b375720..dbbf9a1 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -511,9 +511,11 @@ OBJS-$(CONFIG_SMACKAUD_DECODER)        += smacker.o
 OBJS-$(CONFIG_SMACKER_DECODER)         += smacker.o
 OBJS-$(CONFIG_SMC_DECODER)             += smc.o
 OBJS-$(CONFIG_SMVJPEG_DECODER)         += smvjpegdec.o
-OBJS-$(CONFIG_SNOW_DECODER)            += snowdec.o snow.o snow_dwt.o
-OBJS-$(CONFIG_SNOW_ENCODER)            += snowenc.o snow.o snow_dwt.o             \
-                                          h263.o ituh263enc.o
+OBJS-$(CONFIG_SNOW_DECODER)            += snowdec.o snow.o snow_dwt.o\
+                                          obmemc.o obmc.o
+OBJS-$(CONFIG_SNOW_ENCODER)            += snowenc.o snow.o snow_dwt.o\
+                                          h263.o ituh263enc.o\
+                                          obmemc.o obme.o
 OBJS-$(CONFIG_SOL_DPCM_DECODER)        += dpcm.o
 OBJS-$(CONFIG_SONIC_DECODER)           += sonic.o
 OBJS-$(CONFIG_SONIC_ENCODER)           += sonic.o
diff --git a/libavcodec/obmc.c b/libavcodec/obmc.c
new file mode 100644
index 0000000..fccad24
--- /dev/null
+++ b/libavcodec/obmc.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include "obmc.h"
+
+int ff_obmc_decode_init(OBMCContext *f) { /* derive nb_planes and the chroma shifts from f->avctx->pix_fmt */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(f->avctx->pix_fmt);
+    int i; /* declared ahead of the first statement: FFmpeg's C rules forbid mixing the two */
+
+    if (!desc) /* no descriptor for this pixel format */
+        return AVERROR_INVALIDDATA;
+    /* plane count = highest plane index referenced by any component, plus one */
+    f->nb_planes = 0;
+    for (i = 0; i < desc->nb_components; i++)
+        f->nb_planes = FFMAX(f->nb_planes, desc->comp[i].plane + 1);
+    avcodec_get_chroma_sub_sample(f->avctx->pix_fmt, &f->chroma_h_shift, &f->chroma_v_shift);
+    return 0;
+}
+
+int ff_obmc_predecode_frame(OBMCContext *f) {
+    int plane_index, ret;
+    for(plane_index=0; plane_index < f->nb_planes; plane_index++){
+       PlaneObmc *pc= &f->plane[plane_index];
+       pc->fast_mc= pc->diag_mc && pc->htaps==6 && pc->hcoeff[0]==40
+                                             && pc->hcoeff[1]==-10
+                                             && pc->hcoeff[2]==2;
+    }
+
+    if ((ret = ff_obmc_alloc_blocks(f)) < 0)
+        return ret;
+
+    if ((ret = ff_obmc_frame_start(f)) < 0)
+        return ret;
+
+    f->current_picture->pict_type = f->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+    av_assert0(!f->avmv);
+    if (f->avctx->flags2 & AV_CODEC_FLAG2_EXPORT_MVS) {
+        f->avmv = av_malloc_array(f->b_width * f->b_height, sizeof(AVMotionVector) << (f->block_max_depth*2));
+    }
+    f->avmv_index = 0;
+
+    return 0;
+}
diff --git a/libavcodec/obmc.h b/libavcodec/obmc.h
new file mode 100644
index 0000000..1e218b1
--- /dev/null
+++ b/libavcodec/obmc.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ /**
+ * @file obmc.h
+ * @brief Overlapped block motion compensation functions
+ */
+
+#ifndef AVCODEC_OBMC_H
+#define AVCODEC_OBMC_H
+
+#include "obmemc.h"
+
+/**
+ * Initialize the OBMC context fields needed for decoding: the plane count and chroma shifts.
+ * @param[in,out] f OBMC context to initialize; the pixel format of f->avctx is read
+ * @return 0 on success, AVERROR_INVALIDDATA if the pixel format has no descriptor
+ */
+int ff_obmc_decode_init(OBMCContext *f);
+
+/**
+ * Prepare the OBMC context for decoding the blocks of one frame.
+ * @param[in,out] f OBMC context to prepare
+ * @return 0 on success, otherwise the negative error code of the failing setup step
+ */
+int ff_obmc_predecode_frame(OBMCContext *f);
+
+#endif /* AVCODEC_OBMC_H */
diff --git a/libavcodec/obme.c b/libavcodec/obme.c
new file mode 100644
index 0000000..f442b26
--- /dev/null
+++ b/libavcodec/obme.c
@@ -0,0 +1,1135 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "obme.h"
+#include "h263.h"
+
+int ff_obmc_encode_init(OBMCContext *s, AVCodecContext *avctx)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+    int plane_index, ret, i;
+
+    /* Encoder-side setup of the OBMC context; returns 0 or a negative AVERROR code. */
+    if (!desc) /* checked after all declarations: FFmpeg's C rules forbid mixing the two */
+        return AVERROR_INVALIDDATA;
+    #if FF_API_MOTION_EST
+    FF_DISABLE_DEPRECATION_WARNINGS
+        if (avctx->me_method == ME_ITER)
+            s->motion_est = FF_ME_ITER;
+    FF_ENABLE_DEPRECATION_WARNINGS
+    #endif
+
+    s->mv_scale       = (avctx->flags & AV_CODEC_FLAG_QPEL) ? 2 : 4;
+    s->block_max_depth= (avctx->flags & AV_CODEC_FLAG_4MV ) ? 1 : 0;
+
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
+    /* default interpolation filter for the first three planes: diagonal, 6 taps, 40/-10/2 */
+    for(plane_index=0; plane_index<3; plane_index++){
+        s->plane[plane_index].diag_mc= 1;
+        s->plane[plane_index].htaps= 6;
+        s->plane[plane_index].hcoeff[0]=  40;
+        s->plane[plane_index].hcoeff[1]= -10;
+        s->plane[plane_index].hcoeff[2]=   2;
+        s->plane[plane_index].fast_mc= 1;
+    }
+
+    ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
+
+    if ((ret = ff_obmc_alloc_blocks(s)) < 0)
+        return ret;
+
+    s->m.avctx   = avctx;
+    s->m.bit_rate= avctx->bit_rate;
+    /* me.temp and me.scratchpad point at the same allocation */
+    s->m.me.temp      =
+    s->m.me.scratchpad= av_mallocz_array((avctx->width+64), 2*16*2*sizeof(uint8_t));
+    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+    s->m.sc.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
+    if (!s->m.me.scratchpad || !s->m.me.map || !s->m.me.score_map || !s->m.sc.obmc_scratchpad)
+        return AVERROR(ENOMEM); /* NOTE(review): nothing is freed here; presumably the close path does it - confirm */
+
+    ff_h263_encode_init(&s->m); //mv_penalty
+
+    s->max_ref_frames = av_clip(avctx->refs, 1, MAX_REF_FRAMES);
+    /* plane count = highest plane index referenced by any component, plus one */
+    s->nb_planes = 0;
+    for (i = 0; i < desc->nb_components; i++)
+        s->nb_planes = FFMAX(s->nb_planes, desc->comp[i].plane + 1);
+
+    ff_set_cmp(&s->mecc, s->mecc.me_cmp, s->avctx->me_cmp);
+    ff_set_cmp(&s->mecc, s->mecc.me_sub_cmp, s->avctx->me_sub_cmp);
+
+    s->input_picture = av_frame_alloc();
+    if (!s->input_picture)
+        return AVERROR(ENOMEM);
+
+    if ((ret = ff_obmc_get_buffer(s, s->input_picture)) < 0)
+        return ret;
+    /* iterative ME keeps a motion vector and a score per smallest block for every reference frame */
+    if(s->motion_est == FF_ME_ITER){
+        int size= s->b_width * s->b_height << 2*s->block_max_depth;
+        for(i=0; i<s->max_ref_frames; i++){
+            s->ref_mvs[i]= av_mallocz_array(size, sizeof(int16_t[2]));
+            s->ref_scores[i]= av_mallocz_array(size, sizeof(uint32_t));
+            if (!s->ref_mvs[i] || !s->ref_scores[i])
+                return AVERROR(ENOMEM);
+        }
+    }
+
+    return 0;
+}
+
+//near copy & paste from dsputil, FIXME
+static int pix_sum(uint8_t * pix, int line_size, int w, int h)
+{
+    int total = 0;
+    int row, col;
+
+    /* Add up every sample of the w x h block; consecutive rows are line_size bytes apart. */
+    for (row = 0; row < h; row++) {
+        const uint8_t *line = pix + row * line_size;
+        for (col = 0; col < w; col++)
+            total += line[col];
+    }
+
+    return total;
+}
+
+//near copy & paste from dsputil, FIXME
+static int pix_norm1(uint8_t * pix, int line_size, int w)
+{
+    const uint32_t *squares = ff_square_tab + 256; /* lookups are relative to entry 256 of the table */
+    int energy = 0;
+    int row, col;
+
+    /* Accumulate the table value of every sample in a w x w block (w serves as both dimensions). */
+    for (row = 0; row < w; row++) {
+        const uint8_t *line = pix + row * line_size;
+        for (col = 0; col < w; col++)
+            energy += squares[line[col]];
+    }
+
+    return energy;
+}
+
+static inline int get_penalty_factor(int lambda, int lambda2, int type){
+    const int cmp = type & 0xFF; /* only the low byte of the compare type is examined */
+
+    switch (cmp) {
+    case FF_CMP_BIT:
+        return 1;
+    case FF_CMP_DCT:
+        return (3*lambda) >> (FF_LAMBDA_SHIFT+1);
+    case FF_CMP_W53:
+        return (4*lambda) >> FF_LAMBDA_SHIFT;
+    case FF_CMP_W97:
+    case FF_CMP_SATD:
+    case FF_CMP_DCT264:
+        return (2*lambda) >> FF_LAMBDA_SHIFT;
+    case FF_CMP_RD:
+    case FF_CMP_PSNR:
+    case FF_CMP_SSE:
+    case FF_CMP_NSSE:
+        return lambda2 >> FF_LAMBDA_SHIFT; /* these are the only types scaled by lambda2 */
+    default: /* FF_CMP_SAD and every value not listed above */
+        return lambda >> FF_LAMBDA_SHIFT;
+    }
+}
+
+//Names for rows of the local predictor table P[10][2] in encode_q_branch() (FIXME copy&paste)
+#define P_LEFT P[1] //motion vector of the left neighbour
+#define P_TOP P[2] //motion vector of the top neighbour
+#define P_TOPRIGHT P[3] //motion vector of the top-right neighbour
+#define P_MEDIAN P[4] //component-wise median of the three above
+#define P_MV1 P[9]
+#define FLAG_QPEL   1 //must be 1
+
+static int encode_q_branch(OBMCContext *s, int level, int x, int y)
+{ /* Chooses inter, intra or a 4-way split for block (x,y) at depth `level`; returns the chosen score. */
+    ObmcCoderContext *const rc = &s->obmc_coder;
+    ObmcCoderContext pc, ic;
+    int score, score2, iscore, block_s, sum;
+    const int w= s->b_width  << s->block_max_depth;
+    const int h= s->b_height << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    const int block_w= 1<<(LOG2_MB_SIZE - level);
+    int trx= (x+1)<<rem_depth;
+    int try= (y+1)<<rem_depth;
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
+    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int pl = left->color[0];
+    int pcb= left->color[1];
+    int pcr= left->color[2];
+    int pmx, pmy;
+    int mx=0, my=0;
+    int l,cr,cb;
+    const int stride= s->current_picture->linesize[0];
+    const int uvstride= s->current_picture->linesize[1];
+    uint8_t *current_data[3]= { s->input_picture->data[0] + (x + y*  stride)*block_w,
+                                s->input_picture->data[1] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift),
+                                s->input_picture->data[2] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift)};
+    int P[10][2];
+    int16_t last_mv[3][2];
+    int qpel= !!(s->avctx->flags & AV_CODEC_FLAG_QPEL); //feeds the mv shift below
+    const int shift= 1+qpel;
+    MotionEstContext *c= &s->m.me;
+    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
+    int my_context= av_log2(2*FFABS(left->my - top->my));
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+    int ref, best_ref, ref_score, ref_mx, ref_my;
+
+    if(s->keyframe){
+        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
+        return 0;
+    }
+
+//    clip predictors / edge ?
+
+    P_LEFT[0]= left->mx;
+    P_LEFT[1]= left->my;
+    P_TOP [0]= top->mx;
+    P_TOP [1]= top->my;
+    P_TOPRIGHT[0]= tr->mx;
+    P_TOPRIGHT[1]= tr->my;
+    /* last_mv[]: vectors currently stored for this block, its right and its bottom neighbour */
+    last_mv[0][0]= s->block[index].mx;
+    last_mv[0][1]= s->block[index].my;
+    last_mv[1][0]= right->mx;
+    last_mv[1][1]= right->my;
+    last_mv[2][0]= bottom->mx;
+    last_mv[2][1]= bottom->my;
+
+    s->m.mb_stride=2;
+    s->m.mb_x=
+    s->m.mb_y= 0;
+    c->skip= 0;
+
+    av_assert1(c->  stride ==   stride);
+    av_assert1(c->uvstride == uvstride);
+
+    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
+    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
+    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
+    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_DMV;
+    /* motion range: the block may move up to 16-3 pixels beyond the block-aligned picture area */
+    c->xmin = - x*block_w - 16+3;
+    c->ymin = - y*block_w - 16+3;
+    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
+    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
+    /* clamp the neighbour predictors against the range limits, scaled to mv units by shift */
+    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
+    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
+    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
+    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
+    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
+    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
+    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
+
+    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+
+    if (!y) {
+        c->pred_x= P_LEFT[0];
+        c->pred_y= P_LEFT[1];
+    } else {
+        c->pred_x = P_MEDIAN[0];
+        c->pred_y = P_MEDIAN[1];
+    }
+    /* inter candidate: search every reference frame and keep the lowest-scoring vector */
+    score= INT_MAX;
+    best_ref= 0;
+    for(ref=0; ref<s->ref_frames; ref++){
+        init_ref(c, current_data, s->last_pictures[ref]->data, NULL, block_w*x, block_w*y, 0);
+
+        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
+                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
+
+        av_assert2(ref_mx >= c->xmin);
+        av_assert2(ref_mx <= c->xmax);
+        av_assert2(ref_my >= c->ymin);
+        av_assert2(ref_my <= c->ymax);
+
+        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
+        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
+        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
+        if(s->ref_mvs[ref]){
+            s->ref_mvs[ref][index][0]= ref_mx;
+            s->ref_mvs[ref][index][1]= ref_my;
+            s->ref_scores[ref][index]= ref_score;
+        }
+        if(score > ref_score){
+            score= ref_score;
+            best_ref= ref;
+            mx= ref_mx;
+            my= ref_my;
+        }
+    }
+    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
+
+    // subpel search
+    rc->init_frame_coder(s->avctx, &pc);
+
+    if(level!=s->block_max_depth)
+        pc.put_level_break(&pc, 4 + s_context, 1);
+    pc.put_block_type(&pc, 1 + left->type + top->type, 0);
+    if(s->ref_frames > 1)
+        pc.put_best_ref(&pc, 128 + 1024 + 32*ref_context, best_ref);
+    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
+    pc.put_block_mv(&pc,
+        128 + 32*(mx_context + 16*!!best_ref), 128 + 32*(my_context + 16*!!best_ref),
+        mx - pmx, my - pmy
+    );
+    score += (s->lambda2*pc.get_bits(&pc))>>FF_LAMBDA_SHIFT;
+    /* intra candidate: rounded luma mean l, and the sum of squared differences of the block from l */
+    block_s= block_w*block_w;
+    sum = pix_sum(current_data[0], stride, block_w, block_w);
+    l= (sum + block_s/2)/block_s;
+    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
+
+    if (s->nb_planes > 2) {
+        block_s= block_w*block_w>>(s->chroma_h_shift + s->chroma_v_shift);
+        sum = pix_sum(current_data[1], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
+        cb= (sum + block_s/2)/block_s;
+    //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
+        sum = pix_sum(current_data[2], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
+        cr= (sum + block_s/2)/block_s;
+    //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
+    }else
+        cb = cr = 0;
+
+    rc->init_frame_coder(s->avctx, &ic);
+
+    if(level!=s->block_max_depth)
+        ic.put_level_break(&ic, 4 + s_context, 1);
+    ic.put_block_type(&ic, 1 + left->type + top->type, 1);
+    ic.put_block_color(&ic, 32, 64, 96, l-pl, cb-pcb, cr-pcr);
+    iscore += (s->lambda2*ic.get_bits(&ic))>>FF_LAMBDA_SHIFT;
+
+    av_assert1(iscore < 255*255*256 + s->lambda2*10);
+    av_assert1(iscore >= 0);
+    av_assert1(l>=0 && l<=255);
+    av_assert1(pl>=0 && pl<=255);
+
+    if(level==0){
+        int varc= iscore >> 8;
+        int vard= score >> 8;
+        if (vard <= 64 || vard < varc)
+            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
+        else
+            c->scene_change_score+= s->m.qscale;
+    }
+    /* when deeper levels exist, also score the 4-way split and return it if it beats both candidates */
+    if(level!=s->block_max_depth){
+        rc->put_level_break(rc, 4 + s_context, 0);
+        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
+        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
+        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
+        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
+        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
+
+        if(score2 < score && score2 < iscore) {
+            rc->free(&ic); rc->free(&pc);
+            return score2;
+        }
+    }
+    /* no split: commit the cheaper of intra and inter (copy_coder presumably adopts that trial coder - confirm) */
+    if(iscore < score){
+        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
+        rc->copy_coder(&ic);
+        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
+        rc->free(&ic); rc->free(&pc);
+        return iscore;
+    }else{
+        rc->copy_coder(&pc);
+        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
+        rc->free(&ic); rc->free(&pc);
+        return score;
+    }
+}
+
+static void encode_q_branch2(OBMCContext *s, int level, int x, int y)
+{ /* Writes the block tree for (x,y) at depth `level` through rc, using the decisions already stored in s->block. */
+    ObmcCoderContext *const rc = &s->obmc_coder;
+    const int w= s->b_width  << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    int trx= (x+1)<<rem_depth;
+    BlockNode *b= &s->block[index];
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int pl = left->color[0];
+    int pcb= left->color[1];
+    int pcr= left->color[2];
+    int pmx, pmy;
+    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
+    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+
+    if(s->keyframe){ /* keyframes: every block becomes intra with the left neighbour's colour, nothing is written */
+        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
+        return;
+    }
+    /* above the deepest level: write 1 if the four sub-blocks are identical, else write 0 and recurse */
+    if(level!=s->block_max_depth){
+        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
+            rc->put_level_break(rc, 4 + s_context, 1);
+        }else{
+            rc->put_level_break(rc, 4 + s_context, 0);
+            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
+            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
+            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
+            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
+            return;
+        }
+    }
+    if(b->type & BLOCK_INTRA){ /* intra: type flag 1, then the colour deltas against the left neighbour */
+        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
+        rc->put_block_type(rc, 1 + (left->type&1) + (top->type&1), 1);
+        rc->put_block_color(
+            rc,
+            32, 64, 96,
+            b->color[0]-pl, b->color[1]-pcb, b->color[2]-pcr
+        );
+        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
+    }else{ /* inter: type flag 0, optional reference index, then the vector minus its prediction */
+        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
+        rc->put_block_type(rc, 1 + (left->type&1) + (top->type&1), 0);
+        if(s->ref_frames > 1)
+            rc->put_best_ref(rc, 128 + 1024 + 32*ref_context, b->ref);
+        rc->put_block_mv(rc,
+            128 + 32*mx_context, 128 + 32*my_context,
+            b->mx - pmx, b->my - pmy
+        );
+        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
+    }
+}
+
+static int get_dc(OBMCContext *s, int mb_x, int mb_y, int plane_index){ /* returns a colour value in 0..255 for block (mb_x,mb_y) of one plane */
+    int i, x2, y2;
+    PlaneObmc *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
+    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
+    const int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *src= s-> input_picture->data[plane_index];
+    IDWTELEM *dst= (IDWTELEM*)s->m.sc.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int index= mb_x + mb_y*b_stride;
+    BlockNode *b= &s->block[index];
+    BlockNode backup= *b;
+    int ab=0;
+    int aa=0;
+
+    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc stuff above
+    /* temporarily mark the block intra with colour 0 for this plane; it is restored from backup below */
+    b->type|= BLOCK_INTRA;
+    b->color[plane_index]= 0;
+    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
+    /* visit the four quadrants of the 2x2-block area centred on block (mb_x, mb_y) */
+    for(i=0; i<4; i++){
+        int mb_x2= mb_x + (i &1) - 1;
+        int mb_y2= mb_y + (i>>1) - 1;
+        int x= block_w*mb_x2 + block_w/2;
+        int y= block_h*mb_y2 + block_h/2;
+
+        add_yblock(s, 0, NULL, dst + (i&1)*block_w + (i>>1)*obmc_stride*block_h, NULL, obmc,
+                    x, y, block_w, block_h, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
+        /* accumulate only over the part of the quadrant that lies inside the plane */
+        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_h); y2++){
+            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
+                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_h*mb_y - block_h/2))*obmc_stride;
+                int obmc_v= obmc[index];
+                int d;
+                if(y<0) obmc_v += obmc[index + block_h*obmc_stride];
+                if(x<0) obmc_v += obmc[index + block_w];
+                if(y+block_h>h) obmc_v += obmc[index - block_h*obmc_stride];
+                if(x+block_w>w) obmc_v += obmc[index - block_w];
+                //FIXME precalculate this or simplify it somehow else
+
+                d = -dst[index] + (1<<(FRAC_BITS-1));
+                dst[index] = d;
+                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
+                aa += obmc_v * obmc_v; //FIXME precalculate this
+            }
+        }
+    }
+    *b= backup;
+
+    return av_clip_uint8( ROUNDED_DIV(ab<<LOG2_OBMC_MAX, aa) ); //FIXME we should not need clipping
+}
+
+static inline int get_block_bits(OBMCContext *s, int x, int y, int w){ /* approximate cost of block (x,y); w only locates the top-right neighbour */
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int b_height = s->b_height<< s->block_max_depth;
+    int index= x + y*b_stride;
+    const BlockNode *b     = &s->block[index];
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
+    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
+    int dmx, dmy;
+//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
+//  int my_context= av_log2(2*FFABS(left->my - top->my));
+    /* NOTE(review): the pointers above are formed before this range check; none is dereferenced until after it */
+    if(x<0 || x>=b_stride || y>=b_height)
+        return 0;
+/*
+1            0      0
+01X          1-2    1
+001XX        3-6    2-3
+0001XXX      7-14   4-7
+00001XXXX   15-30   8-15
+*/
+//FIXME try accurate rate
+//FIXME intra and inter predictors if surrounding blocks are not the same type
+    if(b->type & BLOCK_INTRA){ /* intra: cost grows with the log2 of each colour difference to the left block */
+        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
+                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
+                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
+    }else{ /* inter: cost grows with the log2 of the mv prediction error and of the reference index */
+        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
+        dmx-= b->mx;
+        dmy-= b->my;
+        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
+                    + av_log2(2*FFABS(dmy))
+                    + av_log2(2*b->ref));
+    }
+}
+
+static int get_block_rd(OBMCContext *s, int mb_x, int mb_y, int plane_index, uint8_t (*obmc_edged)[MB_SIZE * 2]){ /* returns distortion + rate*penalty_factor */
+    PlaneObmc *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
+    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
+    const int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *dst= s->current_picture->data[plane_index];
+    uint8_t *src= s->  input_picture->data[plane_index];
+    IDWTELEM *pred= (IDWTELEM*)s->m.sc.obmc_scratchpad + plane_index*block_size*block_size*4;
+    uint8_t *cur = s->scratchbuf;
+    uint8_t *tmp = s->emu_edge_buffer;
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int b_height = s->b_height<< s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int distortion;
+    int rate= 0;
+    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
+    int sx= block_w*mb_x - block_w/2;
+    int sy= block_h*mb_y - block_h/2;
+    int x0= FFMAX(0,-sx);
+    int y0= FFMAX(0,-sy);
+    int x1= FFMIN(block_w*2, w-sx);
+    int y1= FFMIN(block_h*2, h-sy);
+    int i,x,y;
+
+    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumptions below, checking only block_w
+
+    ff_obmc_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_h*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
+    /* blend cur (weighted by obmc_edged) with pred into dst, over the part of the 2x area inside the plane */
+    for(y=y0; y<y1; y++){
+        const uint8_t *obmc1= obmc_edged[y];
+        const IDWTELEM *pred1 = pred + y*obmc_stride;
+        uint8_t *cur1 = cur + y*ref_stride;
+        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
+        for(x=x0; x<x1; x++){
+#if FRAC_BITS >= LOG2_OBMC_MAX
+            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
+#else
+            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
+#endif
+            v = (v + pred1[x]) >> FRAC_BITS;
+            if(v&(~255)) v= ~(v>>31); /* out of 0..255: negative values become 0, larger ones 255 once stored as uint8_t */
+            dst1[x] = v;
+        }
+    }
+
+    /* copy the regions where obmc[] = (uint8_t)256 */
+    if(LOG2_OBMC_MAX == 8
+        && (mb_x == 0 || mb_x == b_stride-1)
+        && (mb_y == 0 || mb_y == b_height-1)){
+        if(mb_x == 0)
+            x1 = block_w;
+        else
+            x0 = block_w;
+        if(mb_y == 0)
+            y1 = block_h;
+        else
+            y0 = block_h;
+        for(y=y0; y<y1; y++)
+            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
+    }
+    /* distortion between the input picture and the reconstruction over the 2x block area */
+    if(block_w==16){
+        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
+        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
+        /* FIXME cmps overlap but do not cover the wavelet's whole support.
+         * So improving the score of one block is not strictly guaranteed
+         * to improve the score of the whole frame, thus iterative motion
+         * estimation does not always converge. */
+        if(s->avctx->me_cmp == FF_CMP_W97)
+            distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
+        else if(s->avctx->me_cmp == FF_CMP_W53)
+            distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
+        else{
+            distortion = 0;
+            for(i=0; i<4; i++){
+                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
+                distortion += s->mecc.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
+            }
+        }
+    }else{
+        av_assert2(block_w==8);
+        distortion = s->mecc.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
+    }
+    /* rate is counted for the luma plane only: this block plus the neighbours sketched below */
+    if(plane_index==0){
+        for(i=0; i<4; i++){
+/* ..RRr
+ * .RXx.
+ * rxx..
+ */
+            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
+        }
+        if(mb_x == b_stride-2)
+            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
+    }
+    return distortion + rate*penalty_factor;
+}
+
+static int get_4block_rd(OBMCContext *s, int mb_x, int mb_y, int plane_index){ /* returns distortion + rate*penalty_factor for the block group at (mb_x, mb_y) in the given plane */
+    int i, y2;
+    PlaneObmc *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
+    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
+    const int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *dst= s->current_picture->data[plane_index];
+    uint8_t *src= s-> input_picture->data[plane_index];
+    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst)
+    // const has only been removed from zero_dst to suppress a warning
+    static IDWTELEM zero_dst[4096]; //FIXME
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int distortion= 0;
+    int rate= 0;
+    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
+
+    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumptions below
+
+    for(i=0; i<9; i++){ /* 3x3 grid of positions: block offsets -1..+1 around (mb_x, mb_y), shifted by half a block */
+        int mb_x2= mb_x + (i%3) - 1;
+        int mb_y2= mb_y + (i/3) - 1;
+        int x= block_w*mb_x2 + block_w/2;
+        int y= block_h*mb_y2 + block_h/2;
+
+        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
+                   x, y, block_w, block_h, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
+
+        //FIXME find a cleaner/simpler way to skip the outside stuff
+        for(y2= y; y2<0; y2++) /* rows above the plane: copy input over the prediction */
+            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
+        for(y2= h; y2<y+block_h; y2++) /* rows below the plane */
+            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
+        if(x<0){ /* columns left of the plane */
+            for(y2= y; y2<y+block_h; y2++)
+                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
+        }
+        if(x+block_w > w){ /* columns right of the plane */
+            for(y2= y; y2<y+block_h; y2++)
+                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
+        }
+
+        av_assert1(block_w== 8 || block_w==16);
+        distortion += s->mecc.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_h); /* me_cmp[1] for width 8, me_cmp[0] for width 16 */
+    }
+
+    if(plane_index==0){ /* rate is only accumulated for plane 0 */
+        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
+        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
+
+/* ..RRRr
+ * .RXXx.
+ * .RXXx.
+ * rxxx.
+ */
+        if(merged) /* all four blocks equal: one get_block_bits() call covers dxy[0..3] */
+            rate = get_block_bits(s, mb_x, mb_y, 2);
+        for(i=merged?4:0; i<9; i++){
+            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
+            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
+        }
+    }
+    return distortion + rate*penalty_factor;
+}
+
+static av_always_inline int check_block(OBMCContext *s, int mb_x, int mb_y, int p[3], int intra, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){ /* tries candidate p on one block; returns 1 and updates *best_rd if the RD cost drops, else restores the block and returns 0 */
+    const int b_stride= s->b_width << s->block_max_depth;
+    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
+    BlockNode backup= *block; /* restored below if the candidate is not an improvement */
+    unsigned value;
+    int rd, index;
+
+    av_assert2(mb_x>=0 && mb_y>=0);
+    av_assert2(mb_x<b_stride);
+
+    if(intra){ /* p[0..2] are the per-plane colors; all three are copied */
+        block->color[0] = p[0];
+        block->color[1] = p[1];
+        block->color[2] = p[2];
+        block->type |= BLOCK_INTRA;
+    }else{ /* p[0], p[1] are the motion vector components; p[2] is not read */
+        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
+        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
+        if(s->me_cache[index] == value) /* same cache entry already stored for this generation: skip the candidate */
+            return 0;
+        s->me_cache[index]= value;
+
+        block->mx= p[0];
+        block->my= p[1];
+        block->type &= ~BLOCK_INTRA;
+    }
+
+    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged) + s->intra_penalty * !!intra; /* plane 0 only; intra_penalty is added for intra candidates */
+
+//FIXME chroma
+    if(rd < *best_rd){
+        *best_rd= rd;
+        return 1;
+    }else{
+        *block= backup;
+        return 0;
+    }
+}
+
+/* Inter-only wrapper around check_block(): special case for int[2] args we
+ * discard afterwards, fixes compilation problem with gcc 2.95 */
+static av_always_inline int check_block_inter(OBMCContext *s, int mb_x, int mb_y, int p0, int p1, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){
+    int p[2] = {p0, p1}; /* two entries suffice: check_block() reads only p[0], p[1] when intra is 0 */
+    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
+}
+
+static av_always_inline int check_4block_inter(OBMCContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ /* tries one shared mv (p0,p1) and ref on a 2x2 block group; returns 1 and updates *best_rd if better, else restores all four blocks and returns 0 */
+    const int b_stride= s->b_width << s->block_max_depth;
+    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
+    BlockNode backup[4];
+    unsigned value;
+    int rd, index;
+
+    /* We don't initialize backup[] during variable declaration, because
+     * that fails to compile on MSVC: "cannot convert from 'BlockNode' to
+     * 'int16_t'". */
+    backup[0] = block[0];
+    backup[1] = block[1];
+    backup[2] = block[b_stride];
+    backup[3] = block[b_stride + 1];
+
+    av_assert2(mb_x>=0 && mb_y>=0);
+    av_assert2(mb_x<b_stride);
+    av_assert2(((mb_x|mb_y)&1) == 0); /* group origin must have even coordinates */
+
+    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
+    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); /* NOTE(review): key uses block->ref (current ref of the block), not the 'ref' argument being tried - confirm this is intended */
+    if(s->me_cache[index] == value)
+        return 0;
+    s->me_cache[index]= value;
+
+    block->mx= p0;
+    block->my= p1;
+    block->ref= ref;
+    block->type &= ~BLOCK_INTRA;
+    block[1]= block[b_stride]= block[b_stride+1]= *block; /* replicate the candidate into the other three blocks of the group */
+
+    rd= get_4block_rd(s, mb_x, mb_y, 0);
+
+//FIXME chroma
+    if(rd < *best_rd){
+        *best_rd= rd;
+        return 1;
+    }else{
+        block[0]= backup[0];
+        block[1]= backup[1];
+        block[b_stride]= backup[2];
+        block[b_stride+1]= backup[3];
+        return 0;
+    }
+}
+
+static void iterative_me(OBMCContext *s){ /* iterative RD refinement of every block's mv/ref or intra color; at most 25 passes, stops early when a pass changes nothing */
+    int pass, mb_x, mb_y;
+    const int b_width = s->b_width  << s->block_max_depth;
+    const int b_height= s->b_height << s->block_max_depth;
+    const int b_stride= b_width;
+    int color[3] = {0}; /* zeroed: only the first nb_planes entries are filled below, yet check_block() copies all three */
+
+    { /* starting point: encode_q_branch() on every top-level block, bracketed by init_frame_coder()/reset_coder()/free() on the local context r */
+        ObmcCoderContext r;
+        s->obmc_coder.init_frame_coder(s->avctx, &r);
+        for(mb_y= 0; mb_y<s->b_height; mb_y++)
+            for(mb_x= 0; mb_x<s->b_width; mb_x++)
+                encode_q_branch(s, 0, mb_x, mb_y);
+        s->obmc_coder.reset_coder(&r);
+        s->obmc_coder.free(&r);
+    }
+
+    for(pass=0; pass<25; pass++){
+        int change= 0; /* number of blocks modified in this pass */
+
+        for(mb_y= 0; mb_y<b_height; mb_y++){
+            for(mb_x= 0; mb_x<b_width; mb_x++){
+                int dia_change, i, j, ref;
+                int best_rd= INT_MAX, ref_rd;
+                BlockNode backup, ref_b;
+                const int index= mb_x + mb_y * b_stride;
+                BlockNode *block= &s->block[index];
+                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
+                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
+                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
+                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
+                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
+                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
+                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
+                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
+                const int b_w= (MB_SIZE >> s->block_max_depth);
+                uint8_t obmc_edged[MB_SIZE * 2][MB_SIZE * 2]; /* copy of the OBMC window, adjusted below when the block touches a border */
+
+                if(pass && (block->type & BLOCK_OPT)) /* after the first pass, skip blocks still flagged BLOCK_OPT */
+                    continue;
+                block->type |= BLOCK_OPT;
+
+                backup= *block;
+
+                if(!s->me_cache_generation) /* generation is 0: clear the cache before use */
+                    memset(s->me_cache, 0, sizeof(s->me_cache));
+                s->me_cache_generation += 1<<22;
+
+                //FIXME precalculate
+                {
+                    int x, y;
+                    for (y = 0; y < b_w * 2; y++)
+                        memcpy(obmc_edged[y], ff_obmc_tab[s->block_max_depth] + y * b_w * 2, b_w * 2);
+                    if(mb_x==0) /* left border: left half of each row becomes the sum of its first and last weight */
+                        for(y=0; y<b_w*2; y++)
+                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
+                    if(mb_x==b_stride-1) /* right border: same for the right half */
+                        for(y=0; y<b_w*2; y++)
+                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
+                    if(mb_y==0){ /* top border: fold row b_w-1 into row 0 and replicate over the top half */
+                        for(x=0; x<b_w*2; x++)
+                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
+                        for(y=1; y<b_w; y++)
+                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
+                    }
+                    if(mb_y==b_height-1){ /* bottom border: fold row b_w into the last row and replicate over the bottom half */
+                        for(x=0; x<b_w*2; x++)
+                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
+                        for(y=b_w; y<b_w*2-1; y++)
+                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
+                    }
+                }
+
+                //skip stuff outside the picture
+                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
+                    uint8_t *src= s->  input_picture->data[0];
+                    uint8_t *dst= s->current_picture->data[0];
+                    const int stride= s->current_picture->linesize[0];
+                    const int block_w= MB_SIZE >> s->block_max_depth;
+                    const int block_h= MB_SIZE >> s->block_max_depth;
+                    const int sx= block_w*mb_x - block_w/2;
+                    const int sy= block_h*mb_y - block_h/2;
+                    const int w= s->plane[0].width;
+                    const int h= s->plane[0].height;
+                    int y;
+
+                    for(y=sy; y<0; y++) /* copy input over the prediction for rows above the plane */
+                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
+                    for(y=h; y<sy+block_h*2; y++) /* rows below the plane */
+                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
+                    if(sx<0){ /* columns left of the plane */
+                        for(y=sy; y<sy+block_h*2; y++)
+                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
+                    }
+                    if(sx+block_w*2 > w){ /* columns right of the plane */
+                        for(y=sy; y<sy+block_h*2; y++)
+                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
+                    }
+                }
+
+                // intra(black) = neighbors' contribution to the current block
+                for(i=0; i < s->nb_planes; i++)
+                    color[i]= get_dc(s, mb_x, mb_y, i);
+
+                // get previous score (cannot be cached due to OBMC)
+                if(pass > 0 && (block->type&BLOCK_INTRA)){
+                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
+                    check_block(s, mb_x, mb_y, color0, 1, obmc_edged, &best_rd);
+                }else
+                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, obmc_edged, &best_rd);
+
+                ref_b= *block; /* best block state found so far, across all reference frames */
+                ref_rd= best_rd;
+                for(ref=0; ref < s->ref_frames; ref++){
+                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
+                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
+                        continue;
+                    block->ref= ref;
+                    best_rd= INT_MAX; /* restart the search for this reference frame */
+
+                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], obmc_edged, &best_rd); /* stored mv of this block, then zero mv, then the stored mvs of the four direct neighbours */
+                    check_block_inter(s, mb_x, mb_y, 0, 0, obmc_edged, &best_rd);
+                    if(tb)
+                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], obmc_edged, &best_rd);
+                    if(lb)
+                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], obmc_edged, &best_rd);
+                    if(rb)
+                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], obmc_edged, &best_rd);
+                    if(bb)
+                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], obmc_edged, &best_rd);
+
+                    /* fullpel ME */
+                    //FIXME avoid subpel interpolation / round to nearest integer
+                    do{
+                        int newx = block->mx;
+                        int newy = block->my;
+                        int dia_size = s->iterative_dia_size ? s->iterative_dia_size : FFMAX(s->avctx->dia_size, 1);
+                        dia_change=0;
+                        for(i=0; i < dia_size; i++){
+                            for(j=0; j<i; j++){ /* four candidates at offsets that are multiples of 4 around (newx, newy) */
+                                dia_change |= check_block_inter(s, mb_x, mb_y, newx+4*(i-j), newy+(4*j), obmc_edged, &best_rd);
+                                dia_change |= check_block_inter(s, mb_x, mb_y, newx-4*(i-j), newy-(4*j), obmc_edged, &best_rd);
+                                dia_change |= check_block_inter(s, mb_x, mb_y, newx-(4*j), newy+4*(i-j), obmc_edged, &best_rd);
+                                dia_change |= check_block_inter(s, mb_x, mb_y, newx+(4*j), newy-4*(i-j), obmc_edged, &best_rd);
+                            }
+                        }
+                    }while(dia_change); /* repeat until no candidate improves */
+                    /* subpel ME */
+                    do{
+                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
+                        dia_change=0;
+                        for(i=0; i<8; i++)
+                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], obmc_edged, &best_rd);
+                    }while(dia_change);
+                    //FIXME or try the standard 2 pass qpel or similar
+
+                    mvr[0][0]= block->mx; /* remember the refined mv for this reference frame */
+                    mvr[0][1]= block->my;
+                    if(ref_rd > best_rd){
+                        ref_rd= best_rd;
+                        ref_b= *block;
+                    }
+                }
+                best_rd= ref_rd;
+                *block= ref_b;
+                check_block(s, mb_x, mb_y, color, 1, obmc_edged, &best_rd); /* finally try intra with the DC colors computed above */
+                //FIXME RD style color selection
+                if(!same_block(block, &backup)){ /* block changed: clear BLOCK_OPT on all eight neighbours so the next pass revisits them */
+                    if(tb ) tb ->type &= ~BLOCK_OPT;
+                    if(lb ) lb ->type &= ~BLOCK_OPT;
+                    if(rb ) rb ->type &= ~BLOCK_OPT;
+                    if(bb ) bb ->type &= ~BLOCK_OPT;
+                    if(tlb) tlb->type &= ~BLOCK_OPT;
+                    if(trb) trb->type &= ~BLOCK_OPT;
+                    if(blb) blb->type &= ~BLOCK_OPT;
+                    if(brb) brb->type &= ~BLOCK_OPT;
+                    change ++;
+                }
+            }
+        }
+        av_log(s->avctx, AV_LOG_DEBUG, "pass:%d changed:%d\n", pass, change);
+        if(!change)
+            break;
+    }
+
+    if(s->block_max_depth == 1){ /* extra step: try one shared mv/ref for each 2x2 group whose blocks differ */
+        int change= 0;
+        for(mb_y= 0; mb_y<b_height; mb_y+=2){
+            for(mb_x= 0; mb_x<b_width; mb_x+=2){
+                int i;
+                int best_rd, init_rd;
+                const int index= mb_x + mb_y * b_stride;
+                BlockNode *b[4];
+
+                b[0]= &s->block[index];
+                b[1]= b[0]+1;
+                b[2]= b[0]+b_stride;
+                b[3]= b[2]+1;
+                if(same_block(b[0], b[1]) &&
+                   same_block(b[0], b[2]) &&
+                   same_block(b[0], b[3]))
+                    continue;
+
+                if(!s->me_cache_generation)
+                    memset(s->me_cache, 0, sizeof(s->me_cache));
+                s->me_cache_generation += 1<<22;
+
+                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); /* cost of the current, unmerged state */
+
+                //FIXME more multiref search?
+                check_4block_inter(s, mb_x, mb_y,
+                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
+                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); /* rounded mean mv of the four blocks, ref 0 */
+
+                for(i=0; i<4; i++)
+                    if(!(b[i]->type&BLOCK_INTRA))
+                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
+
+                if(init_rd != best_rd)
+                    change++;
+            }
+        }
+        av_log(s->avctx, AV_LOG_DEBUG, "pass:4mv changed:%d\n", change*4);
+    }
+}
+
+static void encode_blocks(OBMCContext *s, int search){ /* encodes all top-level blocks row by row; runs iterative_me() first when requested */
+    int x, y;
+    int w= s->b_width;
+    int h= s->b_height;
+    ObmcCoderContext *const c = &s->obmc_coder;
+
+    if(s->motion_est == FF_ME_ITER && !s->keyframe && search) /* iterative ME only for non-keyframes and only when search is set */
+        iterative_me(s);
+
+    for(y=0; y<h; y++){
+        if(c->available_bytes(c) < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
+            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+            return; /* logged only: the function is void, so the caller gets no error code and the remaining rows stay unencoded */
+        }
+        for(x=0; x<w; x++){
+            if(s->motion_est == FF_ME_ITER || !search) /* presumably encode_q_branch2() only codes already-decided blocks - TODO confirm */
+                encode_q_branch2(s, 0, x, y);
+            else
+                encode_q_branch (s, 0, x, y);
+        }
+    }
+}
+
+int ff_obmc_pre_encode_frame(OBMCContext *s, AVCodecContext *avctx, const AVFrame *pict)
+{ /* per-frame encoder setup; returns 0, or the negative value from ff_obmc_frame_start() / av_frame_ref() */
+    const int width= s->avctx->width;
+    const int height= s->avctx->height;
+
+    int ret;
+    if (s->current_picture->data[0] /* an existing picture gets its edges drawn before ff_obmc_frame_start(); presumably it becomes a reference there - TODO confirm */
+    #if FF_API_EMU_EDGE
+            && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)
+    #endif
+        ) {
+        int w = s->avctx->width;
+        int h = s->avctx->height;
+
+        s->mpvencdsp.draw_edges(s->current_picture->data[0],
+                                s->current_picture->linesize[0], w   , h   ,
+                                EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
+        if (s->current_picture->data[2]) { /* chroma planes present: same with sizes and edge widths shifted by the chroma shifts */
+            s->mpvencdsp.draw_edges(s->current_picture->data[1],
+                                    s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
+                                    EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
+            s->mpvencdsp.draw_edges(s->current_picture->data[2],
+                                    s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
+                                    EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
+        }
+    }
+
+    if ((ret = ff_obmc_frame_start(s)) < 0)
+        return ret;
+
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    av_frame_unref(avctx->coded_frame); /* coded_frame is re-pointed at the current picture */
+    ret = av_frame_ref(avctx->coded_frame, s->current_picture);
+    if (ret < 0)
+        return ret;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    s->m.current_picture_ptr= &s->m.current_picture;
+    s->m.current_picture.f = s->current_picture;
+    s->m.current_picture.f->pts = pict->pts; /* writes through to s->current_picture, which f now points at */
+    if(s->m.pict_type == AV_PICTURE_TYPE_P){ /* P-frames only: fill in s->m and call ff_init_me() */
+        int block_width = (width +15)>>4; /* picture size in 16-pixel units, rounded up */
+        int block_height= (height+15)>>4;
+        int stride= s->current_picture->linesize[0];
+
+        av_assert0(s->current_picture->data[0]);
+        av_assert0(s->last_pictures[0]->data[0]);
+
+        s->m.avctx= s->avctx;
+        s->m.last_picture.f = s->last_pictures[0];
+        s->m.new_picture.f = s->input_picture;
+        s->m.last_picture_ptr= &s->m.last_picture;
+        s->m.linesize = stride;
+        s->m.uvlinesize= s->current_picture->linesize[1];
+        s->m.width = width;
+        s->m.height= height;
+        s->m.mb_width = block_width;
+        s->m.mb_height= block_height;
+        s->m.mb_stride=   s->m.mb_width+1;
+        s->m.b8_stride= 2*s->m.mb_width+1;
+        s->m.f_code=1;
+        /* s->m.pict_type = pic->pict_type; // DELETED */
+#if FF_API_MOTION_EST
+FF_DISABLE_DEPRECATION_WARNINGS
+        s->m.me_method= s->avctx->me_method;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        s->m.motion_est= s->motion_est;
+        s->m.me.scene_change_score=0;
+        s->m.me.dia_size = avctx->dia_size;
+        s->m.quarter_sample= (s->avctx->flags & AV_CODEC_FLAG_QPEL)!=0;
+        s->m.out_format= FMT_H263;
+        s->m.unrestricted_mv= 1;
+
+        s->m.lambda = s->lambda;
+        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
+        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; /* rounded lambda^2, stored in both contexts */
+
+        s->m.mecc= s->mecc; //move
+        s->m.qdsp= s->qdsp; //move
+        s->m.hdsp = s->hdsp;
+        ff_init_me(&s->m);
+        s->hdsp = s->m.hdsp; /* copied back after ff_init_me(); presumably it may modify them - TODO confirm */
+        s->mecc= s->m.mecc;
+    }
+
+    return 0;
+}
+
+void ff_obmc_encode_blocks(OBMCContext *s, int search)
+{ /* public entry point: forwards unchanged to the file-local encode_blocks() */
+    encode_blocks(s, search);
+}
diff --git a/libavcodec/obme.h b/libavcodec/obme.h
new file mode 100644
index 0000000..08b4760
--- /dev/null
+++ b/libavcodec/obme.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ /**
+ * @file obme.h
+ * @brief Overlapped block motion estimation functions
+ */
+
+#ifndef AVCODEC_OBME_H
+#define AVCODEC_OBME_H
+
+#include "obmemc.h"
+
+#define FF_ME_ITER 50
+
+ /**
+ * Initializes the OBMC context parameters needed for the encoding process
+ *
+ * @param[in,out] s OBMC context to initialize
+ * @param[in] avctx Codec context to retrieve some initial values from
+ */
+int ff_obmc_encode_init(OBMCContext *s, AVCodecContext *avctx);
+
+ /**
+ * Prepares the OBMC context for block encoding; called once for each frame.
+ * @return 0 on success, a negative error code on failure
+ * @param[in,out] f OBMC context to prepare
+ * @param[in] avctx Codec context to retrieve some required values from
+ * @param[in] pict Frame to encode; its pts is copied to the current picture
+ */
+int ff_obmc_pre_encode_frame(OBMCContext *f, AVCodecContext *avctx, const AVFrame *pict);
+
+ /**
+ * Encodes the blocks of the current frame
+ *
+ * @param[in,out] s OBMC context
+ * @param[in] search Nonzero if reference blocks should be searched for
+ */
+void ff_obmc_encode_blocks(OBMCContext *s, int search);
+
+#endif /* AVCODEC_OBME_H */
diff --git a/libavcodec/obmemc.c b/libavcodec/obmemc.c
new file mode 100644
index 0000000..314b1b3
--- /dev/null
+++ b/libavcodec/obmemc.c
@@ -0,0 +1,651 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "obmemc.h"
+#include "obmemc_data.h"
+
+int ff_obmc_get_buffer(OBMCContext *s, AVFrame *frame)
+{ /* allocates frame through ff_get_buffer(); returns 0, or the negative value ff_get_buffer() returned */
+    int ret, i;
+    int edges_needed = av_codec_is_encoder(s->avctx->codec); /* only encoders get the extra border */
+
+    frame->width  = s->avctx->width ;
+    frame->height = s->avctx->height;
+    if (edges_needed) { /* request EDGE_WIDTH extra pixels on every side */
+        frame->width  += 2 * EDGE_WIDTH;
+        frame->height += 2 * EDGE_WIDTH;
+    }
+    if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
+        return ret;
+    if (edges_needed) {
+        for (i = 0; frame->data[i]; i++) { /* advance each plane pointer past the top and left border (chroma shifts applied for i > 0) */
+            int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
+                            frame->linesize[i] +
+                            (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
+            frame->data[i] += offset;
+        }
+        frame->width  = s->avctx->width; /* restore the visible size */
+        frame->height = s->avctx->height;
+    }
+
+    return 0;
+}
+
+int ff_obmc_alloc_blocks(OBMCContext *s)
+{ /* sets b_width/b_height and (re)allocates the zeroed s->block array; returns 0 or AVERROR(ENOMEM) */
+    int w= AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
+    int h= AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
+
+    s->b_width = w;
+    s->b_height= h;
+
+    av_free(s->block); /* drop any previous array; s->block is overwritten right below */
+    s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2)); /* 4^block_max_depth nodes per top-level block */
+    if (!s->block)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static av_cold void init_qexp(void)
+{ /* fills ff_qexp[0..QROOT-1] with a rounded geometric sequence starting at 128 */
+    int i;
+    double v=128;
+
+    for(i=0; i<QROOT; i++)
+    {
+        ff_qexp[i]= lrintf(v);
+        v *= pow(2, 1.0 / QROOT); /* factor 2^(1/QROOT) per entry, so v doubles after QROOT steps */
+    }
+}
+
+static void mc_block(PlaneObmc *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){ /* writes a b_w x b_h interpolated block to dst; dx, dy must be < 16; p may be NULL (fixed filter taps are then used) */
+    static const uint8_t weight[64]={ /* blend weight (out of 8) of the first source, indexed by (dx&7) + 8*(dy&7) */
+    8,7,6,5,4,3,2,1,
+    7,7,0,0,0,0,0,1,
+    6,0,6,0,0,0,2,0,
+    5,0,0,5,0,3,0,0,
+    4,0,0,0,4,0,0,0,
+    3,0,0,5,0,3,0,0,
+    2,0,6,0,0,0,2,0,
+    1,7,0,0,0,0,0,1,
+    };
+
+    static const uint8_t brane[256]={ /* indexed by dx + 16*dy: high nibble -> l, low nibble -> r (indices into hpel[]) */
+    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
+    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
+    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
+    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
+    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
+    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
+    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
+    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
+    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
+    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
+    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
+    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
+    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
+    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
+    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
+    };
+
+    static const uint8_t needs[16]={ /* bitmask of filter passes needed per hpel[] index: 1 and 4 use the horizontal pass, 2 the vertical pass; 15 = everything */
+    0,1,0,0,
+    2,4,2,0,
+    0,1,0,0,
+    15
+    };
+
+    int x, y, b, r, l;
+    int16_t tmpIt   [64*(32+HTAPS_MAX)]; /* unclipped horizontal filter output, 64 entries per row */
+    uint8_t tmp2t[3][64*(32+HTAPS_MAX)]; /* [0] horizontal, [1] vertical, [2] horizontal+vertical results, 64 bytes per row */
+    int16_t *tmpI= tmpIt;
+    uint8_t *tmp2= tmp2t[0];
+    const uint8_t *hpel[11];
+    av_assert2(dx<16 && dy<16);
+    r= brane[dx + 16*dy]&15;
+    l= brane[dx + 16*dy]>>4;
+
+    b= needs[l] | needs[r];
+    if(p && !p->diag_mc) /* without diag_mc all passes run and the four-source blend below is used */
+        b= 15;
+
+    if(b&5){ /* horizontal pass: b_h+HTAPS_MAX-1 rows, so the second vertical pass has the rows it needs */
+        for(y=0; y < b_h+HTAPS_MAX-1; y++){
+            for(x=0; x < b_w; x++){
+                int a_1=src[x + HTAPS_MAX/2-4];
+                int a0= src[x + HTAPS_MAX/2-3];
+                int a1= src[x + HTAPS_MAX/2-2];
+                int a2= src[x + HTAPS_MAX/2-1];
+                int a3= src[x + HTAPS_MAX/2+0];
+                int a4= src[x + HTAPS_MAX/2+1];
+                int a5= src[x + HTAPS_MAX/2+2];
+                int a6= src[x + HTAPS_MAX/2+3];
+                int am=0;
+                if(!p || p->fast_mc){ /* fixed 6-tap filter (20,-5,1), scaled by 32 */
+                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
+                    tmpI[x]= am;
+                    am= (am+16)>>5;
+                }else{ /* 8-tap filter from p->hcoeff, scaled by 64 */
+                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
+                    tmpI[x]= am;
+                    am= (am+32)>>6;
+                }
+
+                if(am&(~255)) am= ~(am>>31); /* clip to 0..255: negative -> 0, too large -> 255 after the uint8_t store */
+                tmp2[x]= am;
+            }
+            tmpI+= 64;
+            tmp2+= 64;
+            src += stride;
+        }
+        src -= stride*y; /* rewind src to the first row */
+    }
+    src += HTAPS_MAX/2 - 1;
+    tmp2= tmp2t[1];
+
+    if(b&2){ /* vertical pass on src; b_w+1 columns because hpel[6] reads tmp2t[1] + 1 */
+        for(y=0; y < b_h; y++){
+            for(x=0; x < b_w+1; x++){
+                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
+                int a0= src[x + (HTAPS_MAX/2-3)*stride];
+                int a1= src[x + (HTAPS_MAX/2-2)*stride];
+                int a2= src[x + (HTAPS_MAX/2-1)*stride];
+                int a3= src[x + (HTAPS_MAX/2+0)*stride];
+                int a4= src[x + (HTAPS_MAX/2+1)*stride];
+                int a5= src[x + (HTAPS_MAX/2+2)*stride];
+                int a6= src[x + (HTAPS_MAX/2+3)*stride];
+                int am=0;
+                if(!p || p->fast_mc)
+                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
+                else
+                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
+
+                if(am&(~255)) am= ~(am>>31);
+                tmp2[x]= am;
+            }
+            src += stride;
+            tmp2+= 64;
+        }
+        src -= stride*y;
+    }
+    src += stride*(HTAPS_MAX/2 - 1);
+    tmp2= tmp2t[2];
+    tmpI= tmpIt;
+    if(b&4){ /* vertical pass on the unclipped horizontal results; shifts are doubled (10 / 12) to undo both filter gains */
+        for(y=0; y < b_h; y++){
+            for(x=0; x < b_w; x++){
+                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
+                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
+                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
+                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
+                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
+                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
+                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
+                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
+                int am=0;
+                if(!p || p->fast_mc)
+                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
+                else
+                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
+                if(am&(~255)) am= ~(am>>31);
+                tmp2[x]= am;
+            }
+            tmpI+= 64;
+            tmp2+= 64;
+        }
+    }
+
+    hpel[ 0]= src; /* hpel[] is a 3x3 grid stored with row pitch 4; entries 3 and 7 are never assigned */
+    hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
+    hpel[ 2]= src + 1;
+
+    hpel[ 4]= tmp2t[1];
+    hpel[ 5]= tmp2t[2];
+    hpel[ 6]= tmp2t[1] + 1;
+
+    hpel[ 8]= src + stride;
+    hpel[ 9]= hpel[1] + 64;
+    hpel[10]= hpel[8] + 1;
+
+#define MC_STRIDE(x) (needs[x] ? 64 : stride)
+
+    if(b==15){ /* four-source bilinear blend with weights from dx&7 and dy&7 */
+        int dxy = dx / 8 + dy / 8 * 4;
+        const uint8_t *src1 = hpel[dxy    ];
+        const uint8_t *src2 = hpel[dxy + 1];
+        const uint8_t *src3 = hpel[dxy + 4];
+        const uint8_t *src4 = hpel[dxy + 5];
+        int stride1 = MC_STRIDE(dxy);
+        int stride2 = MC_STRIDE(dxy + 1);
+        int stride3 = MC_STRIDE(dxy + 4);
+        int stride4 = MC_STRIDE(dxy + 5);
+        dx&=7;
+        dy&=7;
+        for(y=0; y < b_h; y++){
+            for(x=0; x < b_w; x++){
+                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
+                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
+            }
+            src1+=stride1;
+            src2+=stride2;
+            src3+=stride3;
+            src4+=stride4;
+            dst +=stride;
+        }
+    }else{ /* two-source blend of hpel[l] and hpel[r] with weights a and 8-a */
+        const uint8_t *src1= hpel[l];
+        const uint8_t *src2= hpel[r];
+        int stride1 = MC_STRIDE(l);
+        int stride2 = MC_STRIDE(r);
+        int a= weight[((dx&7) + (8*(dy&7)))];
+        int b= 8-a; /* shadows the outer pass mask b, which is no longer needed here */
+        for(y=0; y < b_h; y++){
+            for(x=0; x < b_w; x++){
+                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
+            }
+            src1+=stride1;
+            src2+=stride2;
+            dst +=stride;
+        }
+    }
+}
+
+void ff_obmc_pred_block(OBMCContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride,
+                     int sx, int sy, int b_w, int b_h, const BlockNode *block,
+                     int plane_index, int w, int h)
+{ // Fills dst with the b_w x b_h prediction of one block: a flat color for intra blocks, otherwise motion compensation from last_pictures[block->ref].
+    if(block->type & BLOCK_INTRA){
+        int x, y;
+        const unsigned color  = block->color[plane_index];
+        const unsigned color4 = color*0x01010101; // color byte replicated into all four bytes of a 32-bit word
+        if(b_w==32){ // widths 32/16/8/4 are filled with 32-bit stores, four pixels at a time
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+                *(uint32_t*)&dst[8 + y*stride]= color4;
+                *(uint32_t*)&dst[12+ y*stride]= color4;
+                *(uint32_t*)&dst[16+ y*stride]= color4;
+                *(uint32_t*)&dst[20+ y*stride]= color4;
+                *(uint32_t*)&dst[24+ y*stride]= color4;
+                *(uint32_t*)&dst[28+ y*stride]= color4;
+            }
+        }else if(b_w==16){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+                *(uint32_t*)&dst[8 + y*stride]= color4;
+                *(uint32_t*)&dst[12+ y*stride]= color4;
+            }
+        }else if(b_w==8){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+            }
+        }else if(b_w==4){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+            }
+        }else{ // any other width: plain per-pixel fill
+            for(y=0; y < b_h; y++){
+                for(x=0; x < b_w; x++){
+                    dst[x + y*stride]= color;
+                }
+            }
+        }
+    }else{
+        uint8_t *src= s->last_pictures[block->ref]->data[plane_index];
+        const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale; // chroma planes scale the MV down by chroma_h_shift
+        int mx= block->mx*scale;
+        int my= block->my*scale;
+        const int dx= mx&15; // low 4 bits: sub-pixel part of the scaled MV
+        const int dy= my&15;
+        const int tab_index= 3 - (b_w>>2) + (b_w>>4); // b_w 16->0, 8->1, 4->2, 2->3 (32 gets its own branch below)
+        sx += (mx>>4) - (HTAPS_MAX/2-1); // integer part of the MV, moved back by HTAPS_MAX/2-1 pixels of margin
+        sy += (my>>4) - (HTAPS_MAX/2-1);
+        src += sx + sy*stride;
+        if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0) // unsigned compare also catches negative sx/sy
+           || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
+            s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
+                                     stride, stride,
+                                     b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
+                                     sx, sy, w, h);
+            src= tmp + MB_SIZE; // read from the edge-emulated copy instead of the reference frame
+        }
+
+        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
+
+        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
+        if(    (dx&3) || (dy&3) // every case the h264 qpel tables below do not cover goes through mc_block()
+            || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
+            || (b_w&(b_w-1))
+            || b_w == 1
+            || b_h == 1
+            || !s->plane[plane_index].fast_mc )
+            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
+        else if(b_w==32){
+            int y;
+            for(y=0; y<b_h; y+=16){ // two 16-wide calls per 16 rows; the +3 offsets skip the margin subtracted above
+                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
+                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
+            }
+        }else if(b_w==b_h)
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
+        else if(b_w==2*b_h){ // wide block: two square halves side by side
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
+        }else{ // tall block: two square halves stacked vertically
+            av_assert2(2*b_w==b_h);
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
+        }
+    }
+}
+
+#define mca(dx,dy,b_w)\
+static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
+    av_assert2(h==b_w);\
+    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
+}
+
+mca( 0, 0,16) // each expansion defines mc_block_hpel<dx><dy><b_w>(), here mc_block_hpel0016()
+mca( 8, 0,16) // the wrappers assert h==b_w and call mc_block() with a NULL plane
+mca( 0, 8,16) // src is moved back by HTAPS_MAX/2-1 rows and columns before the call
+mca( 8, 8,16) // the 16-wide variants are installed into s->hdsp.*_pixels_tab[0] by mcfh()
+mca( 0, 0,8)
+mca( 8, 0,8)
+mca( 0, 8,8)
+mca( 8, 8,8) // the 8-wide variants are installed into s->hdsp.*_pixels_tab[1] by mcfh()
+
+/** Set up DSP contexts, MC function tables, the IDWT buffer and AVFrame shells.
+ *  On failure, whatever was already allocated is left in s (no cleanup is done here).
+ *  @return 0 on success, AVERROR(ENOMEM) when an allocation fails */
+av_cold int ff_obmc_common_init(OBMCContext *s, AVCodecContext *avctx)
+{
+    int width = avctx->width, height = avctx->height;
+    int i, j;
+
+    s->avctx = avctx;
+    s->obmc_coder.priv_data = NULL;
+    s->obmc_coder.avctx = NULL;
+
+    ff_me_cmp_init(&s->mecc, avctx);
+    ff_hpeldsp_init(&s->hdsp, avctx->flags);
+    ff_videodsp_init(&s->vdsp, 8);
+    ff_dwt_init(&s->dwt);
+    ff_h264qpel_init(&s->h264qpel, 8);
+
+    s->max_ref_frames = 1; //just make sure it's not an invalid value in case of no initial keyframe
+
+#define mcf(dx,dy)\
+    s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
+    s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
+        s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
+    s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
+    s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
+        s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
+
+    mcf( 0, 0) // qpel tables (rounding and no-rounding alike) are pointed at the h264 qpel functions
+    mcf( 4, 0)
+    mcf( 8, 0)
+    mcf(12, 0)
+    mcf( 0, 4)
+    mcf( 4, 4)
+    mcf( 8, 4)
+    mcf(12, 4)
+    mcf( 0, 8)
+    mcf( 4, 8)
+    mcf( 8, 8)
+    mcf(12, 8)
+    mcf( 0,12)
+    mcf( 4,12)
+    mcf( 8,12)
+    mcf(12,12)
+
+#define mcfh(dx,dy)\
+    s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
+    s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
+        mc_block_hpel ## dx ## dy ## 16;\
+    s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
+    s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
+        mc_block_hpel ## dx ## dy ## 8;
+
+    mcfh(0, 0) // hpel tables are replaced by the mc_block_hpel*() wrappers generated by mca()
+    mcfh(8, 0)
+    mcfh(0, 8)
+    mcfh(8, 8)
+
+    init_qexp();
+
+    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
+
+    for(i=0; i<MAX_REF_FRAMES; i++) {
+        for(j=0; j<MAX_REF_FRAMES; j++)
+            ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1); // ratio (i+1)/(j+1) scaled by 256; pred_mv() undoes it with (+128)>>8
+        s->last_pictures[i] = av_frame_alloc();
+        if (!s->last_pictures[i])
+            goto fail;
+    }
+
+    s->mconly_picture = av_frame_alloc();
+    s->current_picture = av_frame_alloc();
+    if (!s->mconly_picture || !s->current_picture)
+        goto fail;
+
+    return 0;
+fail:
+    return AVERROR(ENOMEM);
+}
+
+int ff_obmc_common_init_after_header(OBMCContext *s)
+{ // Allocates the work buffers on the first call, checks the pixel format and fills in the per-plane dimensions; returns 0 or a negative AVERROR.
+    int plane_index;
+    int ret, emu_buf_size;
+
+    if(!s->scratchbuf) { // scratchbuf doubles as the "buffers already allocated" flag
+        if ((ret = ff_get_buffer(s->avctx, s->mconly_picture, AV_GET_BUFFER_FLAG_REF)) < 0)
+            return ret;
+        FF_ALLOCZ_ARRAY_OR_GOTO(s->avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*s->avctx->width+256), 7*MB_SIZE, fail);
+        emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*s->avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
+        FF_ALLOC_OR_GOTO(s->avctx, s->emu_edge_buffer, emu_buf_size, fail);
+    }
+
+    if(s->mconly_picture->format != s->avctx->pix_fmt) { // the buffer obtained on the first call fixes the pixel format
+        av_log(s->avctx, AV_LOG_ERROR, "pixel format changed\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
+        int w= s->avctx->width;
+        int h= s->avctx->height;
+
+        if(plane_index){ // planes other than 0 use the chroma-subsampled size
+            w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
+            h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
+        }
+        s->plane[plane_index].width = w;
+        s->plane[plane_index].height= h;
+    }
+
+    return 0;
+fail:
+    return AVERROR(ENOMEM);
+}
+
+static int halfpel_interpol(OBMCContext *s, uint8_t *halfpel[4][4], AVFrame *frame)
+{ // Allocates and fills halfpel[1..3][p] for every plane p of frame with a (1,-5,20,20,-5,1)/32 filter; halfpel[0][p] aliases frame->data[p]. Returns 0 or AVERROR(ENOMEM).
+    int p,x,y;
+
+    for(p=0; p < s->nb_planes; p++){
+        int is_chroma= !!p;
+        int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
+        int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
+        int ls= frame->linesize[p];
+        uint8_t *src= frame->data[p];
+
+        halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH)); // room for EDGE_WIDTH extra rows above and below
+        halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
+        halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
+        if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) { // NOTE(review): only plane p is freed here; buffers of planes < p stay allocated
+            av_freep(&halfpel[1][p]);
+            av_freep(&halfpel[2][p]);
+            av_freep(&halfpel[3][p]);
+            return AVERROR(ENOMEM);
+        }
+        halfpel[1][p] += EDGE_WIDTH * (1 + ls); // skip EDGE_WIDTH rows plus EDGE_WIDTH columns; ff_obmc_release_buffer() subtracts this again
+        halfpel[2][p] += EDGE_WIDTH * (1 + ls);
+        halfpel[3][p] += EDGE_WIDTH * (1 + ls);
+
+        halfpel[0][p]= src;
+        for(y=0; y<h; y++){ // plane 1: filter along the row (neighbours at i-2 .. i+3)
+            for(x=0; x<w; x++){
+                int i= y*ls + x;
+
+                halfpel[1][p][i]= ( // indices outside [0, h*w) are reflected; NOTE(review): the bound is h*w although rows are ls apart
+                    20*(src[i] + src[i+1<h*w?i+1:i-1]) -
+                    5*(src[i-1<0?i+1:i-1] + src[i+2<h*w?i+2:i-2]) +
+                    (src[i-2<0?i+2:i-2] + src[i+3<h*w?i+3:i-3]) + 16
+                )>>5;
+            }
+        }
+        for(y=0; y<h; y++){ // plane 2: same filter along the column (steps of ls)
+            for(x=0; x<w; x++){
+                int i= y*ls + x;
+
+                halfpel[2][p][i]= (
+                    20*(src[i] + src[i+ls<h*w?i+ls:i-ls]) -
+                    5*(src[i-ls<0?i+ls:i-ls] + src[i+2*ls<h*w?i+2*ls:i-2*ls]) +
+                    (src[i-2*ls<0?i+2*ls:i-2*ls] + src[i+3*ls<h*w?i+3*ls:i-3*ls]) + 16
+                )>>5;
+            }
+        }
+        src= halfpel[1][p]; // plane 3: column filter applied to the row-filtered plane 1
+        for(y=0; y<h; y++){
+            for(x=0; x<w; x++){
+                int i= y*ls + x;
+
+                halfpel[3][p][i]= (
+                    20*(src[i] + src[i+ls<h*w?i+ls:i-ls]) -
+                    5*(src[i-ls<0?i+ls:i-ls] + src[i+2*ls<h*w?i+2*ls:i-2*ls]) +
+                    (src[i-2*ls<0?i+2*ls:i-2*ls] + src[i+3*ls<h*w?i+3*ls:i-3*ls]) + 16
+                )>>5;
+            }
+        }
+
+//FIXME border!
+    }
+    return 0;
+}
+
+void ff_obmc_release_buffer(OBMCContext *s)
+{ // Unrefs the oldest reference frame (index max_ref_frames-1) and frees the half-pel planes stored for it.
+    int i;
+
+    if(s->last_pictures[s->max_ref_frames-1]->data[0]){ // nothing to do while that slot holds no picture
+        av_frame_unref(s->last_pictures[s->max_ref_frames-1]);
+        for(i=0; i<9; i++) // i/3 selects half-pel plane 1..3, i%3 selects picture plane 0..2
+            if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) {
+                av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3])); // undo the offset added in halfpel_interpol()
+                s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL;
+            }
+    }
+}
+
+int ff_obmc_frame_start(OBMCContext *s)
+{ // Rotates the reference list so the current picture becomes last_pictures[0], counts the usable references and gets a buffer for the new current picture; returns 0 or a negative AVERROR.
+    AVFrame *tmp;
+    int i, ret;
+
+    int USE_HALFPEL_PLANE = 0; // half-pel planes are generated only for the codec ids selected below
+
+    switch(s->avctx->codec_id) {
+    case AV_CODEC_ID_FFV1:
+        USE_HALFPEL_PLANE = 1;
+        break;
+    default:
+        break;
+    }
+
+    ff_obmc_release_buffer(s);
+
+    tmp= s->last_pictures[s->max_ref_frames-1]; // the oldest AVFrame is recycled as the new current picture
+    for(i=s->max_ref_frames-1; i>0; i--)
+        s->last_pictures[i] = s->last_pictures[i-1];
+    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); // shift the half-pel planes along with their frames
+    if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
+        if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
+            return ret;
+    }
+    s->last_pictures[0] = s->current_picture;
+    s->current_picture = tmp;
+    av_frame_copy_props(s->current_picture, s->last_pictures[0]);
+
+    if (s->keyframe) {
+        s->ref_frames= 0;
+    } else {
+        int i; // NOTE: shadows the outer i
+        for(i=0; i<s->max_ref_frames && s->last_pictures[i]->data[0]; i++) // count filled slots, newest first
+            if(i && s->last_pictures[i-1]->key_frame) // stop once the previous slot was a keyframe
+                break;
+        s->ref_frames= i;
+        if(s->ref_frames==0){
+            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+    if ((ret = ff_obmc_get_buffer(s, s->current_picture)) < 0)
+        return ret;
+
+    s->current_picture->key_frame= s->keyframe;
+
+    return 0;
+}
+
+av_cold int ff_obmc_close(OBMCContext *s)
+{ // Frees the buffers and AVFrames listed below and always returns 0.
+    int i;
+
+    av_freep(&s->spatial_idwt_buffer);
+
+    s->m.me.temp= NULL; // only cleared, not freed (presumably it points into scratchpad - TODO confirm)
+    av_freep(&s->m.me.scratchpad);
+    av_freep(&s->m.me.map);
+    av_freep(&s->m.me.score_map);
+    av_freep(&s->m.sc.obmc_scratchpad);
+
+    av_freep(&s->block);
+    av_freep(&s->scratchbuf);
+    av_freep(&s->emu_edge_buffer);
+
+    for(i=0; i<MAX_REF_FRAMES; i++){
+        av_freep(&s->ref_mvs[i]);
+        av_freep(&s->ref_scores[i]);
+        if(s->last_pictures[i] && s->last_pictures[i]->data[0]) {
+            av_assert0(s->last_pictures[i]->data[0] != s->current_picture->data[0]); // a reference must not share its buffer with the current picture
+        }
+        av_frame_free(&s->last_pictures[i]);
+    }
+
+    av_frame_free(&s->mconly_picture);
+    av_frame_free(&s->current_picture);
+
+    return 0;
+}
diff --git a/libavcodec/obmemc.h b/libavcodec/obmemc.h
new file mode 100644
index 0000000..00e079c
--- /dev/null
+++ b/libavcodec/obmemc.h
@@ -0,0 +1,522 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ /**
+ * @file obmemc.h
+ * @brief Overlapped block motion estimation and compensation
+ */
+
+#ifndef AVCODEC_OBMEMC_H
+#define AVCODEC_OBMEMC_H
+
+#include "libavutil/imgutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/motion_vector.h"
+
+#include "rangecoder.h"
+
+#include "hpeldsp.h"
+#include "me_cmp.h"
+#include "qpeldsp.h"
+#include "snow_dwt.h"
+
+#include "mpegvideo.h"
+#include "h264qpel.h"
+
+#define MAX_PLANES 4 // size of OBMCContext.plane[]
+#define QSHIFT 5
+#define QROOT (1<<QSHIFT) // number of entries in ff_qexp[]
+#define LOSSLESS_QLOG -128
+#define FRAC_BITS 4 // fractional bits carried by the IDWTELEM samples that the predict_* helpers read and write
+#define MAX_REF_FRAMES 8 // capacity of last_pictures[], halfpel_plane[], ref_mvs[] and ref_scores[]
+
+#define LOG2_OBMC_MAX 8 // add_yblock() shifts its weighted sums left by 8 - LOG2_OBMC_MAX
+#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
+typedef struct BlockNode{
+    int16_t mx;         ///< Motion vector component X, see mv_scale
+    int16_t my;         ///< Motion vector component Y, see mv_scale
+    uint8_t ref;        ///< Reference frame index
+    uint8_t color[3];   ///< Color for intra blocks, one value per plane
+    uint8_t type;       ///< Bitfield of BLOCK_*
+//#define TYPE_SPLIT    1
+#define BLOCK_INTRA   1    ///< Intra block, inter otherwise
+#define BLOCK_OPT     2    ///< Block needs no checks in this round of iterative motion estimation
+//#define TYPE_NOCOLOR  4
+    uint8_t level; //FIXME merge into type?
+}BlockNode;
+
+static const BlockNode null_block= { //FIXME add border maybe
+    .color= {128,128,128}, // same value 128 for all three planes
+    .mx= 0,
+    .my= 0,
+    .ref= 0,
+    .type= 0, // BLOCK_INTRA not set, i.e. an inter block with a zero motion vector
+    .level= 0,
+};
+
+#define LOG2_MB_SIZE 4
+#define MB_SIZE (1<<LOG2_MB_SIZE) // block size at depth 0; predict_slice() uses MB_SIZE >> block_max_depth
+#define ENCODER_EXTRA_BITS 4
+#define HTAPS_MAX 8 // hcoeff[] holds HTAPS_MAX/2 entries; MC sources are offset by HTAPS_MAX/2-1 pixels of margin
+
+typedef struct PlaneObmc{
+    int width;   // set by ff_obmc_common_init_after_header(); chroma planes get the subsampled size
+    int height;
+
+    int htaps;
+    int8_t hcoeff[HTAPS_MAX/2]; // interpolation filter coefficients read by mc_block()
+    int diag_mc;
+    int fast_mc; // when zero, ff_obmc_pred_block() always takes the generic mc_block() path
+
+    int last_htaps; // presumably the previous frame's values of the fields above - TODO confirm
+    int8_t last_hcoeff[HTAPS_MAX/2];
+    int last_diag_mc;
+} PlaneObmc;
+
+/**
+ * @struct ObmcCoderContext
+ * @brief struct that stores callbacks to interact with the bitstream encoder
+ * priv_data and avctx are reset to NULL by ff_obmc_common_init().
+ */
+typedef struct ObmcCoderContext {
+    void *priv_data;
+    AVCodecContext *avctx;
+
+    int (*get_bits)         (struct ObmcCoderContext *);
+    int (*available_bytes)  (struct ObmcCoderContext *);
+
+    // UTILS
+    void (*init_frame_coder)  (AVCodecContext *, struct ObmcCoderContext *);
+    void (*copy_coder)        (struct ObmcCoderContext *);
+    void (*reset_coder)       (struct ObmcCoderContext *);
+    void (*free)              (struct ObmcCoderContext *);
+
+    // ENCODER (the meaning of the int arguments is defined by the callers - not visible in this header)
+    void (*put_level_break) (struct ObmcCoderContext *, int, int);
+    void (*put_block_type)  (struct ObmcCoderContext *, int, int);
+    void (*put_best_ref)    (struct ObmcCoderContext *, int, int);
+    void (*put_block_mv)    (struct ObmcCoderContext *, int, int, int, int);
+    void (*put_block_color) (struct ObmcCoderContext *, int, int, int, int, int, int);
+
+} ObmcCoderContext;
+
+typedef struct OBMCContext {
+    AVCodecContext *avctx;
+    int keyframe; // when set, ff_obmc_frame_start() uses no reference frames and the predict_* helpers only apply a constant offset
+    int chroma_h_shift, chroma_v_shift;
+
+    ObmcCoderContext obmc_coder;
+
+    /* ME/MC part */
+    MECmpContext mecc;
+    HpelDSPContext hdsp;
+    QpelDSPContext qdsp;
+    VideoDSPContext vdsp;
+    H264QpelContext h264qpel;
+    MpegvideoEncDSPContext mpvencdsp;
+    SnowDWTContext dwt;
+
+    AVFrame *input_picture;              ///< new_picture with the internal linesizes
+    AVFrame *current_picture;
+    AVFrame *last_pictures[MAX_REF_FRAMES]; // reference frames, index 0 is the most recent one
+    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; // [reference][half-pel plane 0..3][picture plane]
+    AVFrame *mconly_picture;
+
+    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
+
+    uint8_t *scratchbuf; // temporary prediction buffer used by add_yblock(); allocated in ff_obmc_common_init_after_header()
+    uint8_t *emu_edge_buffer;
+
+    AVMotionVector *avmv; // when non-NULL, predict_slice_buffered() appends one entry per inter block of plane 0
+    int avmv_index;
+
+    int motion_est;
+    int intra_penalty;
+    int lambda;
+    int lambda2;
+    int max_ref_frames; // number of last_pictures[] slots that are rotated
+    int ref_frames; // number of usable references for the current frame, computed in ff_obmc_frame_start()
+
+    BlockNode *block;
+    int b_width; // block grid size at depth 0; users shift it left by block_max_depth
+    int b_height;
+    int block_max_depth;
+    int last_block_max_depth;
+
+    uint32_t *ref_scores[MAX_REF_FRAMES];
+    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
+
+#define ME_CACHE_SIZE 1024
+    unsigned me_cache[ME_CACHE_SIZE];
+    unsigned me_cache_generation;
+    int mv_scale; // multiplier applied to BlockNode mx/my before motion compensation
+    int last_mv_scale;
+    int iterative_dia_size;
+
+    IDWTELEM *spatial_idwt_buffer; // width*height IDWTELEMs, allocated in ff_obmc_common_init()
+
+    PlaneObmc plane[MAX_PLANES];
+
+    int nb_planes; // number of entries of plane[] in use
+} OBMCContext;
+
+/* Tables */
+extern const uint8_t * const ff_obmc_tab[4]; // OBMC weight windows; predict_slice() indexes it by block_max_depth (+ chroma_h_shift for chroma)
+extern uint8_t ff_qexp[QROOT];
+extern int ff_scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; // [i][j] = 256*(i+1)/(j+1), filled by ff_obmc_common_init()
+
+int ff_obmc_get_buffer(OBMCContext *s, AVFrame *frame);
+void ff_obmc_release_buffer(OBMCContext *s); // drops the oldest reference frame and its half-pel planes
+int ff_obmc_common_init_after_header(OBMCContext *s); // 0 on success, negative AVERROR otherwise
+int ff_obmc_frame_start(OBMCContext *f); // 0 on success, negative AVERROR otherwise
+int ff_obmc_alloc_blocks(OBMCContext *s);
+int ff_obmc_common_init(OBMCContext *s, AVCodecContext *avctx); // 0 on success, AVERROR(ENOMEM) otherwise
+int ff_obmc_close(OBMCContext *s); // always returns 0
+void ff_obmc_pred_block(OBMCContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride,
+                     int sx, int sy, int b_w, int b_h, const BlockNode *block,
+                     int plane_index, int w, int h);
+
+/* Predict (*mx,*my) as mid_pred() of the left/top/top-right MVs, rescaled through ff_scale_mv_ref[ref] unless ref_frames==1. */
+static inline void pred_mv(OBMCContext *s, int *mx, int *my, int ref,
+                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
+    int lx = left->mx, ly = left->my;
+    int tx = top ->mx, ty = top ->my;
+    int rx = tr  ->mx, ry = tr  ->my;
+    if(s->ref_frames != 1){
+        const int *scale = ff_scale_mv_ref[ref];
+        lx = (lx * scale[left->ref] + 128) >> 8;  ly = (ly * scale[left->ref] + 128) >> 8;
+        tx = (tx * scale[top ->ref] + 128) >> 8;  ty = (ty * scale[top ->ref] + 128) >> 8;
+        rx = (rx * scale[tr  ->ref] + 128) >> 8;  ry = (ry * scale[tr  ->ref] + 128) >> 8;
+    }
+    *mx = mid_pred(lx, tx, rx);
+    *my = mid_pred(ly, ty, ry);
+}
+
+/* Return 1 when both blocks are intra with equal colors, or when mx, my, ref and the intra flag all match; 0 otherwise. */
+static av_always_inline int same_block(BlockNode *a, BlockNode *b){
+    if((a->type & b->type) & BLOCK_INTRA) /* both intra: only the three colors are compared */
+        return a->color[0] == b->color[0] && a->color[1] == b->color[1] && a->color[2] == b->color[2];
+    /* at most one of them is intra: motion vector, reference and intra flag must agree */
+    return a->mx == b->mx && a->my == b->my && a->ref == b->ref && !((a->type ^ b->type) & BLOCK_INTRA);
+}
+
+static av_always_inline void add_yblock(OBMCContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ // Blends the predictions of the four blocks around (b_x,b_y) with the obmc window, then adds the result to dst (writing pixels to dst8) or subtracts it from dst.
+    const int b_width = s->b_width  << s->block_max_depth;
+    const int b_height= s->b_height << s->block_max_depth;
+    const int b_stride= b_width;
+    BlockNode *lt= &s->block[b_x + b_y*b_stride]; // left-top, right-top, left-bottom and right-bottom neighbours
+    BlockNode *rt= lt+1;
+    BlockNode *lb= lt+b_stride;
+    BlockNode *rb= lb+1;
+    uint8_t *block[4]; // predictions for lt, rt, lb, rb; entries may alias each other
+    // When src_stride is large enough, it is possible to interleave the blocks.
+    // Otherwise the blocks are written sequentially in the tmp buffer.
+    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
+    uint8_t *tmp = s->scratchbuf;
+    uint8_t *ptmp;
+    int x,y;
+
+    if(b_x<0){ // outside the block grid: reuse the nearest neighbour that exists
+        lt= rt;
+        lb= rb;
+    }else if(b_x + 1 >= b_width){
+        rt= lt;
+        rb= lb;
+    }
+    if(b_y<0){
+        lt= lb;
+        rt= rb;
+    }else if(b_y + 1 >= b_height){
+        lb= lt;
+        rb= rt;
+    }
+
+    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
+        obmc -= src_x; // clip against the left edge and skip the matching window columns
+        b_w += src_x;
+        if(!sliced && !offset_dst)
+            dst -= src_x;
+        src_x=0;
+    }
+    if(src_x + b_w > w){
+        b_w = w - src_x;
+    }
+    if(src_y<0){
+        obmc -= src_y*obmc_stride; // clip against the top edge and skip the matching window rows
+        b_h += src_y;
+        if(!sliced && !offset_dst)
+            dst -= src_y*dst_stride;
+        src_y=0;
+    }
+    if(src_y + b_h> h){
+        b_h = h - src_y;
+    }
+
+    if(b_w<=0 || b_h<=0) return; // nothing left inside the plane
+
+    if(!sliced && offset_dst)
+        dst += src_x + src_y*dst_stride;
+    dst8+= src_x + src_y*src_stride;
+//    src += src_x + src_y*src_stride;
+
+    ptmp= tmp + 3*tmp_step; // predictions start 3 steps into scratchbuf; tmp itself is handed to ff_obmc_pred_block() as its work area
+    block[0]= ptmp;
+    ptmp+=tmp_step;
+    ff_obmc_pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
+
+    if(same_block(lt, rt)){ // identical neighbours share one prediction instead of computing it again
+        block[1]= block[0];
+    }else{
+        block[1]= ptmp;
+        ptmp+=tmp_step;
+        ff_obmc_pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
+    }
+
+    if(same_block(lt, lb)){
+        block[2]= block[0];
+    }else if(same_block(rt, lb)){
+        block[2]= block[1];
+    }else{
+        block[2]= ptmp;
+        ptmp+=tmp_step;
+        ff_obmc_pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
+    }
+
+    if(same_block(lt, rb) ){
+        block[3]= block[0];
+    }else if(same_block(rt, rb)){
+        block[3]= block[1];
+    }else if(same_block(lb, rb)){
+        block[3]= block[2];
+    }else{
+        block[3]= ptmp;
+        ff_obmc_pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
+    }
+    if(sliced){ // slice-buffer variant: the blend is delegated to the DWT DSP function
+        s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    }else{
+        for(y=0; y<b_h; y++){
+            //FIXME ugly misuse of obmc_stride
+            const uint8_t *obmc1= obmc + y*obmc_stride; // obmc1..obmc4: the four quadrants of the window
+            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+            for(x=0; x<b_w; x++){
+                int v=   obmc1[x] * block[3][x + y*src_stride]
+                        +obmc2[x] * block[2][x + y*src_stride]
+                        +obmc3[x] * block[1][x + y*src_stride]
+                        +obmc4[x] * block[0][x + y*src_stride];
+
+                v <<= 8 - LOG2_OBMC_MAX; // no-op while LOG2_OBMC_MAX is 8
+                if(FRAC_BITS != 8) {
+                    v >>= 8 - FRAC_BITS; // bring the weighted sum down to FRAC_BITS fractional bits
+                }
+                if(add) {
+                    v += dst[x + y*dst_stride];
+                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; // round to an integer pixel value
+                    if(v&(~255)) v= ~(v>>31); // clip to 0..255 once stored as uint8_t
+                    dst8[x + y*src_stride] = v;
+                } else {
+                    dst[x + y*dst_stride] -= v;
+                }
+            }
+        }
+    }
+}
+
+static av_always_inline void predict_slice(OBMCContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ // Applies the prediction for block row mb_y of one plane to buf: added (and written to the current picture) when add is set, subtracted otherwise.
+    PlaneObmc *p= &s->plane[plane_index];
+    const int mb_w= s->b_width  << s->block_max_depth;
+    const int mb_h= s->b_height << s->block_max_depth;
+    int x, y, mb_x;
+    int block_size = MB_SIZE >> s->block_max_depth;
+    int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
+    int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size; // the window is twice the block size
+    int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *dst8= s->current_picture->data[plane_index];
+    int w= p->width;
+    int h= p->height;
+    av_assert2(s->chroma_h_shift == s->chroma_v_shift); // obmc params assume squares
+    if(s->keyframe || (s->avctx->debug&512)){ // no motion compensation: the prediction is the constant 128
+        if(mb_y==mb_h) // the extra row past the grid has no pixels of its own in this mode
+            return;
+
+        if(add){
+            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
+                for(x=0; x<w; x++){
+                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); // add 128 and the rounding term
+                    v >>= FRAC_BITS;
+                    if(v&(~255)) v= ~(v>>31); // clip to 0..255 once stored as uint8_t
+                    dst8[x + y*ref_stride]= v;
+                }
+            }
+        }else{
+            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
+                for(x=0; x<w; x++){
+                    buf[x + y*w]-= 128<<FRAC_BITS;
+                }
+            }
+        }
+
+        return;
+    }
+
+    for(mb_x=0; mb_x<=mb_w; mb_x++){ // mb_w+1 windows per row, each shifted left/up by half a block
+        add_yblock(s, 0, NULL, buf, dst8, obmc,
+                   block_w*mb_x - block_w/2,
+                   block_h*mb_y - block_h/2,
+                   block_w, block_h,
+                   w, h,
+                   w, ref_stride, obmc_stride,
+                   mb_x - 1, mb_y - 1,
+                   add, 1, plane_index);
+    }
+}
+
+static av_always_inline void predict_slice_buffered(OBMCContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ // Slice-buffer variant of predict_slice(): samples are accessed through sb->line[]; also exports motion vectors to s->avmv when that is set.
+    PlaneObmc *p= &s->plane[plane_index];
+    const int mb_w= s->b_width  << s->block_max_depth;
+    const int mb_h= s->b_height << s->block_max_depth;
+    int x, y, mb_x;
+    int block_size = MB_SIZE >> s->block_max_depth;
+    int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
+    int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+    int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size; // the window is twice the block size
+    int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *dst8= s->current_picture->data[plane_index];
+    int w= p->width;
+    int h= p->height;
+
+    if(s->keyframe || (s->avctx->debug&512)){ // no motion compensation: the prediction is the constant 128
+        if(mb_y==mb_h)
+            return;
+
+        if(add){
+            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
+                IDWTELEM * line = sb->line[y];
+                for(x=0; x<w; x++){
+                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); // add 128 and the rounding term
+                    v >>= FRAC_BITS;
+                    if(v&(~255)) v= ~(v>>31); // clip to 0..255 once stored as uint8_t
+                    dst8[x + y*ref_stride]= v;
+                }
+            }
+        }else{
+            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
+                IDWTELEM * line = sb->line[y];
+                for(x=0; x<w; x++){
+                    line[x] -= 128 << FRAC_BITS;
+                }
+            }
+        }
+
+        return;
+    }
+
+    for(mb_x=0; mb_x<=mb_w; mb_x++){ // mb_w+1 windows per row, each shifted left/up by half a block
+        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
+                   block_w*mb_x - block_w/2,
+                   block_h*mb_y - block_h/2,
+                   block_w, block_h,
+                   w, h,
+                   w, ref_stride, obmc_stride,
+                   mb_x - 1, mb_y - 1,
+                   add, 0, plane_index);
+    }
+
+    if(s->avmv && mb_y < mb_h && plane_index == 0) // export motion vectors once per block, for plane 0 only
+        for(mb_x=0; mb_x<mb_w; mb_x++){
+            AVMotionVector *avmv = s->avmv + s->avmv_index;
+            const int b_width = s->b_width  << s->block_max_depth;
+            const int b_stride= b_width;
+            BlockNode *bn= &s->block[mb_x + mb_y*b_stride];
+
+            if (bn->type) // only blocks with no type bit set get an entry
+                continue;
+
+            s->avmv_index++; // NOTE(review): no bound check here; the avmv array size is set elsewhere
+
+            avmv->w = block_w;
+            avmv->h = block_h;
+            avmv->dst_x = block_w*mb_x - block_w/2;
+            avmv->dst_y = block_h*mb_y - block_h/2;
+            avmv->motion_scale = 8;
+            avmv->motion_x = bn->mx * s->mv_scale;
+            avmv->motion_y = bn->my * s->mv_scale;
+            avmv->src_x = avmv->dst_x + avmv->motion_x / 8;
+            avmv->src_y = avmv->dst_y + avmv->motion_y / 8;
+            avmv->source= -1 - bn->ref; // reference index 0 becomes -1, 1 becomes -2, ...
+            avmv->flags = 0;
+        }
+}
+
+static av_always_inline void predict_plane(OBMCContext *s, IDWTELEM *buf, int plane_index, int add){
+    const int last_row = s->b_height << s->block_max_depth; // rows 0..last_row inclusive are passed to predict_slice()
+    int row = 0;
+    while(row <= last_row)
+        predict_slice(s, buf, plane_index, add, row++);
+}
+
+static inline void set_blocks(OBMCContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type)
+{ // Writes one BlockNode built from the arguments into every cell of s->block covered by block (x,y) of the given level.
+    const int w= s->b_width << s->block_max_depth; // row stride of s->block
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth; // first covered cell
+    const int block_w= 1<<rem_depth; // a level-'level' block spans 2^rem_depth cells per side
+    const int block_h= 1<<rem_depth; //FIXME "w!=h"
+    BlockNode block;
+    int i,j;
+
+    block.color[0]= l;
+    block.color[1]= cb;
+    block.color[2]= cr;
+    block.mx= mx;
+    block.my= my;
+    block.ref= ref;
+    block.type= type;
+    block.level= level;
+
+    for(j=0; j<block_h; j++){
+        for(i=0; i<block_w; i++){
+            s->block[index + i + j*w]= block;
+        }
+    }
+}
+
+static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ // Stores the three source planes and the reference planes offset to (x,y) in c->src[0] / c->ref[0]; ref2 is not used.
+    OBMCContext *s = c->avctx->priv_data; // NOTE(review): assumes the codec's priv_data is (or begins with) an OBMCContext - confirm for every user
+    const int offset[3]= {
+          y*c->  stride + x,
+        ((y*c->uvstride + x)>>s->chroma_h_shift), // both chroma planes use the same offset, shifted by chroma_h_shift
+        ((y*c->uvstride + x)>>s->chroma_h_shift),
+    };
+    int i;
+    for(i=0; i<3; i++){
+        c->src[0][i]= src [i];
+        c->ref[0][i]= ref [i] + offset[i];
+    }
+    av_assert2(!ref_index); // only reference index 0 is supported here
+}
+
+#endif /* AVCODEC_OBMEMC_H */
diff --git a/libavcodec/obmemc_data.h b/libavcodec/obmemc_data.h
new file mode 100644
index 0000000..a32c27e
--- /dev/null
+++ b/libavcodec/obmemc_data.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_OBMEMC_DATA_H
+#define AVCODEC_OBMEMC_DATA_H
+
+#include "obmemc.h"
+
+static const uint8_t obmc32[1024]={ /* written as 32 rows of 32 weights */
+  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
+  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
+  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
+  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
+  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
+  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
+  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
+  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
+  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
+  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
+  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
+  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
+  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
+  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
+  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
+  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
+  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
+  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
+  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
+  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
+  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
+  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
+  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
+  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
+  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
+  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
+  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
+  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
+  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
+  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
+  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
+  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
+ //error:0.000020
+};
+static const uint8_t obmc16[256]={ /* written as 16 rows of 16 weights */
+  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
+  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
+  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
+  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
+  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
+ 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
+ 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
+ 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
+ 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
+ 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
+ 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
+  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
+  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
+  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
+  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
+  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
+//error:0.000015
+};
+
+//linear *64
+static const uint8_t obmc8[64]={ /* written as 8 rows of 8 weights */
+  4, 12, 20, 28, 28, 20, 12,  4,
+ 12, 36, 60, 84, 84, 60, 36, 12,
+ 20, 60,100,140,140,100, 60, 20,
+ 28, 84,140,196,196,140, 84, 28,
+ 28, 84,140,196,196,140, 84, 28,
+ 20, 60,100,140,140,100, 60, 20,
+ 12, 36, 60, 84, 84, 60, 36, 12,
+  4, 12, 20, 28, 28, 20, 12,  4,
+//error:0.000000
+};
+
+//linear *64
+static const uint8_t obmc4[16]={ /* written as 4 rows of 4 weights */
+ 16, 48, 48, 16,
+ 48,144,144, 48,
+ 48,144,144, 48,
+ 16, 48, 48, 16,
+//error:0.000000
+};
+
+const int8_t ff_quant3bA[256]={ /* entries 0 and 1 are 0; from index 2 on, even index -> 1 and odd index -> -1 */
+ 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+};
+
+const uint8_t * const ff_obmc_tab[4]= { /* weight tables ordered from the largest (32x32) at index 0 to the smallest (4x4) at index 3 */
+    obmc32, obmc16, obmc8, obmc4
+};
+
+/* runtime generated tables; NOTE: this header defines (not just declares) external objects, so including it from more than one .c file would produce duplicate definitions */
+uint8_t ff_qexp[QROOT];
+int ff_scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
+
+
+#endif /* AVCODEC_OBMEMC_DATA_H */
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index a3e6afc..7b210ea 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -26,7 +26,6 @@
 #include "snow_dwt.h"
 #include "internal.h"
 #include "snow.h"
-#include "snowdata.h"

 #include "rangecoder.h"
 #include "mathops.h"
@@ -66,40 +65,16 @@ void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_
     }
 }

-int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
-{
-    int ret, i;
-    int edges_needed = av_codec_is_encoder(s->avctx->codec);
-
-    frame->width  = s->avctx->width ;
-    frame->height = s->avctx->height;
-    if (edges_needed) {
-        frame->width  += 2 * EDGE_WIDTH;
-        frame->height += 2 * EDGE_WIDTH;
-    }
-    if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
-        return ret;
-    if (edges_needed) {
-        for (i = 0; frame->data[i]; i++) {
-            int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
-                            frame->linesize[i] +
-                            (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
-            frame->data[i] += offset;
-        }
-        frame->width  = s->avctx->width;
-        frame->height = s->avctx->height;
-    }
-
-    return 0;
-}
-
 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
     int plane_index, level, orientation;

     for(plane_index=0; plane_index<3; plane_index++){
         for(level=0; level<MAX_DECOMPOSITIONS; level++){
             for(orientation=level ? 1:0; orientation<4; orientation++){
-                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
+                memset(
+                    s->plane[plane_index].band[level][orientation].state,
+                    MID_STATE,
+                    sizeof(s->plane[plane_index].band[level][orientation].state));
             }
         }
     }
@@ -107,404 +82,25 @@ void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
     memset(s->block_state, MID_STATE, sizeof(s->block_state));
 }

-int ff_snow_alloc_blocks(SnowContext *s){
-    int w= AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
-    int h= AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
-
-    s->b_width = w;
-    s->b_height= h;
-
-    av_free(s->block);
-    s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2));
-    if (!s->block)
-        return AVERROR(ENOMEM);
-
-    return 0;
-}
-
-static av_cold void init_qexp(void){
-    int i;
-    double v=128;
-
-    for(i=0; i<QROOT; i++){
-        ff_qexp[i]= lrintf(v);
-        v *= pow(2, 1.0 / QROOT);
-    }
-}
-static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
-    static const uint8_t weight[64]={
-    8,7,6,5,4,3,2,1,
-    7,7,0,0,0,0,0,1,
-    6,0,6,0,0,0,2,0,
-    5,0,0,5,0,3,0,0,
-    4,0,0,0,4,0,0,0,
-    3,0,0,5,0,3,0,0,
-    2,0,6,0,0,0,2,0,
-    1,7,0,0,0,0,0,1,
-    };
-
-    static const uint8_t brane[256]={
-    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
-    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
-    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
-    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
-    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
-    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
-    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
-    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
-    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
-    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
-    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
-    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
-    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
-    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
-    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
-    };
-
-    static const uint8_t needs[16]={
-    0,1,0,0,
-    2,4,2,0,
-    0,1,0,0,
-    15
-    };
-
-    int x, y, b, r, l;
-    int16_t tmpIt   [64*(32+HTAPS_MAX)];
-    uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
-    int16_t *tmpI= tmpIt;
-    uint8_t *tmp2= tmp2t[0];
-    const uint8_t *hpel[11];
-    av_assert2(dx<16 && dy<16);
-    r= brane[dx + 16*dy]&15;
-    l= brane[dx + 16*dy]>>4;
-
-    b= needs[l] | needs[r];
-    if(p && !p->diag_mc)
-        b= 15;
-
-    if(b&5){
-        for(y=0; y < b_h+HTAPS_MAX-1; y++){
-            for(x=0; x < b_w; x++){
-                int a_1=src[x + HTAPS_MAX/2-4];
-                int a0= src[x + HTAPS_MAX/2-3];
-                int a1= src[x + HTAPS_MAX/2-2];
-                int a2= src[x + HTAPS_MAX/2-1];
-                int a3= src[x + HTAPS_MAX/2+0];
-                int a4= src[x + HTAPS_MAX/2+1];
-                int a5= src[x + HTAPS_MAX/2+2];
-                int a6= src[x + HTAPS_MAX/2+3];
-                int am=0;
-                if(!p || p->fast_mc){
-                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
-                    tmpI[x]= am;
-                    am= (am+16)>>5;
-                }else{
-                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
-                    tmpI[x]= am;
-                    am= (am+32)>>6;
-                }
-
-                if(am&(~255)) am= ~(am>>31);
-                tmp2[x]= am;
-            }
-            tmpI+= 64;
-            tmp2+= 64;
-            src += stride;
-        }
-        src -= stride*y;
-    }
-    src += HTAPS_MAX/2 - 1;
-    tmp2= tmp2t[1];
-
-    if(b&2){
-        for(y=0; y < b_h; y++){
-            for(x=0; x < b_w+1; x++){
-                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
-                int a0= src[x + (HTAPS_MAX/2-3)*stride];
-                int a1= src[x + (HTAPS_MAX/2-2)*stride];
-                int a2= src[x + (HTAPS_MAX/2-1)*stride];
-                int a3= src[x + (HTAPS_MAX/2+0)*stride];
-                int a4= src[x + (HTAPS_MAX/2+1)*stride];
-                int a5= src[x + (HTAPS_MAX/2+2)*stride];
-                int a6= src[x + (HTAPS_MAX/2+3)*stride];
-                int am=0;
-                if(!p || p->fast_mc)
-                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
-                else
-                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
-
-                if(am&(~255)) am= ~(am>>31);
-                tmp2[x]= am;
-            }
-            src += stride;
-            tmp2+= 64;
-        }
-        src -= stride*y;
-    }
-    src += stride*(HTAPS_MAX/2 - 1);
-    tmp2= tmp2t[2];
-    tmpI= tmpIt;
-    if(b&4){
-        for(y=0; y < b_h; y++){
-            for(x=0; x < b_w; x++){
-                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
-                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
-                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
-                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
-                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
-                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
-                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
-                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
-                int am=0;
-                if(!p || p->fast_mc)
-                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
-                else
-                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
-                if(am&(~255)) am= ~(am>>31);
-                tmp2[x]= am;
-            }
-            tmpI+= 64;
-            tmp2+= 64;
-        }
-    }
-
-    hpel[ 0]= src;
-    hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
-    hpel[ 2]= src + 1;
-
-    hpel[ 4]= tmp2t[1];
-    hpel[ 5]= tmp2t[2];
-    hpel[ 6]= tmp2t[1] + 1;
-
-    hpel[ 8]= src + stride;
-    hpel[ 9]= hpel[1] + 64;
-    hpel[10]= hpel[8] + 1;
-
-#define MC_STRIDE(x) (needs[x] ? 64 : stride)
-
-    if(b==15){
-        int dxy = dx / 8 + dy / 8 * 4;
-        const uint8_t *src1 = hpel[dxy    ];
-        const uint8_t *src2 = hpel[dxy + 1];
-        const uint8_t *src3 = hpel[dxy + 4];
-        const uint8_t *src4 = hpel[dxy + 5];
-        int stride1 = MC_STRIDE(dxy);
-        int stride2 = MC_STRIDE(dxy + 1);
-        int stride3 = MC_STRIDE(dxy + 4);
-        int stride4 = MC_STRIDE(dxy + 5);
-        dx&=7;
-        dy&=7;
-        for(y=0; y < b_h; y++){
-            for(x=0; x < b_w; x++){
-                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
-                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
-            }
-            src1+=stride1;
-            src2+=stride2;
-            src3+=stride3;
-            src4+=stride4;
-            dst +=stride;
-        }
-    }else{
-        const uint8_t *src1= hpel[l];
-        const uint8_t *src2= hpel[r];
-        int stride1 = MC_STRIDE(l);
-        int stride2 = MC_STRIDE(r);
-        int a= weight[((dx&7) + (8*(dy&7)))];
-        int b= 8-a;
-        for(y=0; y < b_h; y++){
-            for(x=0; x < b_w; x++){
-                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
-            }
-            src1+=stride1;
-            src2+=stride2;
-            dst +=stride;
-        }
-    }
-}
-
-void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
-    if(block->type & BLOCK_INTRA){
-        int x, y;
-        const unsigned color  = block->color[plane_index];
-        const unsigned color4 = color*0x01010101;
-        if(b_w==32){
-            for(y=0; y < b_h; y++){
-                *(uint32_t*)&dst[0 + y*stride]= color4;
-                *(uint32_t*)&dst[4 + y*stride]= color4;
-                *(uint32_t*)&dst[8 + y*stride]= color4;
-                *(uint32_t*)&dst[12+ y*stride]= color4;
-                *(uint32_t*)&dst[16+ y*stride]= color4;
-                *(uint32_t*)&dst[20+ y*stride]= color4;
-                *(uint32_t*)&dst[24+ y*stride]= color4;
-                *(uint32_t*)&dst[28+ y*stride]= color4;
-            }
-        }else if(b_w==16){
-            for(y=0; y < b_h; y++){
-                *(uint32_t*)&dst[0 + y*stride]= color4;
-                *(uint32_t*)&dst[4 + y*stride]= color4;
-                *(uint32_t*)&dst[8 + y*stride]= color4;
-                *(uint32_t*)&dst[12+ y*stride]= color4;
-            }
-        }else if(b_w==8){
-            for(y=0; y < b_h; y++){
-                *(uint32_t*)&dst[0 + y*stride]= color4;
-                *(uint32_t*)&dst[4 + y*stride]= color4;
-            }
-        }else if(b_w==4){
-            for(y=0; y < b_h; y++){
-                *(uint32_t*)&dst[0 + y*stride]= color4;
-            }
-        }else{
-            for(y=0; y < b_h; y++){
-                for(x=0; x < b_w; x++){
-                    dst[x + y*stride]= color;
-                }
-            }
-        }
-    }else{
-        uint8_t *src= s->last_picture[block->ref]->data[plane_index];
-        const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
-        int mx= block->mx*scale;
-        int my= block->my*scale;
-        const int dx= mx&15;
-        const int dy= my&15;
-        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
-        sx += (mx>>4) - (HTAPS_MAX/2-1);
-        sy += (my>>4) - (HTAPS_MAX/2-1);
-        src += sx + sy*stride;
-        if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
-           || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
-            s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
-                                     stride, stride,
-                                     b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
-                                     sx, sy, w, h);
-            src= tmp + MB_SIZE;
-        }
-
-        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
-
-        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
-        if(    (dx&3) || (dy&3)
-            || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
-            || (b_w&(b_w-1))
-            || b_w == 1
-            || b_h == 1
-            || !s->plane[plane_index].fast_mc )
-            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
-        else if(b_w==32){
-            int y;
-            for(y=0; y<b_h; y+=16){
-                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
-                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
-            }
-        }else if(b_w==b_h)
-            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
-        else if(b_w==2*b_h){
-            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
-            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
-        }else{
-            av_assert2(2*b_w==b_h);
-            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
-            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
-        }
-    }
-}
-
-#define mca(dx,dy,b_w)\
-static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
-    av_assert2(h==b_w);\
-    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
-}
-
-mca( 0, 0,16)
-mca( 8, 0,16)
-mca( 0, 8,16)
-mca( 8, 8,16)
-mca( 0, 0,8)
-mca( 8, 0,8)
-mca( 0, 8,8)
-mca( 8, 8,8)
-
 av_cold int ff_snow_common_init(AVCodecContext *avctx){
     SnowContext *s = avctx->priv_data;
-    int width, height;
-    int i, j;
+    int width, height, ret;

     s->avctx= avctx;
-    s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
     s->spatial_decomposition_count = 1;

-    ff_me_cmp_init(&s->mecc, avctx);
-    ff_hpeldsp_init(&s->hdsp, avctx->flags);
-    ff_videodsp_init(&s->vdsp, 8);
-    ff_dwt_init(&s->dwt);
-    ff_h264qpel_init(&s->h264qpel, 8);
-
-#define mcf(dx,dy)\
-    s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
-    s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
-        s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
-    s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
-    s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
-        s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
-
-    mcf( 0, 0)
-    mcf( 4, 0)
-    mcf( 8, 0)
-    mcf(12, 0)
-    mcf( 0, 4)
-    mcf( 4, 4)
-    mcf( 8, 4)
-    mcf(12, 4)
-    mcf( 0, 8)
-    mcf( 4, 8)
-    mcf( 8, 8)
-    mcf(12, 8)
-    mcf( 0,12)
-    mcf( 4,12)
-    mcf( 8,12)
-    mcf(12,12)
-
-#define mcfh(dx,dy)\
-    s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
-    s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
-        mc_block_hpel ## dx ## dy ## 16;\
-    s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
-    s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
-        mc_block_hpel ## dx ## dy ## 8;
-
-    mcfh(0, 0)
-    mcfh(8, 0)
-    mcfh(0, 8)
-    mcfh(8, 8)
-
-    init_qexp();
-
 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);

     width= s->avctx->width;
     height= s->avctx->height;

-    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer,  width, height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer,     width, sizeof(DWTELEM),  fail);
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer,    width, sizeof(IDWTELEM), fail);
     FF_ALLOC_ARRAY_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);

-    for(i=0; i<MAX_REF_FRAMES; i++) {
-        for(j=0; j<MAX_REF_FRAMES; j++)
-            ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
-        s->last_picture[i] = av_frame_alloc();
-        if (!s->last_picture[i])
-            goto fail;
-    }
-
-    s->mconly_picture = av_frame_alloc();
-    s->current_picture = av_frame_alloc();
-    if (!s->mconly_picture || !s->current_picture)
-        goto fail;
+    if ((ret = ff_obmc_common_init(&s->obmc, avctx)) < 0)
+        return ret;

     return 0;
 fail:
@@ -513,22 +109,10 @@ fail:

 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
     SnowContext *s = avctx->priv_data;
-    int plane_index, level, orientation;
-    int ret, emu_buf_size;
-
-    if(!s->scratchbuf) {
-        if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
-                                 AV_GET_BUFFER_FLAG_REF)) < 0)
-            return ret;
-        FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
-        emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
-        FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
-    }
+    int plane_index, level, orientation, ret;

-    if(s->mconly_picture->format != avctx->pix_fmt) {
-        av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
-        return AVERROR_INVALIDDATA;
-    }
+    if ((ret = ff_obmc_common_init_after_header(&s->obmc)) < 0)
+        return ret;

     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
         int w= s->avctx->width;
@@ -563,7 +147,7 @@ int ff_snow_common_init_after_header(AVCodecContext *avctx) {
                     b->buf += b->stride>>1;
                     b->buf_y_offset = b->stride_line >> 1;
                 }
-                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
+                b->ibuf= s->obmc.spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);

                 if(level)
                     b->parent= &s->plane[plane_index].band[level-1][orientation];
@@ -583,142 +167,15 @@ fail:
     return AVERROR(ENOMEM);
 }

-#define USE_HALFPEL_PLANE 0
-
-static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
-    int p,x,y;
-
-    for(p=0; p < s->nb_planes; p++){
-        int is_chroma= !!p;
-        int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
-        int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
-        int ls= frame->linesize[p];
-        uint8_t *src= frame->data[p];
-
-        halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
-        halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
-        halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
-        if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
-            av_freep(&halfpel[1][p]);
-            av_freep(&halfpel[2][p]);
-            av_freep(&halfpel[3][p]);
-            return AVERROR(ENOMEM);
-        }
-        halfpel[1][p] += EDGE_WIDTH * (1 + ls);
-        halfpel[2][p] += EDGE_WIDTH * (1 + ls);
-        halfpel[3][p] += EDGE_WIDTH * (1 + ls);
-
-        halfpel[0][p]= src;
-        for(y=0; y<h; y++){
-            for(x=0; x<w; x++){
-                int i= y*ls + x;
-
-                halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
-            }
-        }
-        for(y=0; y<h; y++){
-            for(x=0; x<w; x++){
-                int i= y*ls + x;
-
-                halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
-            }
-        }
-        src= halfpel[1][p];
-        for(y=0; y<h; y++){
-            for(x=0; x<w; x++){
-                int i= y*ls + x;
-
-                halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
-            }
-        }
-
-//FIXME border!
-    }
-    return 0;
-}
-
-void ff_snow_release_buffer(AVCodecContext *avctx)
-{
-    SnowContext *s = avctx->priv_data;
-    int i;
-
-    if(s->last_picture[s->max_ref_frames-1]->data[0]){
-        av_frame_unref(s->last_picture[s->max_ref_frames-1]);
-        for(i=0; i<9; i++)
-            if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) {
-                av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
-                s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL;
-            }
-    }
-}
-
-int ff_snow_frame_start(SnowContext *s){
-   AVFrame *tmp;
-   int i, ret;
-
-    ff_snow_release_buffer(s->avctx);
-
-    tmp= s->last_picture[s->max_ref_frames-1];
-    for(i=s->max_ref_frames-1; i>0; i--)
-        s->last_picture[i] = s->last_picture[i-1];
-    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
-    if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
-        if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
-            return ret;
-    }
-    s->last_picture[0] = s->current_picture;
-    s->current_picture = tmp;
-
-    if(s->keyframe){
-        s->ref_frames= 0;
-    }else{
-        int i;
-        for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
-            if(i && s->last_picture[i-1]->key_frame)
-                break;
-        s->ref_frames= i;
-        if(s->ref_frames==0){
-            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
-            return AVERROR_INVALIDDATA;
-        }
-    }
-    if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
-        return ret;
-
-    s->current_picture->key_frame= s->keyframe;
-
-    return 0;
-}
-
 av_cold void ff_snow_common_end(SnowContext *s)
 {
-    int plane_index, level, orientation, i;
+    int plane_index, level, orientation;

     av_freep(&s->spatial_dwt_buffer);
     av_freep(&s->temp_dwt_buffer);
-    av_freep(&s->spatial_idwt_buffer);
     av_freep(&s->temp_idwt_buffer);
     av_freep(&s->run_buffer);

-    s->m.me.temp= NULL;
-    av_freep(&s->m.me.scratchpad);
-    av_freep(&s->m.me.map);
-    av_freep(&s->m.me.score_map);
-    av_freep(&s->m.sc.obmc_scratchpad);
-
-    av_freep(&s->block);
-    av_freep(&s->scratchbuf);
-    av_freep(&s->emu_edge_buffer);
-
-    for(i=0; i<MAX_REF_FRAMES; i++){
-        av_freep(&s->ref_mvs[i]);
-        av_freep(&s->ref_scores[i]);
-        if(s->last_picture[i] && s->last_picture[i]->data[0]) {
-            av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
-        }
-        av_frame_free(&s->last_picture[i]);
-    }
-
     for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
         for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
             for(orientation=level ? 1 : 0; orientation<4; orientation++){
@@ -728,6 +185,6 @@ av_cold void ff_snow_common_end(SnowContext *s)
             }
         }
     }
-    av_frame_free(&s->mconly_picture);
-    av_frame_free(&s->current_picture);
+
+    ff_obmc_close(&s->obmc);
 }
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
index 59c710b..712ed7d 100644
--- a/libavcodec/snow.h
+++ b/libavcodec/snow.h
@@ -32,48 +32,11 @@
 #include "rangecoder.h"
 #include "mathops.h"

-#define FF_MPV_OFFSET(x) (offsetof(MpegEncContext, x) + offsetof(SnowContext, m))
-#include "mpegvideo.h"
-#include "h264qpel.h"
+#define FF_MPV_OFFSET(x) (offsetof(MpegEncContext, x) + offsetof(SnowContext, obmc.m)) /* the MpegEncContext is now nested as SnowContext.obmc.m */
+#include "obmemc.h" /* NOTE(review): presumably supplies OBMCContext and the constants/types dropped from this header -- confirm */

 #define MID_STATE 128

-#define MAX_PLANES 4
-#define QSHIFT 5
-#define QROOT (1<<QSHIFT)
-#define LOSSLESS_QLOG -128
-#define FRAC_BITS 4
-#define MAX_REF_FRAMES 8
-
-#define LOG2_OBMC_MAX 8
-#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
-typedef struct BlockNode{
-    int16_t mx;                 ///< Motion vector component X, see mv_scale
-    int16_t my;                 ///< Motion vector component Y, see mv_scale
-    uint8_t ref;                ///< Reference frame index
-    uint8_t color[3];           ///< Color for intra
-    uint8_t type;               ///< Bitfield of BLOCK_*
-//#define TYPE_SPLIT    1
-#define BLOCK_INTRA   1         ///< Intra block, inter otherwise
-#define BLOCK_OPT     2         ///< Block needs no checks in this round of iterative motion estiation
-//#define TYPE_NOCOLOR  4
-    uint8_t level; //FIXME merge into type?
-}BlockNode;
-
-static const BlockNode null_block= { //FIXME add border maybe
-    .color= {128,128,128},
-    .mx= 0,
-    .my= 0,
-    .ref= 0,
-    .type= 0,
-    .level= 0,
-};
-
-#define LOG2_MB_SIZE 4
-#define MB_SIZE (1<<LOG2_MB_SIZE)
-#define ENCODER_EXTRA_BITS 4
-#define HTAPS_MAX 8
-
 typedef struct x_and_coeff{
     int16_t x;
     uint16_t coeff;
@@ -99,33 +62,12 @@ typedef struct Plane{
     int width;
     int height;
     SubBand band[MAX_DECOMPOSITIONS][4];
-
-    int htaps;
-    int8_t hcoeff[HTAPS_MAX/2];
-    int diag_mc;
-    int fast_mc;
-
-    int last_htaps;
-    int8_t last_hcoeff[HTAPS_MAX/2];
-    int last_diag_mc;
-}Plane;
+} Plane;

 typedef struct SnowContext{
     AVClass *class;
     AVCodecContext *avctx;
     RangeCoder c;
-    MECmpContext mecc;
-    HpelDSPContext hdsp;
-    QpelDSPContext qdsp;
-    VideoDSPContext vdsp;
-    H264QpelContext h264qpel;
-    MpegvideoEncDSPContext mpvencdsp;
-    SnowDWTContext dwt;
-    AVFrame *input_picture;              ///< new_picture with the internal linesizes
-    AVFrame *current_picture;
-    AVFrame *last_picture[MAX_REF_FRAMES];
-    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
-    AVFrame *mconly_picture;
 //     uint8_t q_context[16];
     uint8_t header_state[32];
     uint8_t block_state[128 + 32*128];
@@ -138,13 +80,8 @@ typedef struct SnowContext{
     int spatial_decomposition_count;
     int last_spatial_decomposition_count;
     int temporal_decomposition_count;
-    int max_ref_frames;
-    int ref_frames;
-    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
-    uint32_t *ref_scores[MAX_REF_FRAMES];
     DWTELEM *spatial_dwt_buffer;
     DWTELEM *temp_dwt_buffer;
-    IDWTELEM *spatial_idwt_buffer;
     IDWTELEM *temp_idwt_buffer;
     int *run_buffer;
     int colorspace_type;
@@ -153,49 +90,24 @@ typedef struct SnowContext{
     int spatial_scalability;
     int qlog;
     int last_qlog;
-    int lambda;
-    int lambda2;
     int pass1_rc;
-    int mv_scale;
-    int last_mv_scale;
     int qbias;
     int last_qbias;
 #define QBIAS_SHIFT 3
-    int b_width;
-    int b_height;
-    int block_max_depth;
-    int last_block_max_depth;
     int nb_planes;
     Plane plane[MAX_PLANES];
-    BlockNode *block;
-#define ME_CACHE_SIZE 1024
-    unsigned me_cache[ME_CACHE_SIZE];
-    unsigned me_cache_generation;
     slice_buffer sb;
     int memc_only;
     int no_bitstream;
-    int intra_penalty;
-    int motion_est;
-    int iterative_dia_size;
     int scenechange_threshold;

-    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
-
-    uint8_t *scratchbuf;
-    uint8_t *emu_edge_buffer;
-
-    AVMotionVector *avmv;
-    int avmv_index;
     uint64_t encoding_error[AV_NUM_DATA_POINTERS];

+    OBMCContext obmc; /* OBMC state: block tree, reference-frame and motion-vector fields formerly stored directly in SnowContext */
+
     int pred;
 }SnowContext;

-/* Tables */
-extern const uint8_t * const ff_obmc_tab[4];
-extern uint8_t ff_qexp[QROOT];
-extern int ff_scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
-
 /* C bits used by mmx/sse2/altivec */

 static av_always_inline void snow_interleave_line_header(int * i, int width, IDWTELEM * low, IDWTELEM * high){
@@ -239,265 +151,7 @@ static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, IDWTE
 int ff_snow_common_init(AVCodecContext *avctx);
 int ff_snow_common_init_after_header(AVCodecContext *avctx);
 void ff_snow_common_end(SnowContext *s);
-void ff_snow_release_buffer(AVCodecContext *avctx);
 void ff_snow_reset_contexts(SnowContext *s);
-int ff_snow_alloc_blocks(SnowContext *s);
-int ff_snow_frame_start(SnowContext *s);
-void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride,
-                     int sx, int sy, int b_w, int b_h, const BlockNode *block,
-                     int plane_index, int w, int h);
-int ff_snow_get_buffer(SnowContext *s, AVFrame *frame);
-/* common inline functions */
-//XXX doublecheck all of them should stay inlined
-
-static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
-                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
-    if(s->ref_frames == 1){
-        *mx = mid_pred(left->mx, top->mx, tr->mx);
-        *my = mid_pred(left->my, top->my, tr->my);
-    }else{
-        const int *scale = ff_scale_mv_ref[ref];
-        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
-                       (top ->mx * scale[top ->ref] + 128) >>8,
-                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
-        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
-                       (top ->my * scale[top ->ref] + 128) >>8,
-                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
-    }
-}
-
-static av_always_inline int same_block(BlockNode *a, BlockNode *b){
-    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
-        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
-    }else{
-        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
-    }
-}
-
-//FIXME name cleanup (b_w, block_w, b_width stuff)
-//XXX should we really inline it?
-static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
-    const int b_width = s->b_width  << s->block_max_depth;
-    const int b_height= s->b_height << s->block_max_depth;
-    const int b_stride= b_width;
-    BlockNode *lt= &s->block[b_x + b_y*b_stride];
-    BlockNode *rt= lt+1;
-    BlockNode *lb= lt+b_stride;
-    BlockNode *rb= lb+1;
-    uint8_t *block[4];
-    // When src_stride is large enough, it is possible to interleave the blocks.
-    // Otherwise the blocks are written sequentially in the tmp buffer.
-    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
-    uint8_t *tmp = s->scratchbuf;
-    uint8_t *ptmp;
-    int x,y;
-
-    if(b_x<0){
-        lt= rt;
-        lb= rb;
-    }else if(b_x + 1 >= b_width){
-        rt= lt;
-        rb= lb;
-    }
-    if(b_y<0){
-        lt= lb;
-        rt= rb;
-    }else if(b_y + 1 >= b_height){
-        lb= lt;
-        rb= rt;
-    }
-
-    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
-        obmc -= src_x;
-        b_w += src_x;
-        if(!sliced && !offset_dst)
-            dst -= src_x;
-        src_x=0;
-    }
-    if(src_x + b_w > w){
-        b_w = w - src_x;
-    }
-    if(src_y<0){
-        obmc -= src_y*obmc_stride;
-        b_h += src_y;
-        if(!sliced && !offset_dst)
-            dst -= src_y*dst_stride;
-        src_y=0;
-    }
-    if(src_y + b_h> h){
-        b_h = h - src_y;
-    }
-
-    if(b_w<=0 || b_h<=0) return;
-
-    if(!sliced && offset_dst)
-        dst += src_x + src_y*dst_stride;
-    dst8+= src_x + src_y*src_stride;
-//    src += src_x + src_y*src_stride;
-
-    ptmp= tmp + 3*tmp_step;
-    block[0]= ptmp;
-    ptmp+=tmp_step;
-    ff_snow_pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
-
-    if(same_block(lt, rt)){
-        block[1]= block[0];
-    }else{
-        block[1]= ptmp;
-        ptmp+=tmp_step;
-        ff_snow_pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
-    }
-
-    if(same_block(lt, lb)){
-        block[2]= block[0];
-    }else if(same_block(rt, lb)){
-        block[2]= block[1];
-    }else{
-        block[2]= ptmp;
-        ptmp+=tmp_step;
-        ff_snow_pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
-    }
-
-    if(same_block(lt, rb) ){
-        block[3]= block[0];
-    }else if(same_block(rt, rb)){
-        block[3]= block[1];
-    }else if(same_block(lb, rb)){
-        block[3]= block[2];
-    }else{
-        block[3]= ptmp;
-        ff_snow_pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
-    }
-    if(sliced){
-        s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
-    }else{
-        for(y=0; y<b_h; y++){
-            //FIXME ugly misuse of obmc_stride
-            const uint8_t *obmc1= obmc + y*obmc_stride;
-            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
-            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
-            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
-            for(x=0; x<b_w; x++){
-                int v=   obmc1[x] * block[3][x + y*src_stride]
-                        +obmc2[x] * block[2][x + y*src_stride]
-                        +obmc3[x] * block[1][x + y*src_stride]
-                        +obmc4[x] * block[0][x + y*src_stride];
-
-                v <<= 8 - LOG2_OBMC_MAX;
-                if(FRAC_BITS != 8){
-                    v >>= 8 - FRAC_BITS;
-                }
-                if(add){
-                    v += dst[x + y*dst_stride];
-                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
-                    if(v&(~255)) v= ~(v>>31);
-                    dst8[x + y*src_stride] = v;
-                }else{
-                    dst[x + y*dst_stride] -= v;
-                }
-            }
-        }
-    }
-}
-
-static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
-    Plane *p= &s->plane[plane_index];
-    const int mb_w= s->b_width  << s->block_max_depth;
-    const int mb_h= s->b_height << s->block_max_depth;
-    int x, y, mb_x;
-    int block_size = MB_SIZE >> s->block_max_depth;
-    int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
-    int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
-    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
-    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
-    int ref_stride= s->current_picture->linesize[plane_index];
-    uint8_t *dst8= s->current_picture->data[plane_index];
-    int w= p->width;
-    int h= p->height;
-    av_assert2(s->chroma_h_shift == s->chroma_v_shift); // obmc params assume squares
-    if(s->keyframe || (s->avctx->debug&512)){
-        if(mb_y==mb_h)
-            return;
-
-        if(add){
-            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
-                for(x=0; x<w; x++){
-                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
-                    v >>= FRAC_BITS;
-                    if(v&(~255)) v= ~(v>>31);
-                    dst8[x + y*ref_stride]= v;
-                }
-            }
-        }else{
-            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
-                for(x=0; x<w; x++){
-                    buf[x + y*w]-= 128<<FRAC_BITS;
-                }
-            }
-        }
-
-        return;
-    }
-
-    for(mb_x=0; mb_x<=mb_w; mb_x++){
-        add_yblock(s, 0, NULL, buf, dst8, obmc,
-                   block_w*mb_x - block_w/2,
-                   block_h*mb_y - block_h/2,
-                   block_w, block_h,
-                   w, h,
-                   w, ref_stride, obmc_stride,
-                   mb_x - 1, mb_y - 1,
-                   add, 1, plane_index);
-    }
-}
-
-static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
-    const int mb_h= s->b_height << s->block_max_depth;
-    int mb_y;
-    for(mb_y=0; mb_y<=mb_h; mb_y++)
-        predict_slice(s, buf, plane_index, add, mb_y);
-}
-
-static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
-    const int w= s->b_width << s->block_max_depth;
-    const int rem_depth= s->block_max_depth - level;
-    const int index= (x + y*w) << rem_depth;
-    const int block_w= 1<<rem_depth;
-    const int block_h= 1<<rem_depth; //FIXME "w!=h"
-    BlockNode block;
-    int i,j;
-
-    block.color[0]= l;
-    block.color[1]= cb;
-    block.color[2]= cr;
-    block.mx= mx;
-    block.my= my;
-    block.ref= ref;
-    block.type= type;
-    block.level= level;
-
-    for(j=0; j<block_h; j++){
-        for(i=0; i<block_w; i++){
-            s->block[index + i + j*w]= block;
-        }
-    }
-}
-
-static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
-    SnowContext *s = c->avctx->priv_data;
-    const int offset[3]= {
-          y*c->  stride + x,
-        ((y*c->uvstride + x)>>s->chroma_h_shift),
-        ((y*c->uvstride + x)>>s->chroma_h_shift),
-    };
-    int i;
-    for(i=0; i<3; i++){
-        c->src[0][i]= src [i];
-        c->ref[0][i]= ref [i] + offset[i];
-    }
-    av_assert2(!ref_index);
-}
-

 /* bitstream functions */

diff --git a/libavcodec/snowdata.h b/libavcodec/snowdata.h
deleted file mode 100644
index 490fdf8..0000000
--- a/libavcodec/snowdata.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
- * Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_SNOWDATA_H
-#define AVCODEC_SNOWDATA_H
-
-#include "snow.h"
-
-static const uint8_t obmc32[1024]={
-  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
-  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
-  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
-  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
-  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
-  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
-  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
-  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
-  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
-  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
-  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
-  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
-  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
-  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
-  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
-  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
-  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
-  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
-  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
-  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
-  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
-  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
-  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
-  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
-  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
-  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
-  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
-  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
-  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
-  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
-  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
-  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
- //error:0.000020
-};
-static const uint8_t obmc16[256]={
-  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
-  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
-  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
-  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
-  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
- 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
- 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
- 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
- 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
- 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
- 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
-  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
-  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
-  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
-  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
-  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
-//error:0.000015
-};
-
-//linear *64
-static const uint8_t obmc8[64]={
-  4, 12, 20, 28, 28, 20, 12,  4,
- 12, 36, 60, 84, 84, 60, 36, 12,
- 20, 60,100,140,140,100, 60, 20,
- 28, 84,140,196,196,140, 84, 28,
- 28, 84,140,196,196,140, 84, 28,
- 20, 60,100,140,140,100, 60, 20,
- 12, 36, 60, 84, 84, 60, 36, 12,
-  4, 12, 20, 28, 28, 20, 12,  4,
-//error:0.000000
-};
-
-//linear *64
-static const uint8_t obmc4[16]={
- 16, 48, 48, 16,
- 48,144,144, 48,
- 48,144,144, 48,
- 16, 48, 48, 16,
-//error:0.000000
-};
-
-const int8_t ff_quant3bA[256]={
- 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
- 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
-};
-
-const uint8_t * const ff_obmc_tab[4]= {
-    obmc32, obmc16, obmc8, obmc4
-};
-
-/* runtime generated tables */
-uint8_t ff_qexp[QROOT];
-int ff_scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
-
-
-#endif /* AVCODEC_SNOW_H */
diff --git a/libavcodec/snowdec.c b/libavcodec/snowdec.c
index 042aecb..b224470 100644
--- a/libavcodec/snowdec.c
+++ b/libavcodec/snowdec.c
@@ -29,90 +29,7 @@
 #include "rangecoder.h"
 #include "mathops.h"

-#include "mpegvideo.h"
-#include "h263.h"
-
-static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
-    Plane *p= &s->plane[plane_index];
-    const int mb_w= s->b_width  << s->block_max_depth;
-    const int mb_h= s->b_height << s->block_max_depth;
-    int x, y, mb_x;
-    int block_size = MB_SIZE >> s->block_max_depth;
-    int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
-    int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
-    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
-    int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
-    int ref_stride= s->current_picture->linesize[plane_index];
-    uint8_t *dst8= s->current_picture->data[plane_index];
-    int w= p->width;
-    int h= p->height;
-
-    if(s->keyframe || (s->avctx->debug&512)){
-        if(mb_y==mb_h)
-            return;
-
-        if(add){
-            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
-//                DWTELEM * line = slice_buffer_get_line(sb, y);
-                IDWTELEM * line = sb->line[y];
-                for(x=0; x<w; x++){
-//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
-                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
-                    v >>= FRAC_BITS;
-                    if(v&(~255)) v= ~(v>>31);
-                    dst8[x + y*ref_stride]= v;
-                }
-            }
-        }else{
-            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
-//                DWTELEM * line = slice_buffer_get_line(sb, y);
-                IDWTELEM * line = sb->line[y];
-                for(x=0; x<w; x++){
-                    line[x] -= 128 << FRAC_BITS;
-//                    buf[x + y*w]-= 128<<FRAC_BITS;
-                }
-            }
-        }
-
-        return;
-    }
-
-    for(mb_x=0; mb_x<=mb_w; mb_x++){
-        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
-                   block_w*mb_x - block_w/2,
-                   block_h*mb_y - block_h/2,
-                   block_w, block_h,
-                   w, h,
-                   w, ref_stride, obmc_stride,
-                   mb_x - 1, mb_y - 1,
-                   add, 0, plane_index);
-    }
-
-    if(s->avmv && mb_y < mb_h && plane_index == 0)
-        for(mb_x=0; mb_x<mb_w; mb_x++){
-            AVMotionVector *avmv = s->avmv + s->avmv_index;
-            const int b_width = s->b_width  << s->block_max_depth;
-            const int b_stride= b_width;
-            BlockNode *bn= &s->block[mb_x + mb_y*b_stride];
-
-            if (bn->type)
-                continue;
-
-            s->avmv_index++;
-
-            avmv->w = block_w;
-            avmv->h = block_h;
-            avmv->dst_x = block_w*mb_x - block_w/2;
-            avmv->dst_y = block_h*mb_y - block_h/2;
-            avmv->motion_scale = 8;
-            avmv->motion_x = bn->mx * s->mv_scale;
-            avmv->motion_y = bn->my * s->mv_scale;
-            avmv->src_x = avmv->dst_x + avmv->motion_x / 8;
-            avmv->src_y = avmv->dst_y + avmv->motion_y / 8;
-            avmv->source= -1 - bn->ref;
-            avmv->flags = 0;
-        }
-}
+#include "obmc.h"

 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
     const int w= b->width;
@@ -122,7 +39,7 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli
     int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
     int new_index = 0;

-    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
+    if(b->ibuf == s->obmc.spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ /* spatial_idwt_buffer is now a member of the OBMC context */
         qadd= 0;
         qmul= 1<<QEXPSHIFT;
     }
@@ -156,23 +73,25 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli
 }

 static int decode_q_branch(SnowContext *s, int level, int x, int y){
-    const int w= s->b_width << s->block_max_depth;
-    const int rem_depth= s->block_max_depth - level;
+    RangeCoder *const c = &s->c; /* shorthand for the range coder passed to the get_rac()/get_symbol() calls below */
+    OBMCContext *oc = &s->obmc; /* block array, block geometry and reference-frame count are read from here */
+    const int w= oc->b_width << oc->block_max_depth; /* also used as the row stride into oc->block[] */
+    const int rem_depth= oc->block_max_depth - level; /* split levels remaining below this node */
     const int index= (x + y*w) << rem_depth;
     int trx= (x+1)<<rem_depth;
-    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
-    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
-    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
-    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    const BlockNode *left  = x ? &oc->block[index-1] : &null_block; /* first column has no left neighbour */
+    const BlockNode *top   = y ? &oc->block[index-w] : &null_block; /* first row has no top neighbour */
+    const BlockNode *tl    = y && x ? &oc->block[index-w-1] : left; /* falls back to left when there is no top-left block */
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &oc->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
     int res;

-    if(s->keyframe){
-        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
+    if(oc->keyframe){
+        set_blocks(oc, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
         return 0;
     }

-    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
+    if(level==oc->block_max_depth || get_rac(c, &s->block_state[4 + s_context])){
         int type, mx, my;
         int l = left->color[0];
         int cb= left->color[1];
@@ -182,27 +101,27 @@ static int decode_q_branch(SnowContext *s, int level, int x, int y){
         int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
         int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));

-        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
+        type= get_rac(c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;

         if(type){
-            pred_mv(s, &mx, &my, 0, left, top, tr);
-            l += get_symbol(&s->c, &s->block_state[32], 1);
-            if (s->nb_planes > 2) {
-                cb+= get_symbol(&s->c, &s->block_state[64], 1);
-                cr+= get_symbol(&s->c, &s->block_state[96], 1);
+            pred_mv(oc, &mx, &my, 0, left, top, tr);
+            l += get_symbol(c, &s->block_state[32], 1);
+            if (s->obmc.nb_planes > 2) {
+                cb += get_symbol(c, &s->block_state[64], 1);
+                cr += get_symbol(c, &s->block_state[96], 1);
             }
         }else{
-            if(s->ref_frames > 1)
-                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
-            if (ref >= s->ref_frames) {
+            if(oc->ref_frames > 1)
+                ref = get_symbol(c, &s->block_state[128 + 1024 + 32*ref_context], 0);
+            if (ref >= oc->ref_frames) {
                 av_log(s->avctx, AV_LOG_ERROR, "Invalid ref\n");
                 return AVERROR_INVALIDDATA;
             }
-            pred_mv(s, &mx, &my, ref, left, top, tr);
-            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
-            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
+            pred_mv(oc, &mx, &my, ref, left, top, tr);
+            mx += get_symbol(c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
+            my += get_symbol(c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
         }
-        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
+        set_blocks(oc, level, x, y, l, cb, cr, mx, my, ref, type);
     }else{
         if ((res = decode_q_branch(s, level+1, 2*x+0, 2*y+0)) < 0 ||
             (res = decode_q_branch(s, level+1, 2*x+1, 2*y+0)) < 0 ||
@@ -291,19 +210,20 @@ static void decode_qlogs(SnowContext *s){
     dst= tmp;

 static int decode_header(SnowContext *s){
-    int plane_index, tmp;
+    int plane_index, tmp, ret;
     uint8_t kstate[32];

     memset(kstate, MID_STATE, sizeof(kstate));

     s->keyframe= get_rac(&s->c, kstate);
+    s->obmc.keyframe = s->keyframe; /* mirror the flag so code working on the OBMC context (e.g. oc->keyframe in decode_q_branch) sees it */
     if(s->keyframe || s->always_reset){
         ff_snow_reset_contexts(s);
         s->spatial_decomposition_type=
         s->qlog=
         s->qbias=
-        s->mv_scale=
-        s->block_max_depth= 0;
+        s->obmc.mv_scale=
+        s->obmc.block_max_depth= 0; /* end of the chained assignment: all five fields are reset to 0 */
     }
     if(s->keyframe){
         GET_S(s->version, tmp <= 0U)
@@ -342,8 +262,8 @@ static int decode_header(SnowContext *s){

         s->spatial_scalability= get_rac(&s->c, s->header_state);
 //        s->rate_scalability= get_rac(&s->c, s->header_state);
-        GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
-        s->max_ref_frames++;
+        GET_S(s->obmc.max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
+        s->obmc.max_ref_frames++;

         decode_qlogs(s);
     }
@@ -352,7 +272,7 @@ static int decode_header(SnowContext *s){
         if(get_rac(&s->c, s->header_state)){
             for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
                 int htaps, i, sum=0;
-                Plane *p= &s->plane[plane_index];
+                PlaneObmc *p= &s->obmc.plane[plane_index]; /* the MC filter fields parsed here (diag_mc, htaps, hcoeff) are read from the OBMC per-plane struct */
                 p->diag_mc= get_rac(&s->c, s->header_state);
                 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
                 if((unsigned)htaps > HTAPS_MAX || htaps==0)
@@ -364,9 +284,9 @@ static int decode_header(SnowContext *s){
                 }
                 p->hcoeff[0]= 32-sum;
             }
-            s->plane[2].diag_mc= s->plane[1].diag_mc;
-            s->plane[2].htaps  = s->plane[1].htaps;
-            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
+            s->obmc.plane[2].diag_mc= s->obmc.plane[1].diag_mc;
+            s->obmc.plane[2].htaps  = s->obmc.plane[1].htaps;
+            memcpy(s->obmc.plane[2].hcoeff, s->obmc.plane[1].hcoeff, sizeof(s->obmc.plane[1].hcoeff));
         }
         if(get_rac(&s->c, s->header_state)){
             GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
@@ -386,16 +306,19 @@ static int decode_header(SnowContext *s){
     }


-    s->qlog           += get_symbol(&s->c, s->header_state, 1);
-    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
-    s->qbias          += get_symbol(&s->c, s->header_state, 1);
-    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
-    if(s->block_max_depth > 1 || s->block_max_depth < 0){
-        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large\n", s->block_max_depth);
-        s->block_max_depth= 0;
+    s->qlog                 += get_symbol(&s->c, s->header_state, 1);
+    s->obmc.mv_scale        += get_symbol(&s->c, s->header_state, 1);
+    s->qbias                += get_symbol(&s->c, s->header_state, 1);
+    s->obmc.block_max_depth += get_symbol(&s->c, s->header_state, 1);
+    if(s->obmc.block_max_depth > 1 || s->obmc.block_max_depth < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large\n", s->obmc.block_max_depth);
+        s->obmc.block_max_depth= 0;
         return AVERROR_INVALIDDATA;
     }

+    if ((ret = ff_obmc_decode_init(&s->obmc)) < 0)
+        return ret;
+
     return 0;
 }

@@ -412,8 +335,8 @@ static av_cold int decode_init(AVCodecContext *avctx)

 static int decode_blocks(SnowContext *s){
     int x, y;
-    int w= s->b_width;
-    int h= s->b_height;
+    int w= s->obmc.b_width;
+    int h= s->obmc.b_height;
     int res;

     for(y=0; y<h; y++){
@@ -440,7 +363,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     ff_init_range_decoder(c, buf, buf_size);
     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);

-    s->current_picture->pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
+    s->obmc.current_picture->pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
     if ((res = decode_header(s)) < 0)
         return res;
     if ((res=ff_snow_common_init_after_header(avctx)) < 0)
@@ -448,42 +371,25 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,

     // realloc slice buffer for the case that spatial_decomposition_count changed
     ff_slice_buffer_destroy(&s->sb);
-    if ((res = ff_slice_buffer_init(&s->sb, s->plane[0].height,
-                                    (MB_SIZE >> s->block_max_depth) +
+    if ((res = ff_slice_buffer_init(&s->sb, s->obmc.plane[0].height,
+                                    (MB_SIZE >> s->obmc.block_max_depth) +
                                     s->spatial_decomposition_count * 11 + 1,
-                                    s->plane[0].width,
-                                    s->spatial_idwt_buffer)) < 0)
-        return res;
-
-    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
-        Plane *p= &s->plane[plane_index];
-        p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
-                                              && p->hcoeff[1]==-10
-                                              && p->hcoeff[2]==2;
-    }
-
-    ff_snow_alloc_blocks(s);
-
-    if((res = ff_snow_frame_start(s)) < 0)
+                                    s->obmc.plane[0].width,
+                                    s->obmc.spatial_idwt_buffer)) < 0)
         return res;

-    s->current_picture->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
-
     //keyframe flag duplication mess FIXME
     if(avctx->debug&FF_DEBUG_PICT_INFO)
         av_log(avctx, AV_LOG_ERROR,
                "keyframe:%d qlog:%d qbias: %d mvscale: %d "
                "decomposition_type:%d decomposition_count:%d\n",
-               s->keyframe, s->qlog, s->qbias, s->mv_scale,
+               s->keyframe, s->qlog, s->qbias, s->obmc.mv_scale,
                s->spatial_decomposition_type,
                s->spatial_decomposition_count
               );

-    av_assert0(!s->avmv);
-    if (s->avctx->flags2 & AV_CODEC_FLAG2_EXPORT_MVS) {
-        s->avmv = av_malloc_array(s->b_width * s->b_height, sizeof(AVMotionVector) << (s->block_max_depth*2));
-    }
-    s->avmv_index = 0;
+    if ((res = ff_obmc_predecode_frame(&s->obmc)) < 0)
+        return res;

     if ((res = decode_blocks(s)) < 0)
         return res;
@@ -497,12 +403,12 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,

         if(s->avctx->debug&2048){
             memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
-            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
+            predict_plane(&s->obmc, s->obmc.spatial_idwt_buffer, plane_index, 1);

             for(y=0; y<h; y++){
                 for(x=0; x<w; x++){
-                    int v= s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x];
-                    s->mconly_picture->data[plane_index][y*s->mconly_picture->linesize[plane_index] + x]= v;
+                    int v= s->obmc.current_picture->data[plane_index][y*s->obmc.current_picture->linesize[plane_index] + x];
+                    s->obmc.mconly_picture->data[plane_index][y*s->obmc.mconly_picture->linesize[plane_index] + x]= v;
                 }
             }
         }
@@ -517,8 +423,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         }

         {
-        const int mb_h= s->b_height << s->block_max_depth;
-        const int block_size = MB_SIZE >> s->block_max_depth;
+        const int mb_h= s->obmc.b_height << s->obmc.block_max_depth;
+        const int block_size = MB_SIZE >> s->obmc.block_max_depth;
         const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
         int mb_y;
         DWTCompose cs[MAX_DECOMPOSITIONS];
@@ -570,7 +476,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             }

             for(; yd<slice_h; yd+=4){
-                ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, s->temp_idwt_buffer, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
+                ff_spatial_idwt_buffered_slice(&s->obmc.dwt, cs, &s->sb, s->temp_idwt_buffer, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
             }

             if(s->qlog == LOSSLESS_QLOG){
@@ -582,7 +488,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 }
             }

-            predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
+            predict_slice_buffered(&s->obmc, &s->sb, s->obmc.spatial_idwt_buffer, plane_index, 1, mb_y);

             y = FFMIN(p->height, slice_starty);
             end_y = FFMIN(p->height, slice_h);
@@ -597,22 +503,22 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,

     emms_c();

-    ff_snow_release_buffer(avctx);
+    ff_obmc_release_buffer(&s->obmc);

     if(!(s->avctx->debug&2048))
-        res = av_frame_ref(picture, s->current_picture);
+        res = av_frame_ref(picture, s->obmc.current_picture);
     else
-        res = av_frame_ref(picture, s->mconly_picture);
-    if (res >= 0 && s->avmv_index) {
+        res = av_frame_ref(picture, s->obmc.mconly_picture);
+    if (res >= 0 && s->obmc.avmv_index) {
         AVFrameSideData *sd;

-        sd = av_frame_new_side_data(picture, AV_FRAME_DATA_MOTION_VECTORS, s->avmv_index * sizeof(AVMotionVector));
+        sd = av_frame_new_side_data(picture, AV_FRAME_DATA_MOTION_VECTORS, s->obmc.avmv_index * sizeof(AVMotionVector));
         if (!sd)
             return AVERROR(ENOMEM);
-        memcpy(sd->data, s->avmv, s->avmv_index * sizeof(AVMotionVector));
+        memcpy(sd->data, s->obmc.avmv, s->obmc.avmv_index * sizeof(AVMotionVector));
     }

-    av_freep(&s->avmv);
+    av_freep(&s->obmc.avmv);

     if (res < 0)
         return res;
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index 00aef57..40bc84b 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -30,16 +30,164 @@
 #include "rangecoder.h"
 #include "mathops.h"

-#include "mpegvideo.h"
-#include "h263.h"
+#include "obme.h"

 #define FF_ME_ITER 50

+typedef struct RangeEncoderContext { // private range-coder state stored in ObmcCoderContext.priv_data
+    RangeCoder c;                // coder copied from SnowContext.c by init_frame_encoder()
+    uint8_t buffer[1024];        // output area that c.bytestream is pointed at
+    uint8_t state[128 + 32*128]; // copy of SnowContext.block_state
+    uint8_t *pbbak;              // main coder's bytestream pointer saved at init
+    uint8_t *pbbak_start;        // main coder's bytestream_start saved at init
+    int base_bits;               // get_rac_count() minus 8*(bytes already written), taken at init
+} RangeEncoderContext;
+
+/* Code one binary decision v in context ctx, using the private coder when c->priv_data is set and the main coder otherwise. */
+static void put_encoder_rac(ObmcCoderContext *c, int ctx, int v)
+{
+    RangeEncoderContext *priv = (RangeEncoderContext *)c->priv_data;
+    SnowContext *snow = (SnowContext *)c->avctx->priv_data;
+
+    if (priv)
+        put_rac(&priv->c, &priv->state[ctx], v);
+    else
+        put_rac(&snow->c, &snow->block_state[ctx], v);
+}
+
+/* Code symbol v (sign is forwarded to put_symbol) in context ctx, using the private coder when c->priv_data is set, else the main one. */
+static void put_encoder_symbol(ObmcCoderContext *c, int ctx, int v, int sign)
+{
+    RangeEncoderContext *priv = (RangeEncoderContext *)c->priv_data;
+    SnowContext *snow = (SnowContext *)c->avctx->priv_data;
+
+    if (priv)
+        put_symbol(&priv->c, &priv->state[ctx], v, sign);
+    else
+        put_symbol(&snow->c, &snow->block_state[ctx], v, sign);
+}
+
+static void ff_snow_init_encode_callbacks(ObmcCoderContext *, AVCodecContext *);
+/* Attach a newly allocated private coder to c, seeded from the main coder and block_state and writing into its own buffer. */
+static void init_frame_encoder(AVCodecContext *avctx, ObmcCoderContext *c)
+{
+    SnowContext *f = (SnowContext *)avctx->priv_data;
+    RangeEncoderContext *coder = av_mallocz(sizeof(RangeEncoderContext));
+    c->priv_data = coder;
+    /* NOTE(review): coder is dereferenced below without a NULL check on the av_mallocz() result. */
+    coder->pbbak = f->c.bytestream; // main coder's current bytestream pointer, used later by copy_coder()/reset_coder()
+    coder->pbbak_start = f->c.bytestream_start;
+    coder->base_bits = get_rac_count(&f->c) - 8*(f->c.bytestream - f->c.bytestream_start);
+    coder->c = f->c;
+    coder->c.bytestream_start = coder->c.bytestream= coder->buffer; //FIXME end/start? and at the other stoo
+    memcpy(coder->state, f->block_state, sizeof(f->block_state));
+    /* NOTE(review): coder->c.bytestream_end keeps the main coder's value; it is not redirected to coder->buffer. */
+    ff_snow_init_encode_callbacks(c, avctx);
+}
+/* Free the private coder attached to c; av_freep() is given the address of c->priv_data. */
+static void free_coder (ObmcCoderContext *c)
+{
+    av_freep(&c->priv_data);
+}
+
+/* Commit the private coder: copy its buffered bytes to the saved main-stream position and adopt its coder and context state. */
+static void copy_coder        (struct ObmcCoderContext *c)
+{
+    RangeEncoderContext *priv = (RangeEncoderContext *)c->priv_data;
+    SnowContext *snow = (SnowContext *)c->avctx->priv_data;
+    int written = priv->c.bytestream - priv->c.bytestream_start;
+
+    memcpy(snow->block_state, priv->state, sizeof(snow->block_state));
+    memcpy(priv->pbbak, priv->buffer, written);
+    snow->c = priv->c;
+    snow->c.bytestream = priv->pbbak + written;
+    snow->c.bytestream_start = priv->pbbak_start;
+}
+
+/* Load the private coder and context state into the main coder, with the main bytestream set to the saved position (no bytes copied). */
+static void reset_coder      (struct ObmcCoderContext *c)
+{
+    RangeEncoderContext *priv = (RangeEncoderContext *)c->priv_data;
+    SnowContext *snow = (SnowContext *)c->avctx->priv_data;
+    memcpy(snow->block_state, priv->state, sizeof(snow->block_state));
+    snow->c = priv->c;
+    snow->c.bytestream = priv->pbbak;
+    snow->c.bytestream_start = priv->pbbak_start;
+}
+/* Callback: code binary value v in context ctx via put_encoder_rac(). */
+static void put_level_break(ObmcCoderContext *c, int ctx, int v)
+{
+    put_encoder_rac(c, ctx, v);
+}
+/* Callback: code type as a single binary decision in context ctx via put_encoder_rac(). */
+static void put_block_type  (struct ObmcCoderContext *c, int ctx, int type)
+{
+    put_encoder_rac(c, ctx, type);
+}
+/* Callback: code best_ref in context ctx via put_encoder_symbol() with the sign argument set to 0. */
+static void put_best_ref    (struct ObmcCoderContext *c, int ctx, int best_ref)
+{
+    put_encoder_symbol(c, ctx, best_ref, 0);
+}
+/* Callback: code mx in context ctx_mx, then my in context ctx_my, both with the sign argument set to 1. */
+static void put_block_mv    (struct ObmcCoderContext *c, int ctx_mx, int ctx_my, int mx, int my)
+{
+    put_encoder_symbol(c, ctx_mx, mx, 1);
+    put_encoder_symbol(c, ctx_my, my, 1);
+}
+/* Callback: code l in context ctx_l; cb and cr are coded (in that order) only when s->obmc.nb_planes exceeds 2. */
+static void put_block_color (struct ObmcCoderContext *c, int ctx_l, int ctx_cb, int ctx_cr, int l, int cb, int cr)
+{
+    SnowContext *snow = (SnowContext *)c->avctx->priv_data;
+    put_encoder_symbol(c, ctx_l, l, 1);
+    if (snow->obmc.nb_planes <= 2)
+        return;
+    put_encoder_symbol(c, ctx_cb, cb, 1);
+    put_encoder_symbol(c, ctx_cr, cr, 1);
+}
+/* Return get_rac_count() of the private coder minus base_bits. NOTE(review): no NULL c->priv_data fallback, unlike put_encoder_rac(). */
+static int get_coder_bits(ObmcCoderContext *c)
+{
+    RangeEncoderContext *coder = (RangeEncoderContext *)c->priv_data;
+    return get_rac_count(&coder->c) - coder->base_bits;
+}
+
+/* Return bytestream_end - bytestream of the private coder when c->priv_data is set, otherwise of the main coder. */
+static int get_coder_available_bytes(ObmcCoderContext *c)
+{
+    RangeEncoderContext *priv = (RangeEncoderContext *)c->priv_data;
+    SnowContext *snow = (SnowContext *)c->avctx->priv_data;
+
+    if (priv)
+        return priv->c.bytestream_end - priv->c.bytestream;
+    return snow->c.bytestream_end - snow->c.bytestream;
+}
+
+/* Bind c to avctx and install the Snow range-coder callbacks (symbol writers, private-coder lifecycle, bit accounting). */
+static void ff_snow_init_encode_callbacks(ObmcCoderContext *c, AVCodecContext *avctx)
+{
+    /* Use the avctx argument: c->avctx is only assigned below, so reading it here would dereference an unset pointer. */
+    SnowContext *s = (SnowContext *)avctx->priv_data;
+    av_assert0(sizeof(s->block_state) >= 256);
+
+    c->avctx = avctx;
+    c->put_level_break = put_level_break;
+    c->put_block_type  = put_block_type;
+    c->put_block_color = put_block_color;
+    c->put_best_ref    = put_best_ref;
+    c->put_block_mv    = put_block_mv;
+    c->init_frame_coder = init_frame_encoder;
+    c->reset_coder      = reset_coder;
+    c->copy_coder       = copy_coder;
+    c->free             = free_coder;
+    c->get_bits         = get_coder_bits;
+    c->available_bytes  = get_coder_available_bytes;
+}
+
 static av_cold int encode_init(AVCodecContext *avctx)
 {
     SnowContext *s = avctx->priv_data;
-    int plane_index, ret;
-    int i;
+    int ret;

 #if FF_API_PRIVATE_OPT
 FF_DISABLE_DEPRECATION_WARNINGS
@@ -54,51 +202,15 @@ FF_ENABLE_DEPRECATION_WARNINGS
         av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
         return -1;
     }
-#if FF_API_MOTION_EST
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->me_method == ME_ITER)
-        s->motion_est = FF_ME_ITER;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif

     s->spatial_decomposition_type= s->pred; //FIXME add decorrelator type r transform_type

-    s->mv_scale       = (avctx->flags & AV_CODEC_FLAG_QPEL) ? 2 : 4;
-    s->block_max_depth= (avctx->flags & AV_CODEC_FLAG_4MV ) ? 1 : 0;
-
-    for(plane_index=0; plane_index<3; plane_index++){
-        s->plane[plane_index].diag_mc= 1;
-        s->plane[plane_index].htaps= 6;
-        s->plane[plane_index].hcoeff[0]=  40;
-        s->plane[plane_index].hcoeff[1]= -10;
-        s->plane[plane_index].hcoeff[2]=   2;
-        s->plane[plane_index].fast_mc= 1;
-    }
-
     if ((ret = ff_snow_common_init(avctx)) < 0) {
         return ret;
     }
-    ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
-
-    ff_snow_alloc_blocks(s);

     s->version=0;

-    s->m.avctx   = avctx;
-    s->m.bit_rate= avctx->bit_rate;
-
-    s->m.me.temp      =
-    s->m.me.scratchpad= av_mallocz_array((avctx->width+64), 2*16*2*sizeof(uint8_t));
-    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
-    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
-    s->m.sc.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
-    if (!s->m.me.scratchpad || !s->m.me.map || !s->m.me.score_map || !s->m.sc.obmc_scratchpad)
-        return AVERROR(ENOMEM);
-
-    ff_h263_encode_init(&s->m); //mv_penalty
-
-    s->max_ref_frames = av_clip(avctx->refs, 1, MAX_REF_FRAMES);
-
     if(avctx->flags&AV_CODEC_FLAG_PASS1){
         if(!avctx->stats_out)
             avctx->stats_out = av_mallocz(256);
@@ -106,11 +218,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
         if (!avctx->stats_out)
             return AVERROR(ENOMEM);
     }
-    if((avctx->flags&AV_CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
-        if(ff_rate_control_init(&s->m) < 0)
-            return -1;
-    }
-    s->pass1_rc= !(avctx->flags & (AV_CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));

     switch(avctx->pix_fmt){
     case AV_PIX_FMT_YUV444P:
@@ -134,644 +241,18 @@ FF_ENABLE_DEPRECATION_WARNINGS
     }
     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);

-    ff_set_cmp(&s->mecc, s->mecc.me_cmp, s->avctx->me_cmp);
-    ff_set_cmp(&s->mecc, s->mecc.me_sub_cmp, s->avctx->me_sub_cmp);
+    ff_obmc_encode_init(&s->obmc, avctx);
+    ff_snow_init_encode_callbacks(&s->obmc.obmc_coder, avctx);

-    s->input_picture = av_frame_alloc();
-    if (!s->input_picture)
-        return AVERROR(ENOMEM);
-
-    if ((ret = ff_snow_get_buffer(s, s->input_picture)) < 0)
-        return ret;
-
-    if(s->motion_est == FF_ME_ITER){
-        int size= s->b_width * s->b_height << 2*s->block_max_depth;
-        for(i=0; i<s->max_ref_frames; i++){
-            s->ref_mvs[i]= av_mallocz_array(size, sizeof(int16_t[2]));
-            s->ref_scores[i]= av_mallocz_array(size, sizeof(uint32_t));
-            if (!s->ref_mvs[i] || !s->ref_scores[i])
-                return AVERROR(ENOMEM);
-        }
+    if((avctx->flags&AV_CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
+        if(ff_rate_control_init(&s->obmc.m) < 0)
+            return -1;
     }
+    s->pass1_rc= !(avctx->flags & (AV_CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));

     return 0;
 }

-//near copy & paste from dsputil, FIXME
-static int pix_sum(uint8_t * pix, int line_size, int w, int h)
-{
-    int s, i, j;
-
-    s = 0;
-    for (i = 0; i < h; i++) {
-        for (j = 0; j < w; j++) {
-            s += pix[0];
-            pix ++;
-        }
-        pix += line_size - w;
-    }
-    return s;
-}
-
-//near copy & paste from dsputil, FIXME
-static int pix_norm1(uint8_t * pix, int line_size, int w)
-{
-    int s, i, j;
-    uint32_t *sq = ff_square_tab + 256;
-
-    s = 0;
-    for (i = 0; i < w; i++) {
-        for (j = 0; j < w; j ++) {
-            s += sq[pix[0]];
-            pix ++;
-        }
-        pix += line_size - w;
-    }
-    return s;
-}
-
-static inline int get_penalty_factor(int lambda, int lambda2, int type){
-    switch(type&0xFF){
-    default:
-    case FF_CMP_SAD:
-        return lambda>>FF_LAMBDA_SHIFT;
-    case FF_CMP_DCT:
-        return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
-    case FF_CMP_W53:
-        return (4*lambda)>>(FF_LAMBDA_SHIFT);
-    case FF_CMP_W97:
-        return (2*lambda)>>(FF_LAMBDA_SHIFT);
-    case FF_CMP_SATD:
-    case FF_CMP_DCT264:
-        return (2*lambda)>>FF_LAMBDA_SHIFT;
-    case FF_CMP_RD:
-    case FF_CMP_PSNR:
-    case FF_CMP_SSE:
-    case FF_CMP_NSSE:
-        return lambda2>>FF_LAMBDA_SHIFT;
-    case FF_CMP_BIT:
-        return 1;
-    }
-}
-
-//FIXME copy&paste
-#define P_LEFT P[1]
-#define P_TOP P[2]
-#define P_TOPRIGHT P[3]
-#define P_MEDIAN P[4]
-#define P_MV1 P[9]
-#define FLAG_QPEL   1 //must be 1
-
-static int encode_q_branch(SnowContext *s, int level, int x, int y){
-    uint8_t p_buffer[1024];
-    uint8_t i_buffer[1024];
-    uint8_t p_state[sizeof(s->block_state)];
-    uint8_t i_state[sizeof(s->block_state)];
-    RangeCoder pc, ic;
-    uint8_t *pbbak= s->c.bytestream;
-    uint8_t *pbbak_start= s->c.bytestream_start;
-    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
-    const int w= s->b_width  << s->block_max_depth;
-    const int h= s->b_height << s->block_max_depth;
-    const int rem_depth= s->block_max_depth - level;
-    const int index= (x + y*w) << rem_depth;
-    const int block_w= 1<<(LOG2_MB_SIZE - level);
-    int trx= (x+1)<<rem_depth;
-    int try= (y+1)<<rem_depth;
-    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
-    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
-    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
-    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
-    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
-    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
-    int pl = left->color[0];
-    int pcb= left->color[1];
-    int pcr= left->color[2];
-    int pmx, pmy;
-    int mx=0, my=0;
-    int l,cr,cb;
-    const int stride= s->current_picture->linesize[0];
-    const int uvstride= s->current_picture->linesize[1];
-    uint8_t *current_data[3]= { s->input_picture->data[0] + (x + y*  stride)*block_w,
-                                s->input_picture->data[1] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift),
-                                s->input_picture->data[2] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift)};
-    int P[10][2];
-    int16_t last_mv[3][2];
-    int qpel= !!(s->avctx->flags & AV_CODEC_FLAG_QPEL); //unused
-    const int shift= 1+qpel;
-    MotionEstContext *c= &s->m.me;
-    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
-    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
-    int my_context= av_log2(2*FFABS(left->my - top->my));
-    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
-    int ref, best_ref, ref_score, ref_mx, ref_my;
-
-    av_assert0(sizeof(s->block_state) >= 256);
-    if(s->keyframe){
-        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
-        return 0;
-    }
-
-//    clip predictors / edge ?
-
-    P_LEFT[0]= left->mx;
-    P_LEFT[1]= left->my;
-    P_TOP [0]= top->mx;
-    P_TOP [1]= top->my;
-    P_TOPRIGHT[0]= tr->mx;
-    P_TOPRIGHT[1]= tr->my;
-
-    last_mv[0][0]= s->block[index].mx;
-    last_mv[0][1]= s->block[index].my;
-    last_mv[1][0]= right->mx;
-    last_mv[1][1]= right->my;
-    last_mv[2][0]= bottom->mx;
-    last_mv[2][1]= bottom->my;
-
-    s->m.mb_stride=2;
-    s->m.mb_x=
-    s->m.mb_y= 0;
-    c->skip= 0;
-
-    av_assert1(c->  stride ==   stride);
-    av_assert1(c->uvstride == uvstride);
-
-    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
-    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
-    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
-    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_DMV;
-
-    c->xmin = - x*block_w - 16+3;
-    c->ymin = - y*block_w - 16+3;
-    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
-    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
-
-    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
-    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
-    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
-    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
-    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
-    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
-    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
-
-    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
-    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
-
-    if (!y) {
-        c->pred_x= P_LEFT[0];
-        c->pred_y= P_LEFT[1];
-    } else {
-        c->pred_x = P_MEDIAN[0];
-        c->pred_y = P_MEDIAN[1];
-    }
-
-    score= INT_MAX;
-    best_ref= 0;
-    for(ref=0; ref<s->ref_frames; ref++){
-        init_ref(c, current_data, s->last_picture[ref]->data, NULL, block_w*x, block_w*y, 0);
-
-        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
-                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
-
-        av_assert2(ref_mx >= c->xmin);
-        av_assert2(ref_mx <= c->xmax);
-        av_assert2(ref_my >= c->ymin);
-        av_assert2(ref_my <= c->ymax);
-
-        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
-        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
-        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
-        if(s->ref_mvs[ref]){
-            s->ref_mvs[ref][index][0]= ref_mx;
-            s->ref_mvs[ref][index][1]= ref_my;
-            s->ref_scores[ref][index]= ref_score;
-        }
-        if(score > ref_score){
-            score= ref_score;
-            best_ref= ref;
-            mx= ref_mx;
-            my= ref_my;
-        }
-    }
-    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
-
-  //  subpel search
-    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
-    pc= s->c;
-    pc.bytestream_start=
-    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
-    memcpy(p_state, s->block_state, sizeof(s->block_state));
-
-    if(level!=s->block_max_depth)
-        put_rac(&pc, &p_state[4 + s_context], 1);
-    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
-    if(s->ref_frames > 1)
-        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
-    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
-    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
-    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
-    p_len= pc.bytestream - pc.bytestream_start;
-    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
-
-    block_s= block_w*block_w;
-    sum = pix_sum(current_data[0], stride, block_w, block_w);
-    l= (sum + block_s/2)/block_s;
-    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
-
-    if (s->nb_planes > 2) {
-        block_s= block_w*block_w>>(s->chroma_h_shift + s->chroma_v_shift);
-        sum = pix_sum(current_data[1], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
-        cb= (sum + block_s/2)/block_s;
-    //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
-        sum = pix_sum(current_data[2], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
-        cr= (sum + block_s/2)/block_s;
-    //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
-    }else
-        cb = cr = 0;
-
-    ic= s->c;
-    ic.bytestream_start=
-    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
-    memcpy(i_state, s->block_state, sizeof(s->block_state));
-    if(level!=s->block_max_depth)
-        put_rac(&ic, &i_state[4 + s_context], 1);
-    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
-    put_symbol(&ic, &i_state[32],  l-pl , 1);
-    if (s->nb_planes > 2) {
-        put_symbol(&ic, &i_state[64], cb-pcb, 1);
-        put_symbol(&ic, &i_state[96], cr-pcr, 1);
-    }
-    i_len= ic.bytestream - ic.bytestream_start;
-    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
-
-    av_assert1(iscore < 255*255*256 + s->lambda2*10);
-    av_assert1(iscore >= 0);
-    av_assert1(l>=0 && l<=255);
-    av_assert1(pl>=0 && pl<=255);
-
-    if(level==0){
-        int varc= iscore >> 8;
-        int vard= score >> 8;
-        if (vard <= 64 || vard < varc)
-            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
-        else
-            c->scene_change_score+= s->m.qscale;
-    }
-
-    if(level!=s->block_max_depth){
-        put_rac(&s->c, &s->block_state[4 + s_context], 0);
-        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
-        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
-        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
-        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
-        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
-
-        if(score2 < score && score2 < iscore)
-            return score2;
-    }
-
-    if(iscore < score){
-        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
-        memcpy(pbbak, i_buffer, i_len);
-        s->c= ic;
-        s->c.bytestream_start= pbbak_start;
-        s->c.bytestream= pbbak + i_len;
-        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
-        memcpy(s->block_state, i_state, sizeof(s->block_state));
-        return iscore;
-    }else{
-        memcpy(pbbak, p_buffer, p_len);
-        s->c= pc;
-        s->c.bytestream_start= pbbak_start;
-        s->c.bytestream= pbbak + p_len;
-        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
-        memcpy(s->block_state, p_state, sizeof(s->block_state));
-        return score;
-    }
-}
-
-static void encode_q_branch2(SnowContext *s, int level, int x, int y){
-    const int w= s->b_width  << s->block_max_depth;
-    const int rem_depth= s->block_max_depth - level;
-    const int index= (x + y*w) << rem_depth;
-    int trx= (x+1)<<rem_depth;
-    BlockNode *b= &s->block[index];
-    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
-    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
-    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
-    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
-    int pl = left->color[0];
-    int pcb= left->color[1];
-    int pcr= left->color[2];
-    int pmx, pmy;
-    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
-    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
-    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
-    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
-
-    if(s->keyframe){
-        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
-        return;
-    }
-
-    if(level!=s->block_max_depth){
-        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
-            put_rac(&s->c, &s->block_state[4 + s_context], 1);
-        }else{
-            put_rac(&s->c, &s->block_state[4 + s_context], 0);
-            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
-            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
-            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
-            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
-            return;
-        }
-    }
-    if(b->type & BLOCK_INTRA){
-        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
-        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
-        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
-        if (s->nb_planes > 2) {
-            put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
-            put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
-        }
-        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
-    }else{
-        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
-        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
-        if(s->ref_frames > 1)
-            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
-        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
-        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
-        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
-    }
-}
-
-static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
-    int i, x2, y2;
-    Plane *p= &s->plane[plane_index];
-    const int block_size = MB_SIZE >> s->block_max_depth;
-    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
-    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
-    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
-    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
-    const int ref_stride= s->current_picture->linesize[plane_index];
-    uint8_t *src= s-> input_picture->data[plane_index];
-    IDWTELEM *dst= (IDWTELEM*)s->m.sc.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
-    const int b_stride = s->b_width << s->block_max_depth;
-    const int w= p->width;
-    const int h= p->height;
-    int index= mb_x + mb_y*b_stride;
-    BlockNode *b= &s->block[index];
-    BlockNode backup= *b;
-    int ab=0;
-    int aa=0;
-
-    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc stuff above
-
-    b->type|= BLOCK_INTRA;
-    b->color[plane_index]= 0;
-    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
-
-    for(i=0; i<4; i++){
-        int mb_x2= mb_x + (i &1) - 1;
-        int mb_y2= mb_y + (i>>1) - 1;
-        int x= block_w*mb_x2 + block_w/2;
-        int y= block_h*mb_y2 + block_h/2;
-
-        add_yblock(s, 0, NULL, dst + (i&1)*block_w + (i>>1)*obmc_stride*block_h, NULL, obmc,
-                    x, y, block_w, block_h, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
-
-        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_h); y2++){
-            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
-                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_h*mb_y - block_h/2))*obmc_stride;
-                int obmc_v= obmc[index];
-                int d;
-                if(y<0) obmc_v += obmc[index + block_h*obmc_stride];
-                if(x<0) obmc_v += obmc[index + block_w];
-                if(y+block_h>h) obmc_v += obmc[index - block_h*obmc_stride];
-                if(x+block_w>w) obmc_v += obmc[index - block_w];
-                //FIXME precalculate this or simplify it somehow else
-
-                d = -dst[index] + (1<<(FRAC_BITS-1));
-                dst[index] = d;
-                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
-                aa += obmc_v * obmc_v; //FIXME precalculate this
-            }
-        }
-    }
-    *b= backup;
-
-    return av_clip_uint8( ROUNDED_DIV(ab<<LOG2_OBMC_MAX, aa) ); //FIXME we should not need clipping
-}
-
-static inline int get_block_bits(SnowContext *s, int x, int y, int w){
-    const int b_stride = s->b_width << s->block_max_depth;
-    const int b_height = s->b_height<< s->block_max_depth;
-    int index= x + y*b_stride;
-    const BlockNode *b     = &s->block[index];
-    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
-    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
-    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
-    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
-    int dmx, dmy;
-//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
-//  int my_context= av_log2(2*FFABS(left->my - top->my));
-
-    if(x<0 || x>=b_stride || y>=b_height)
-        return 0;
-/*
-1            0      0
-01X          1-2    1
-001XX        3-6    2-3
-0001XXX      7-14   4-7
-00001XXXX   15-30   8-15
-*/
-//FIXME try accurate rate
-//FIXME intra and inter predictors if surrounding blocks are not the same type
-    if(b->type & BLOCK_INTRA){
-        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
-                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
-                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
-    }else{
-        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
-        dmx-= b->mx;
-        dmy-= b->my;
-        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
-                    + av_log2(2*FFABS(dmy))
-                    + av_log2(2*b->ref));
-    }
-}
-
-static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, uint8_t (*obmc_edged)[MB_SIZE * 2]){
-    Plane *p= &s->plane[plane_index];
-    const int block_size = MB_SIZE >> s->block_max_depth;
-    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
-    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
-    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
-    const int ref_stride= s->current_picture->linesize[plane_index];
-    uint8_t *dst= s->current_picture->data[plane_index];
-    uint8_t *src= s->  input_picture->data[plane_index];
-    IDWTELEM *pred= (IDWTELEM*)s->m.sc.obmc_scratchpad + plane_index*block_size*block_size*4;
-    uint8_t *cur = s->scratchbuf;
-    uint8_t *tmp = s->emu_edge_buffer;
-    const int b_stride = s->b_width << s->block_max_depth;
-    const int b_height = s->b_height<< s->block_max_depth;
-    const int w= p->width;
-    const int h= p->height;
-    int distortion;
-    int rate= 0;
-    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
-    int sx= block_w*mb_x - block_w/2;
-    int sy= block_h*mb_y - block_h/2;
-    int x0= FFMAX(0,-sx);
-    int y0= FFMAX(0,-sy);
-    int x1= FFMIN(block_w*2, w-sx);
-    int y1= FFMIN(block_h*2, h-sy);
-    int i,x,y;
-
-    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below chckinhg only block_w
-
-    ff_snow_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_h*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
-
-    for(y=y0; y<y1; y++){
-        const uint8_t *obmc1= obmc_edged[y];
-        const IDWTELEM *pred1 = pred + y*obmc_stride;
-        uint8_t *cur1 = cur + y*ref_stride;
-        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
-        for(x=x0; x<x1; x++){
-#if FRAC_BITS >= LOG2_OBMC_MAX
-            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
-#else
-            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
-#endif
-            v = (v + pred1[x]) >> FRAC_BITS;
-            if(v&(~255)) v= ~(v>>31);
-            dst1[x] = v;
-        }
-    }
-
-    /* copy the regions where obmc[] = (uint8_t)256 */
-    if(LOG2_OBMC_MAX == 8
-        && (mb_x == 0 || mb_x == b_stride-1)
-        && (mb_y == 0 || mb_y == b_height-1)){
-        if(mb_x == 0)
-            x1 = block_w;
-        else
-            x0 = block_w;
-        if(mb_y == 0)
-            y1 = block_h;
-        else
-            y0 = block_h;
-        for(y=y0; y<y1; y++)
-            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
-    }
-
-    if(block_w==16){
-        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
-        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
-        /* FIXME cmps overlap but do not cover the wavelet's whole support.
-         * So improving the score of one block is not strictly guaranteed
-         * to improve the score of the whole frame, thus iterative motion
-         * estimation does not always converge. */
-        if(s->avctx->me_cmp == FF_CMP_W97)
-            distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
-        else if(s->avctx->me_cmp == FF_CMP_W53)
-            distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
-        else{
-            distortion = 0;
-            for(i=0; i<4; i++){
-                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
-                distortion += s->mecc.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
-            }
-        }
-    }else{
-        av_assert2(block_w==8);
-        distortion = s->mecc.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
-    }
-
-    if(plane_index==0){
-        for(i=0; i<4; i++){
-/* ..RRr
- * .RXx.
- * rxx..
- */
-            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
-        }
-        if(mb_x == b_stride-2)
-            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
-    }
-    return distortion + rate*penalty_factor;
-}
-
-static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
-    int i, y2;
-    Plane *p= &s->plane[plane_index];
-    const int block_size = MB_SIZE >> s->block_max_depth;
-    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
-    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
-    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
-    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
-    const int ref_stride= s->current_picture->linesize[plane_index];
-    uint8_t *dst= s->current_picture->data[plane_index];
-    uint8_t *src= s-> input_picture->data[plane_index];
-    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
-    // const has only been removed from zero_dst to suppress a warning
-    static IDWTELEM zero_dst[4096]; //FIXME
-    const int b_stride = s->b_width << s->block_max_depth;
-    const int w= p->width;
-    const int h= p->height;
-    int distortion= 0;
-    int rate= 0;
-    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
-
-    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below
-
-    for(i=0; i<9; i++){
-        int mb_x2= mb_x + (i%3) - 1;
-        int mb_y2= mb_y + (i/3) - 1;
-        int x= block_w*mb_x2 + block_w/2;
-        int y= block_h*mb_y2 + block_h/2;
-
-        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
-                   x, y, block_w, block_h, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
-
-        //FIXME find a cleaner/simpler way to skip the outside stuff
-        for(y2= y; y2<0; y2++)
-            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
-        for(y2= h; y2<y+block_h; y2++)
-            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
-        if(x<0){
-            for(y2= y; y2<y+block_h; y2++)
-                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
-        }
-        if(x+block_w > w){
-            for(y2= y; y2<y+block_h; y2++)
-                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
-        }
-
-        av_assert1(block_w== 8 || block_w==16);
-        distortion += s->mecc.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_h);
-    }
-
-    if(plane_index==0){
-        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
-        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
-
-/* ..RRRr
- * .RXXx.
- * .RXXx.
- * rxxx.
- */
-        if(merged)
-            rate = get_block_bits(s, mb_x, mb_y, 2);
-        for(i=merged?4:0; i<9; i++){
-            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
-            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
-        }
-    }
-    return distortion + rate*penalty_factor;
-}
-
 static int encode_subband_c0run(SnowContext *s, SubBand *b, const IDWTELEM *src, const IDWTELEM *parent, int stride, int orientation){
     const int w= b->width;
     const int h= b->height;
@@ -899,347 +380,6 @@ static int encode_subband(SnowContext *s, SubBand *b, const IDWTELEM *src, const
 //    encode_subband_dzr(s, b, src, parent, stride, orientation);
 }

-static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){
-    const int b_stride= s->b_width << s->block_max_depth;
-    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
-    BlockNode backup= *block;
-    unsigned value;
-    int rd, index;
-
-    av_assert2(mb_x>=0 && mb_y>=0);
-    av_assert2(mb_x<b_stride);
-
-    if(intra){
-        block->color[0] = p[0];
-        block->color[1] = p[1];
-        block->color[2] = p[2];
-        block->type |= BLOCK_INTRA;
-    }else{
-        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
-        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
-        if(s->me_cache[index] == value)
-            return 0;
-        s->me_cache[index]= value;
-
-        block->mx= p[0];
-        block->my= p[1];
-        block->type &= ~BLOCK_INTRA;
-    }
-
-    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged) + s->intra_penalty * !!intra;
-
-//FIXME chroma
-    if(rd < *best_rd){
-        *best_rd= rd;
-        return 1;
-    }else{
-        *block= backup;
-        return 0;
-    }
-}
-
-/* special case for int[2] args we discard afterwards,
- * fixes compilation problem with gcc 2.95 */
-static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){
-    int p[2] = {p0, p1};
-    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
-}
-
-static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
-    const int b_stride= s->b_width << s->block_max_depth;
-    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
-    BlockNode backup[4];
-    unsigned value;
-    int rd, index;
-
-    /* We don't initialize backup[] during variable declaration, because
-     * that fails to compile on MSVC: "cannot convert from 'BlockNode' to
-     * 'int16_t'". */
-    backup[0] = block[0];
-    backup[1] = block[1];
-    backup[2] = block[b_stride];
-    backup[3] = block[b_stride + 1];
-
-    av_assert2(mb_x>=0 && mb_y>=0);
-    av_assert2(mb_x<b_stride);
-    av_assert2(((mb_x|mb_y)&1) == 0);
-
-    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
-    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
-    if(s->me_cache[index] == value)
-        return 0;
-    s->me_cache[index]= value;
-
-    block->mx= p0;
-    block->my= p1;
-    block->ref= ref;
-    block->type &= ~BLOCK_INTRA;
-    block[1]= block[b_stride]= block[b_stride+1]= *block;
-
-    rd= get_4block_rd(s, mb_x, mb_y, 0);
-
-//FIXME chroma
-    if(rd < *best_rd){
-        *best_rd= rd;
-        return 1;
-    }else{
-        block[0]= backup[0];
-        block[1]= backup[1];
-        block[b_stride]= backup[2];
-        block[b_stride+1]= backup[3];
-        return 0;
-    }
-}
-
-static void iterative_me(SnowContext *s){
-    int pass, mb_x, mb_y;
-    const int b_width = s->b_width  << s->block_max_depth;
-    const int b_height= s->b_height << s->block_max_depth;
-    const int b_stride= b_width;
-    int color[3];
-
-    {
-        RangeCoder r = s->c;
-        uint8_t state[sizeof(s->block_state)];
-        memcpy(state, s->block_state, sizeof(s->block_state));
-        for(mb_y= 0; mb_y<s->b_height; mb_y++)
-            for(mb_x= 0; mb_x<s->b_width; mb_x++)
-                encode_q_branch(s, 0, mb_x, mb_y);
-        s->c = r;
-        memcpy(s->block_state, state, sizeof(s->block_state));
-    }
-
-    for(pass=0; pass<25; pass++){
-        int change= 0;
-
-        for(mb_y= 0; mb_y<b_height; mb_y++){
-            for(mb_x= 0; mb_x<b_width; mb_x++){
-                int dia_change, i, j, ref;
-                int best_rd= INT_MAX, ref_rd;
-                BlockNode backup, ref_b;
-                const int index= mb_x + mb_y * b_stride;
-                BlockNode *block= &s->block[index];
-                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
-                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
-                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
-                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
-                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
-                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
-                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
-                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
-                const int b_w= (MB_SIZE >> s->block_max_depth);
-                uint8_t obmc_edged[MB_SIZE * 2][MB_SIZE * 2];
-
-                if(pass && (block->type & BLOCK_OPT))
-                    continue;
-                block->type |= BLOCK_OPT;
-
-                backup= *block;
-
-                if(!s->me_cache_generation)
-                    memset(s->me_cache, 0, sizeof(s->me_cache));
-                s->me_cache_generation += 1<<22;
-
-                //FIXME precalculate
-                {
-                    int x, y;
-                    for (y = 0; y < b_w * 2; y++)
-                        memcpy(obmc_edged[y], ff_obmc_tab[s->block_max_depth] + y * b_w * 2, b_w * 2);
-                    if(mb_x==0)
-                        for(y=0; y<b_w*2; y++)
-                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
-                    if(mb_x==b_stride-1)
-                        for(y=0; y<b_w*2; y++)
-                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
-                    if(mb_y==0){
-                        for(x=0; x<b_w*2; x++)
-                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
-                        for(y=1; y<b_w; y++)
-                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
-                    }
-                    if(mb_y==b_height-1){
-                        for(x=0; x<b_w*2; x++)
-                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
-                        for(y=b_w; y<b_w*2-1; y++)
-                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
-                    }
-                }
-
-                //skip stuff outside the picture
-                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
-                    uint8_t *src= s->  input_picture->data[0];
-                    uint8_t *dst= s->current_picture->data[0];
-                    const int stride= s->current_picture->linesize[0];
-                    const int block_w= MB_SIZE >> s->block_max_depth;
-                    const int block_h= MB_SIZE >> s->block_max_depth;
-                    const int sx= block_w*mb_x - block_w/2;
-                    const int sy= block_h*mb_y - block_h/2;
-                    const int w= s->plane[0].width;
-                    const int h= s->plane[0].height;
-                    int y;
-
-                    for(y=sy; y<0; y++)
-                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
-                    for(y=h; y<sy+block_h*2; y++)
-                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
-                    if(sx<0){
-                        for(y=sy; y<sy+block_h*2; y++)
-                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
-                    }
-                    if(sx+block_w*2 > w){
-                        for(y=sy; y<sy+block_h*2; y++)
-                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
-                    }
-                }
-
-                // intra(black) = neighbors' contribution to the current block
-                for(i=0; i < s->nb_planes; i++)
-                    color[i]= get_dc(s, mb_x, mb_y, i);
-
-                // get previous score (cannot be cached due to OBMC)
-                if(pass > 0 && (block->type&BLOCK_INTRA)){
-                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
-                    check_block(s, mb_x, mb_y, color0, 1, obmc_edged, &best_rd);
-                }else
-                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, obmc_edged, &best_rd);
-
-                ref_b= *block;
-                ref_rd= best_rd;
-                for(ref=0; ref < s->ref_frames; ref++){
-                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
-                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
-                        continue;
-                    block->ref= ref;
-                    best_rd= INT_MAX;
-
-                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], obmc_edged, &best_rd);
-                    check_block_inter(s, mb_x, mb_y, 0, 0, obmc_edged, &best_rd);
-                    if(tb)
-                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], obmc_edged, &best_rd);
-                    if(lb)
-                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], obmc_edged, &best_rd);
-                    if(rb)
-                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], obmc_edged, &best_rd);
-                    if(bb)
-                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], obmc_edged, &best_rd);
-
-                    /* fullpel ME */
-                    //FIXME avoid subpel interpolation / round to nearest integer
-                    do{
-                        int newx = block->mx;
-                        int newy = block->my;
-                        int dia_size = s->iterative_dia_size ? s->iterative_dia_size : FFMAX(s->avctx->dia_size, 1);
-                        dia_change=0;
-                        for(i=0; i < dia_size; i++){
-                            for(j=0; j<i; j++){
-                                dia_change |= check_block_inter(s, mb_x, mb_y, newx+4*(i-j), newy+(4*j), obmc_edged, &best_rd);
-                                dia_change |= check_block_inter(s, mb_x, mb_y, newx-4*(i-j), newy-(4*j), obmc_edged, &best_rd);
-                                dia_change |= check_block_inter(s, mb_x, mb_y, newx-(4*j), newy+4*(i-j), obmc_edged, &best_rd);
-                                dia_change |= check_block_inter(s, mb_x, mb_y, newx+(4*j), newy-4*(i-j), obmc_edged, &best_rd);
-                            }
-                        }
-                    }while(dia_change);
-                    /* subpel ME */
-                    do{
-                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
-                        dia_change=0;
-                        for(i=0; i<8; i++)
-                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], obmc_edged, &best_rd);
-                    }while(dia_change);
-                    //FIXME or try the standard 2 pass qpel or similar
-
-                    mvr[0][0]= block->mx;
-                    mvr[0][1]= block->my;
-                    if(ref_rd > best_rd){
-                        ref_rd= best_rd;
-                        ref_b= *block;
-                    }
-                }
-                best_rd= ref_rd;
-                *block= ref_b;
-                check_block(s, mb_x, mb_y, color, 1, obmc_edged, &best_rd);
-                //FIXME RD style color selection
-                if(!same_block(block, &backup)){
-                    if(tb ) tb ->type &= ~BLOCK_OPT;
-                    if(lb ) lb ->type &= ~BLOCK_OPT;
-                    if(rb ) rb ->type &= ~BLOCK_OPT;
-                    if(bb ) bb ->type &= ~BLOCK_OPT;
-                    if(tlb) tlb->type &= ~BLOCK_OPT;
-                    if(trb) trb->type &= ~BLOCK_OPT;
-                    if(blb) blb->type &= ~BLOCK_OPT;
-                    if(brb) brb->type &= ~BLOCK_OPT;
-                    change ++;
-                }
-            }
-        }
-        av_log(s->avctx, AV_LOG_DEBUG, "pass:%d changed:%d\n", pass, change);
-        if(!change)
-            break;
-    }
-
-    if(s->block_max_depth == 1){
-        int change= 0;
-        for(mb_y= 0; mb_y<b_height; mb_y+=2){
-            for(mb_x= 0; mb_x<b_width; mb_x+=2){
-                int i;
-                int best_rd, init_rd;
-                const int index= mb_x + mb_y * b_stride;
-                BlockNode *b[4];
-
-                b[0]= &s->block[index];
-                b[1]= b[0]+1;
-                b[2]= b[0]+b_stride;
-                b[3]= b[2]+1;
-                if(same_block(b[0], b[1]) &&
-                   same_block(b[0], b[2]) &&
-                   same_block(b[0], b[3]))
-                    continue;
-
-                if(!s->me_cache_generation)
-                    memset(s->me_cache, 0, sizeof(s->me_cache));
-                s->me_cache_generation += 1<<22;
-
-                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
-
-                //FIXME more multiref search?
-                check_4block_inter(s, mb_x, mb_y,
-                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
-                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
-
-                for(i=0; i<4; i++)
-                    if(!(b[i]->type&BLOCK_INTRA))
-                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
-
-                if(init_rd != best_rd)
-                    change++;
-            }
-        }
-        av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
-    }
-}
-
-static void encode_blocks(SnowContext *s, int search){
-    int x, y;
-    int w= s->b_width;
-    int h= s->b_height;
-
-    if(s->motion_est == FF_ME_ITER && !s->keyframe && search)
-        iterative_me(s);
-
-    for(y=0; y<h; y++){
-        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
-            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
-            return;
-        }
-        for(x=0; x<w; x++){
-            if(s->motion_est == FF_ME_ITER || !search)
-                encode_q_branch2(s, 0, x, y);
-            else
-                encode_q_branch (s, 0, x, y);
-        }
-    }
-}
-
 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
     const int w= b->width;
     const int h= b->height;
@@ -1396,10 +536,10 @@ static void encode_header(SnowContext *s){
         s->last_spatial_decomposition_type=
         s->last_qlog=
         s->last_qbias=
-        s->last_mv_scale=
-        s->last_block_max_depth= 0;
+        s->obmc.last_mv_scale=
+        s->obmc.last_block_max_depth= 0;
         for(plane_index=0; plane_index<2; plane_index++){
-            Plane *p= &s->plane[plane_index];
+            PlaneObmc *p= &s->obmc.plane[plane_index];
             p->last_htaps=0;
             p->last_diag_mc=0;
             memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
@@ -1418,7 +558,7 @@ static void encode_header(SnowContext *s){
         }
         put_rac(&s->c, s->header_state, s->spatial_scalability);
 //        put_rac(&s->c, s->header_state, s->rate_scalability);
-        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
+        put_symbol(&s->c, s->header_state, s->obmc.max_ref_frames-1, 0);

         encode_qlogs(s);
     }
@@ -1426,7 +566,7 @@ static void encode_header(SnowContext *s){
     if(!s->keyframe){
         int update_mc=0;
         for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
-            Plane *p= &s->plane[plane_index];
+            PlaneObmc *p= &s->obmc.plane[plane_index];
             update_mc |= p->last_htaps   != p->htaps;
             update_mc |= p->last_diag_mc != p->diag_mc;
             update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
@@ -1434,7 +574,7 @@ static void encode_header(SnowContext *s){
         put_rac(&s->c, s->header_state, update_mc);
         if(update_mc){
             for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
-                Plane *p= &s->plane[plane_index];
+                PlaneObmc *p= &s->obmc.plane[plane_index];
                 put_rac(&s->c, s->header_state, p->diag_mc);
                 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
                 for(i= p->htaps/2; i; i--)
@@ -1450,10 +590,10 @@ static void encode_header(SnowContext *s){
     }

     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
-    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
-    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
-    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
-    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
+    put_symbol(&s->c, s->header_state, s->qlog                 - s->last_qlog    , 1);
+    put_symbol(&s->c, s->header_state, s->obmc.mv_scale        - s->obmc.last_mv_scale, 1);
+    put_symbol(&s->c, s->header_state, s->qbias                - s->last_qbias   , 1);
+    put_symbol(&s->c, s->header_state, s->obmc.block_max_depth - s->obmc.last_block_max_depth, 1);

 }

@@ -1462,7 +602,7 @@ static void update_last_header_values(SnowContext *s){

     if(!s->keyframe){
         for(plane_index=0; plane_index<2; plane_index++){
-            Plane *p= &s->plane[plane_index];
+            PlaneObmc *p= &s->obmc.plane[plane_index];
             p->last_diag_mc= p->diag_mc;
             p->last_htaps  = p->htaps;
             memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
@@ -1472,8 +612,8 @@ static void update_last_header_values(SnowContext *s){
     s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
     s->last_qlog                        = s->qlog;
     s->last_qbias                       = s->qbias;
-    s->last_mv_scale                    = s->mv_scale;
-    s->last_block_max_depth             = s->block_max_depth;
+    s->obmc.last_mv_scale               = s->obmc.mv_scale;
+    s->obmc.last_block_max_depth        = s->obmc.block_max_depth;
     s->last_spatial_decomposition_count = s->spatial_decomposition_count;
 }

@@ -1518,17 +658,17 @@ static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
     coef_sum = (uint64_t)coef_sum * coef_sum >> 16;

     if(pict->pict_type == AV_PICTURE_TYPE_I){
-        s->m.current_picture.mb_var_sum= coef_sum;
-        s->m.current_picture.mc_mb_var_sum= 0;
+        s->obmc.m.current_picture.mb_var_sum= coef_sum;
+        s->obmc.m.current_picture.mc_mb_var_sum= 0;
     }else{
-        s->m.current_picture.mc_mb_var_sum= coef_sum;
-        s->m.current_picture.mb_var_sum= 0;
+        s->obmc.m.current_picture.mc_mb_var_sum= coef_sum;
+        s->obmc.m.current_picture.mb_var_sum= 0;
     }

-    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
+    pict->quality= ff_rate_estimate_qscale(&s->obmc.m, 1);
     if (pict->quality < 0)
         return INT_MIN;
-    s->lambda= pict->quality * 3/2;
+    s->obmc.lambda= pict->quality * 3/2;
     delta_qlog= qscale2qlog(pict->quality) - s->qlog;
     s->qlog+= delta_qlog;
     return delta_qlog;
@@ -1545,12 +685,12 @@ static void calculate_visual_weight(SnowContext *s, Plane *p){
             IDWTELEM *ibuf= b->ibuf;
             int64_t error=0;

-            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
+            memset(s->obmc.spatial_idwt_buffer, 0, sizeof(*s->obmc.spatial_idwt_buffer)*width*height);
             ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
-            ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            ff_spatial_idwt(s->obmc.spatial_idwt_buffer, s->temp_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
             for(y=0; y<height; y++){
                 for(x=0; x<width; x++){
-                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
+                    int64_t d= s->obmc.spatial_idwt_buffer[x + y*width]*16;
                     error += d*d;
                 }
             }
@@ -1572,7 +712,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint8_t rc_header_bak[sizeof(s->header_state)];
     uint8_t rc_block_bak[sizeof(s->block_state)];

-    if ((ret = ff_alloc_packet2(avctx, pkt, s->b_width*s->b_height*MB_SIZE*MB_SIZE*3 + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, s->obmc.b_width*s->obmc.b_height*MB_SIZE*MB_SIZE*3 + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;

     ff_init_range_encoder(c, pkt->data, pkt->size);
@@ -1582,124 +722,55 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         int hshift= i ? s->chroma_h_shift : 0;
         int vshift= i ? s->chroma_v_shift : 0;
         for(y=0; y<AV_CEIL_RSHIFT(height, vshift); y++)
-            memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]],
+            memcpy(&s->obmc.input_picture->data[i][y * s->obmc.input_picture->linesize[i]],
                    &pict->data[i][y * pict->linesize[i]],
                    AV_CEIL_RSHIFT(width, hshift));
-        s->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i],
+        s->obmc.mpvencdsp.draw_edges(s->obmc.input_picture->data[i], s->obmc.input_picture->linesize[i],
                                 AV_CEIL_RSHIFT(width, hshift), AV_CEIL_RSHIFT(height, vshift),
                                 EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
                                 EDGE_TOP | EDGE_BOTTOM);

     }
     emms_c();
-    pic = s->input_picture;
+    pic = s->obmc.input_picture;
     pic->pict_type = pict->pict_type;
     pic->quality = pict->quality;

-    s->m.picture_number= avctx->frame_number;
+    s->obmc.m.picture_number= avctx->frame_number;
     if(avctx->flags&AV_CODEC_FLAG_PASS2){
-        s->m.pict_type = pic->pict_type = s->m.rc_context.entry[avctx->frame_number].new_pict_type;
+        s->obmc.m.pict_type = pic->pict_type = s->obmc.m.rc_context.entry[avctx->frame_number].new_pict_type;
         s->keyframe = pic->pict_type == AV_PICTURE_TYPE_I;
+        s->obmc.keyframe = s->keyframe;
         if(!(avctx->flags&AV_CODEC_FLAG_QSCALE)) {
-            pic->quality = ff_rate_estimate_qscale(&s->m, 0);
+            pic->quality = ff_rate_estimate_qscale(&s->obmc.m, 0);
             if (pic->quality < 0)
                 return -1;
         }
     }else{
         s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
-        s->m.pict_type = pic->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+        s->obmc.m.pict_type = pic->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+        s->obmc.keyframe = s->keyframe;
     }

     if(s->pass1_rc && avctx->frame_number == 0)
         pic->quality = 2*FF_QP2LAMBDA;
     if (pic->quality) {
         s->qlog   = qscale2qlog(pic->quality);
-        s->lambda = pic->quality * 3/2;
+        s->obmc.lambda = pic->quality * 3/2;
     }
     if (s->qlog < 0 || (!pic->quality && (avctx->flags & AV_CODEC_FLAG_QSCALE))) {
         s->qlog= LOSSLESS_QLOG;
-        s->lambda = 0;
+        s->obmc.lambda = 0;
     }//else keep previous frame's qlog until after motion estimation

-    if (s->current_picture->data[0]
-#if FF_API_EMU_EDGE
-        && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)
-#endif
-        ) {
-        int w = s->avctx->width;
-        int h = s->avctx->height;
-
-        s->mpvencdsp.draw_edges(s->current_picture->data[0],
-                                s->current_picture->linesize[0], w   , h   ,
-                                EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
-        if (s->current_picture->data[2]) {
-            s->mpvencdsp.draw_edges(s->current_picture->data[1],
-                                    s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
-                                    EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
-            s->mpvencdsp.draw_edges(s->current_picture->data[2],
-                                    s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
-                                    EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
-        }
-    }
-
-    ff_snow_frame_start(s);
-    av_frame_unref(avctx->coded_frame);
-    ret = av_frame_ref(avctx->coded_frame, s->current_picture);
-    if (ret < 0)
-        return ret;
-
-    s->m.current_picture_ptr= &s->m.current_picture;
-    s->m.current_picture.f = s->current_picture;
-    s->m.current_picture.f->pts = pict->pts;
-    if(pic->pict_type == AV_PICTURE_TYPE_P){
-        int block_width = (width +15)>>4;
-        int block_height= (height+15)>>4;
-        int stride= s->current_picture->linesize[0];
-
-        av_assert0(s->current_picture->data[0]);
-        av_assert0(s->last_picture[0]->data[0]);
-
-        s->m.avctx= s->avctx;
-        s->m.   last_picture.f = s->last_picture[0];
-        s->m.    new_picture.f = s->input_picture;
-        s->m.   last_picture_ptr= &s->m.   last_picture;
-        s->m.linesize = stride;
-        s->m.uvlinesize= s->current_picture->linesize[1];
-        s->m.width = width;
-        s->m.height= height;
-        s->m.mb_width = block_width;
-        s->m.mb_height= block_height;
-        s->m.mb_stride=   s->m.mb_width+1;
-        s->m.b8_stride= 2*s->m.mb_width+1;
-        s->m.f_code=1;
-        s->m.pict_type = pic->pict_type;
-#if FF_API_MOTION_EST
-        s->m.me_method= s->avctx->me_method;
-#endif
-        s->m.motion_est= s->motion_est;
-        s->m.me.scene_change_score=0;
-        s->m.me.dia_size = avctx->dia_size;
-        s->m.quarter_sample= (s->avctx->flags & AV_CODEC_FLAG_QPEL)!=0;
-        s->m.out_format= FMT_H263;
-        s->m.unrestricted_mv= 1;
-
-        s->m.lambda = s->lambda;
-        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
-        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
-
-        s->m.mecc= s->mecc; //move
-        s->m.qdsp= s->qdsp; //move
-        s->m.hdsp = s->hdsp;
-        ff_init_me(&s->m);
-        s->hdsp = s->m.hdsp;
-        s->mecc= s->m.mecc;
-    }
-
     if(s->pass1_rc){
         memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
         memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
     }

+    if ((ret = ff_obmc_pre_encode_frame(&s->obmc, avctx, pict)) < 0)
+        return ret;
+
 redo_frame:

     s->spatial_decomposition_count= 5;
@@ -1713,7 +784,7 @@ redo_frame:
         return AVERROR(EINVAL);
     }

-    s->m.pict_type = pic->pict_type;
+    s->obmc.m.pict_type = pic->pict_type;
     s->qbias = pic->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;

     ff_snow_common_init_after_header(avctx);
@@ -1725,9 +796,9 @@ redo_frame:
     }

     encode_header(s);
-    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
-    encode_blocks(s, 1);
-    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
+    s->obmc.m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
+    ff_obmc_encode_blocks(&s->obmc, 1);
+    s->obmc.m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->obmc.m.misc_bits;

     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
         Plane *p= &s->plane[plane_index];
@@ -1741,10 +812,10 @@ redo_frame:
             if(pict->data[plane_index]) //FIXME gray hack
                 for(y=0; y<h; y++){
                     for(x=0; x<w; x++){
-                        s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
+                        s->obmc.spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
                     }
                 }
-            predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
+            predict_plane(&s->obmc, s->obmc.spatial_idwt_buffer, plane_index, 0);

 #if FF_API_PRIVATE_OPT
 FF_DISABLE_DEPRECATION_WARNINGS
@@ -1756,25 +827,26 @@ FF_ENABLE_DEPRECATION_WARNINGS
             if(   plane_index==0
                && pic->pict_type == AV_PICTURE_TYPE_P
                && !(avctx->flags&AV_CODEC_FLAG_PASS2)
-               && s->m.me.scene_change_score > s->scenechange_threshold){
+               && s->obmc.m.me.scene_change_score > s->scenechange_threshold){
                 ff_init_range_encoder(c, pkt->data, pkt->size);
                 ff_build_rac_states(c, (1LL<<32)/20, 256-8);
                 pic->pict_type= AV_PICTURE_TYPE_I;
                 s->keyframe=1;
-                s->current_picture->key_frame=1;
+                s->obmc.keyframe = 1;
+                s->obmc.current_picture->key_frame=1;
                 goto redo_frame;
             }

             if(s->qlog == LOSSLESS_QLOG){
                 for(y=0; y<h; y++){
                     for(x=0; x<w; x++){
-                        s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
+                        s->spatial_dwt_buffer[y*w + x]= (s->obmc.spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
                     }
                 }
             }else{
                 for(y=0; y<h; y++){
                     for(x=0; x<w; x++){
-                        s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
+                        s->spatial_dwt_buffer[y*w + x]=s->obmc.spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
                     }
                 }
             }
@@ -1791,7 +863,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
                     memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
                     memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
                     encode_header(s);
-                    encode_blocks(s, 0);
+                    ff_obmc_encode_blocks(&s->obmc, 0);
                 }
             }

@@ -1818,27 +890,27 @@ FF_ENABLE_DEPRECATION_WARNINGS
                 }
             }

-            ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            ff_spatial_idwt(s->obmc.spatial_idwt_buffer, s->temp_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
             if(s->qlog == LOSSLESS_QLOG){
                 for(y=0; y<h; y++){
                     for(x=0; x<w; x++){
-                        s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
+                        s->obmc.spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
                     }
                 }
             }
-            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
+            predict_plane(&s->obmc, s->obmc.spatial_idwt_buffer, plane_index, 1);
         }else{
             //ME/MC only
             if(pic->pict_type == AV_PICTURE_TYPE_I){
                 for(y=0; y<h; y++){
                     for(x=0; x<w; x++){
-                        s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x]=
+                        s->obmc.current_picture->data[plane_index][y*s->obmc.current_picture->linesize[plane_index] + x]=
                             pict->data[plane_index][y*pict->linesize[plane_index] + x];
                     }
                 }
             }else{
-                memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
-                predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
+                memset(s->obmc.spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
+                predict_plane(&s->obmc, s->obmc.spatial_idwt_buffer, plane_index, 1);
             }
         }
         if(s->avctx->flags&AV_CODEC_FLAG_PSNR){
@@ -1847,7 +919,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             if(pict->data[plane_index]) //FIXME gray hack
                 for(y=0; y<h; y++){
                     for(x=0; x<w; x++){
-                        int d= s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
+                        int d= s->obmc.current_picture->data[plane_index][y*s->obmc.current_picture->linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
                         error += d*d;
                     }
                 }
@@ -1859,43 +931,43 @@ FF_ENABLE_DEPRECATION_WARNINGS

     update_last_header_values(s);

-    ff_snow_release_buffer(avctx);
-
-    s->current_picture->coded_picture_number = avctx->frame_number;
-    s->current_picture->pict_type = pic->pict_type;
-    s->current_picture->quality = pic->quality;
-    s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
-    s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
-    s->m.current_picture.f->display_picture_number =
-    s->m.current_picture.f->coded_picture_number   = avctx->frame_number;
-    s->m.current_picture.f->quality                = pic->quality;
-    s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
+    ff_obmc_release_buffer(&s->obmc);
+
+    s->obmc.current_picture->coded_picture_number = avctx->frame_number;
+    s->obmc.current_picture->pict_type = pic->pict_type;
+    s->obmc.current_picture->quality = pic->quality;
+    s->obmc.m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
+    s->obmc.m.p_tex_bits = s->obmc.m.frame_bits - s->obmc.m.misc_bits - s->obmc.m.mv_bits;
+    s->obmc.m.current_picture.f->display_picture_number =
+    s->obmc.m.current_picture.f->coded_picture_number   = avctx->frame_number;
+    s->obmc.m.current_picture.f->quality                = pic->quality;
+    s->obmc.m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
     if(s->pass1_rc)
-        if (ff_rate_estimate_qscale(&s->m, 0) < 0)
+        if (ff_rate_estimate_qscale(&s->obmc.m, 0) < 0)
             return -1;
     if(avctx->flags&AV_CODEC_FLAG_PASS1)
-        ff_write_pass1_stats(&s->m);
-    s->m.last_pict_type = s->m.pict_type;
-    avctx->frame_bits = s->m.frame_bits;
-    avctx->mv_bits = s->m.mv_bits;
-    avctx->misc_bits = s->m.misc_bits;
-    avctx->p_tex_bits = s->m.p_tex_bits;
+        ff_write_pass1_stats(&s->obmc.m);
+    s->obmc.m.last_pict_type = s->obmc.m.pict_type;
+    avctx->frame_bits = s->obmc.m.frame_bits;
+    avctx->mv_bits = s->obmc.m.mv_bits;
+    avctx->misc_bits = s->obmc.m.misc_bits;
+    avctx->p_tex_bits = s->obmc.m.p_tex_bits;

     emms_c();

-    ff_side_data_set_encoder_stats(pkt, s->current_picture->quality,
+    ff_side_data_set_encoder_stats(pkt, s->obmc.current_picture->quality,
                                    s->encoding_error,
                                    (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
-                                   s->current_picture->pict_type);
+                                   s->obmc.current_picture->pict_type);

 #if FF_API_ERROR_FRAME
 FF_DISABLE_DEPRECATION_WARNINGS
-    memcpy(s->current_picture->error, s->encoding_error, sizeof(s->encoding_error));
+    memcpy(s->obmc.current_picture->error, s->encoding_error, sizeof(s->encoding_error));
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif

     pkt->size = ff_rac_terminate(c);
-    if (s->current_picture->key_frame)
+    if (s->obmc.current_picture->key_frame)
         pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;

@@ -1907,22 +979,23 @@ static av_cold int encode_end(AVCodecContext *avctx)
     SnowContext *s = avctx->priv_data;

     ff_snow_common_end(s);
-    ff_rate_control_uninit(&s->m);
-    av_frame_free(&s->input_picture);
+    ff_rate_control_uninit(&s->obmc.m);
+    av_frame_free(&s->obmc.input_picture);
     av_freep(&avctx->stats_out);

     return 0;
 }

 #define OFFSET(x) offsetof(SnowContext, x)
+#define OFFSET_OBMC(x) (offsetof(SnowContext, obmc) + offsetof(OBMCContext, x)) /* offset of an OBMCContext field as seen from SnowContext */
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
     FF_MPV_COMMON_OPTS
     { "iter",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ITER }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" },
     { "memc_only",      "Only do ME/MC (I frames -> ref, P frame -> ME+MC).",   OFFSET(memc_only), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     { "no_bitstream",   "Skip final bitstream writeout.",                    OFFSET(no_bitstream), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
-    { "intra_penalty",  "Penalty for intra blocks in block decission",      OFFSET(intra_penalty), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
-    { "iterative_dia_size",  "Dia size for the iterative ME",          OFFSET(iterative_dia_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+    { "intra_penalty",  "Penalty for intra blocks in block decission", OFFSET_OBMC(intra_penalty), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+    { "iterative_dia_size",  "Dia size for the iterative ME",     OFFSET_OBMC(iterative_dia_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
     { "sc_threshold",   "Scene change threshold",                   OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, VE },
     { "pred",           "Spatial decomposition type",                                OFFSET(pred), AV_OPT_TYPE_INT, { .i64 = 0 }, DWT_97, DWT_53, VE, "pred" },
         { "dwt97", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, VE, "pred" },
--
2.7.4 (Apple Git-66)


## 0002-FFV1-p-frames.patch
From e3ba18599dd032563f57fdff72cf1ce4730cfc66 Mon Sep 17 00:00:00 2001
From: Stanislav Dolganov <dolganov@qst.hk>
Date: Thu, 18 Aug 2016 14:36:58 +0300
Subject: [PATCH 2/4] FFV1 p frames

---
 libavcodec/Makefile          |   4 +-
 libavcodec/ffv1.c            |  33 +++-
 libavcodec/ffv1.h            |  14 +-
 libavcodec/ffv1dec.c         | 354 +++++++++++++++++++++++++++++++++++++++-
 libavcodec/ffv1enc.c         | 372 ++++++++++++++++++++++++++++++++++++++++++-
 libavcodec/x86/me_cmp_init.c |   4 +-
 6 files changed, 768 insertions(+), 13 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index dbbf9a1..260e5c5 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -278,8 +278,8 @@ OBJS-$(CONFIG_ESCAPE124_DECODER)       += escape124.o
 OBJS-$(CONFIG_ESCAPE130_DECODER)       += escape130.o
 OBJS-$(CONFIG_EVRC_DECODER)            += evrcdec.o acelp_vectors.o lsp.o
 OBJS-$(CONFIG_EXR_DECODER)             += exr.o
-OBJS-$(CONFIG_FFV1_DECODER)            += ffv1dec.o ffv1.o
-OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1enc.o ffv1.o
+OBJS-$(CONFIG_FFV1_DECODER)            += ffv1dec.o ffv1.o obmemc.o obmc.o
+OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1enc.o ffv1.o obmemc.o obme.o
 OBJS-$(CONFIG_FFWAVESYNTH_DECODER)     += ffwavesynth.o
 OBJS-$(CONFIG_FIC_DECODER)             += fic.o
 OBJS-$(CONFIG_FLAC_DECODER)            += flacdec.o flacdata.o flac.o
diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index a14dd2a..f070a61 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -36,8 +36,11 @@
 #include "avcodec.h"
 #include "internal.h"
 #include "rangecoder.h"
+#include "golomb.h"
 #include "mathops.h"
 #include "ffv1.h"
+#include "me_cmp.h"
+#include "h263.h"

 av_cold int ff_ffv1_common_init(AVCodecContext *avctx)
 {
@@ -49,19 +52,36 @@ av_cold int ff_ffv1_common_init(AVCodecContext *avctx)
     s->avctx = avctx;
     s->flags = avctx->flags;

+    int width, height, ret;
+
+    width = avctx->width;
+    height = avctx->height;
+
+    /* new end */
     s->picture.f = av_frame_alloc();
     s->last_picture.f = av_frame_alloc();
-    if (!s->picture.f || !s->last_picture.f)
-        return AVERROR(ENOMEM);
+    s->residual.f = av_frame_alloc();
+    if (!s->picture.f || !s->last_picture.f || !s->residual.f)
+        goto fail;

     s->width  = avctx->width;
     s->height = avctx->height;

+    s->c_image_line_buf = av_mallocz_array(sizeof(*s->c_image_line_buf), 2 * s->width);
+    s->p_image_line_buf = av_mallocz_array(sizeof(*s->p_image_line_buf), 2 * s->width);
+    if (!s->c_image_line_buf || !s->p_image_line_buf)
+        goto fail;
+
     // defaults
     s->num_h_slices = 1;
     s->num_v_slices = 1;

+    if ((ret = ff_obmc_common_init(&s->obmc, avctx)) < 0)
+        return ret;
+
     return 0;
+fail:
+    return AVERROR(ENOMEM);
 }

 av_cold int ff_ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
@@ -220,6 +240,10 @@ av_cold int ff_ffv1_close(AVCodecContext *avctx)
         ff_thread_release_buffer(avctx, &s->last_picture);
     av_frame_free(&s->last_picture.f);

+    if (s->residual.f)
+        ff_thread_release_buffer(avctx, &s->residual);
+    av_frame_free(&s->residual.f);
+
     for (j = 0; j < s->max_slice_count; j++) {
         FFV1Context *fs = s->slice_context[j];
         for (i = 0; i < s->plane_count; i++) {
@@ -232,6 +256,9 @@ av_cold int ff_ffv1_close(AVCodecContext *avctx)
         av_freep(&fs->sample_buffer32);
     }

+    av_freep(&s->p_image_line_buf);
+    av_freep(&s->c_image_line_buf);
+
     av_freep(&avctx->stats_out);
     for (j = 0; j < s->quant_table_count; j++) {
         av_freep(&s->initial_states[j]);
@@ -245,5 +272,7 @@ av_cold int ff_ffv1_close(AVCodecContext *avctx)
     for (i = 0; i < s->max_slice_count; i++)
         av_freep(&s->slice_context[i]);

+    ff_obmc_close(&s->obmc);
+
     return 0;
 }
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index c2bae1e..0a901f0 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -42,6 +42,11 @@
 #include "rangecoder.h"
 #include "thread.h"

+#define FF_MPV_OFFSET(x) (offsetof(MpegEncContext, x) + offsetof(FFV1Context, obmc.m))
+#include "obmemc.h"
+
+#define MID_STATE 128
+
 #ifdef __INTEL_COMPILER
 #undef av_flatten
 #define av_flatten
@@ -49,6 +54,7 @@

 #define MAX_PLANES 4
 #define CONTEXT_SIZE 32
+#define FRAC_BITS 4

 #define MAX_QUANT_TABLES 8
 #define MAX_CONTEXT_INPUTS 5
@@ -93,7 +99,7 @@ typedef struct FFV1Context {
     int flags;
     int picture_number;
     int key_frame;
-    ThreadFrame picture, last_picture;
+    ThreadFrame picture, last_picture, residual;
     struct FFV1Context *fsrc;

     AVFrame *cur;
@@ -113,11 +119,14 @@ typedef struct FFV1Context {

     int use32bit;

+    uint16_t *p_image_line_buf, *c_image_line_buf;
+
     int ec;
     int intra;
     int slice_damaged;
     int key_frame_ok;
     int context_model;
+    int p_frame;

     int bits_per_raw_sample;
     int packed_at_lsb;
@@ -138,6 +147,9 @@ typedef struct FFV1Context {
     int slice_coding_mode;
     int slice_rct_by_coef;
     int slice_rct_ry_coef;
+
+    OBMCContext obmc;
+    uint8_t block_state[128 + 32*128];
 } FFV1Context;

 int ff_ffv1_common_init(AVCodecContext *avctx);
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index d8f35c3..7913baa 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -39,6 +39,75 @@
 #include "mathops.h"
 #include "ffv1.h"

+#include "obmc.h"
+
+/* Per component: f->picture = (f->picture + f->obmc.current_picture - (max_val >> 2)) & (max_val - 1),
+ * assembled in f->residual and then copied back. Returns 0 or a negative AVERROR code. */
+static int ff_predict_frame(AVCodecContext *avctx, FFV1Context *f)
+{
+    int ret, i, x, y;
+    AVFrame *curr     = f->picture.f;
+    AVFrame *prev     = f->obmc.current_picture;
+    AVFrame *residual = f->residual.f;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(prev->format);
+    int width  = f->width;
+    int height = f->height;
+    const int cw = AV_CEIL_RSHIFT(width, desc->log2_chroma_w);
+    const int ch = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
+
+    if (f->residual.f)
+        ff_thread_release_buffer(avctx, &f->residual);
+    if ((ret = ff_thread_ref_frame(&f->residual, &f->picture)) < 0)
+        return ret;
+    if ((ret = av_frame_make_writable(f->residual.f)) < 0) {
+        ff_thread_release_buffer(avctx, &f->residual);
+        return ret;
+    }
+
+    /* Clear every buffer that is actually present; buf[] need not have one entry per plane. */
+    for (i = 0; i < AV_NUM_DATA_POINTERS && residual->buf[i]; i++)
+        memset(residual->buf[i]->data, 0, residual->buf[i]->size);
+
+    for (i = 0; i < desc->nb_components; i++) {
+        const int w1 = (i == 1 || i == 2) ? cw : width;
+        const int h1 = (i == 1 || i == 2) ? ch : height;
+
+        const int depth = desc->comp[i].depth;
+        const int max_val = 1 << depth;
+
+        memset(f->p_image_line_buf, 0, 2 * width * sizeof(*f->p_image_line_buf));
+        memset(f->c_image_line_buf, 0, 2 * width * sizeof(*f->c_image_line_buf));
+
+        for (y = 0; y < h1; y++) {
+            memset(f->p_image_line_buf, 0, width * sizeof(*f->p_image_line_buf));
+            memset(f->c_image_line_buf, 0, width * sizeof(*f->c_image_line_buf));
+            av_read_image_line(f->c_image_line_buf,
+                               (void *)curr->data,
+                               curr->linesize,
+                               desc,
+                               0, y, i, w1, 0);
+            av_read_image_line(f->p_image_line_buf,
+                              (void *)prev->data,
+                              prev->linesize,
+                              desc,
+                              0, y, i, w1, 0);
+            for (x = 0; x < w1; ++x) {
+                f->c_image_line_buf[x] = (f->c_image_line_buf[x] + f->p_image_line_buf[x] - (max_val >> 2)) & (max_val - 1);
+            }
+            av_write_image_line(f->c_image_line_buf,
+                                residual->data,
+                                residual->linesize,
+                                desc,
+                                0, y, i, w1);
+        }
+    }
+
+    /* Report a failed copy instead of returning success unconditionally. */
+    ret = av_frame_copy(curr, residual);
+
+    return ret < 0 ? ret : 0;
+}
+
 static inline av_flatten int get_symbol_inline(RangeCoder *c, uint8_t *state,
                                                int is_signed)
 {
@@ -97,6 +166,83 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState *const state,
     return ret;
 }

+static int decode_q_branch(FFV1Context *f, int level, int x, int y)
+{
+    RangeCoder *const c = &f->slice_context[0]->c;
+    OBMCContext *s = &f->obmc;
+    const int w= s->b_width << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    int trx= (x+1)<<rem_depth;
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+    int res;
+    /* Decodes the block-tree node (level, x, y); on keyframes nothing is read and the node is filled from null_block as intra. */
+    if(s->keyframe){
+        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
+        return 0;
+    }
+    /* Leaf: forced at block_max_depth, otherwise signalled by one context-coded bit; a zero bit recurses into the four children. */
+    if(level==s->block_max_depth || get_rac(c, &f->block_state[4 + s_context])){
+        int type, mx, my;
+        int l = left->color[0];
+        int cb= left->color[1];
+        int cr= left->color[2];
+        unsigned ref = 0;
+        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
+        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
+        /* Intra/inter flag, coded in a context selected by the left and top block types. */
+        type= get_rac(c, &f->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
+        /* Intra: colour deltas are added to the left block's colours. Inter: optional ref index, then MV deltas added to pred_mv(). */
+        if(type){
+            pred_mv(s, &mx, &my, 0, left, top, tr);
+            l += get_symbol(c, &f->block_state[32], 1);
+            if (f->obmc.nb_planes > 2) {
+                cb += get_symbol(c, &f->block_state[64], 1);
+                cr += get_symbol(c, &f->block_state[96], 1);
+            }
+        }else{
+            if(s->ref_frames > 1)
+                ref = get_symbol(c, &f->block_state[128 + 1024 + 32*ref_context], 0);
+            if (ref >= s->ref_frames) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid ref\n");
+                return AVERROR_INVALIDDATA;
+            }
+            pred_mv(s, &mx, &my, ref, left, top, tr);
+            mx += get_symbol(c, &f->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
+            my += get_symbol(c, &f->block_state[128 + 32*(my_context + 16*!!ref)], 1);
+        }
+        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
+    }else{
+        if ((res = decode_q_branch(f, level+1, 2*x+0, 2*y+0)) < 0 ||
+            (res = decode_q_branch(f, level+1, 2*x+1, 2*y+0)) < 0 ||
+            (res = decode_q_branch(f, level+1, 2*x+0, 2*y+1)) < 0 ||
+            (res = decode_q_branch(f, level+1, 2*x+1, 2*y+1)) < 0)
+            return res;
+    }
+    return 0;
+}
+
+/* Decode every top-level block in raster order; the first negative result aborts and is returned. */
+static int decode_blocks(FFV1Context *s){
+    const int cols = s->obmc.b_width;
+    const int rows = s->obmc.b_height;
+    int bx, by;
+
+    for (by = 0; by < rows; by++) {
+        for (bx = 0; bx < cols; bx++) {
+            int err = decode_q_branch(s, 0, bx, by);
+            if (err < 0)
+                return err;
+        }
+    }
+    return 0;
+}
+
 #define TYPE int16_t
 #define RENAME(name) name
 #include "ffv1dec_template.c"
@@ -419,6 +565,13 @@ static int read_extra_header(FFV1Context *f)
         if (f->micro_version < 0)
             return AVERROR_INVALIDDATA;
     }
+
+    if (f->version == 3 && f->micro_version > 4 || f->version == 4 && f->micro_version > 2) {
+        f->p_frame = 1;
+        f->micro_version--;
+    } else {
+        f->p_frame = 0;
+    }
     f->ac = get_symbol(c, state, 0);

     if (f->ac == AC_RANGE_CUSTOM_TAB) {
@@ -514,7 +667,7 @@ static int read_extra_header(FFV1Context *f)
 static int read_header(FFV1Context *f)
 {
     uint8_t state[CONTEXT_SIZE];
-    int i, j, context_count = -1; //-1 to avoid warning
+    int i, j, ret, context_count = -1; //-1 to avoid warning
     RangeCoder *const c = &f->slice_context[0]->c;

     memset(state, 128, sizeof(state));
@@ -669,6 +822,9 @@ static int read_header(FFV1Context *f)
         return AVERROR(ENOSYS);
     }

+    if ((ret = ff_obmc_decode_init(&f->obmc)) < 0)
+        return ret;
+
     ff_dlog(f->avctx, "%d %d %d\n",
             f->chroma_h_shift, f->chroma_v_shift, f->avctx->pix_fmt);
     if (f->version < 2) {
@@ -750,6 +906,49 @@ static int read_header(FFV1Context *f)
             }
         }
     }
+
+    return 0;
+}
+
+static int decode_p_header(FFV1Context *f)
+{
+    uint8_t state[CONTEXT_SIZE];
+    int plane_index;
+    RangeCoder *const c = &f->slice_context[0]->c;
+    /* Reads the inter-prediction header fields from slice_context[0]'s range coder into f->obmc; returns 0 or AVERROR_INVALIDDATA. */
+    memset(state, 128, sizeof(state));
+    /* Keyframes reset block_state to MID_STATE and carry max_ref_frames (stored minus one). */
+    if (f->key_frame) {
+        memset(f->block_state, MID_STATE, sizeof(f->block_state));
+        f->obmc.max_ref_frames = get_symbol(c, state, 0) + 1;
+    }
+    if (!f->key_frame) {
+        for(plane_index=0; plane_index<FFMIN(f->obmc.nb_planes, 2); plane_index++){
+            int htaps, i, sum=0;
+            PlaneObmc *p= &f->obmc.plane[plane_index];
+            p->diag_mc = get_rac(c, state);
+            htaps = get_symbol(c, state, 0)*2 + 2;
+            if((unsigned)htaps > HTAPS_MAX || htaps==0)
+                return AVERROR_INVALIDDATA;
+            p->htaps= htaps;
+            for(i= p->htaps/2; i; i--) {
+                p->hcoeff[i]= get_symbol(c, state, 0) * (1-2*(i&1)); /* odd i negates the decoded magnitude */
+                sum += p->hcoeff[i];
+            }
+            p->hcoeff[0]= 32-sum; /* hcoeff[0] plus the decoded taps add up to 32 */
+        }
+        f->obmc.plane[2].diag_mc= f->obmc.plane[1].diag_mc;
+        f->obmc.plane[2].htaps  = f->obmc.plane[1].htaps;
+        memcpy(f->obmc.plane[2].hcoeff, f->obmc.plane[1].hcoeff, sizeof(f->obmc.plane[1].hcoeff));
+    }
+    /* Read for every frame type; block_max_depth is only accepted when it is 0 or 1. */
+    f->obmc.mv_scale       = get_symbol(c, state, 0);
+    f->obmc.block_max_depth= get_symbol(c, state, 0);
+    if(f->obmc.block_max_depth > 1 || f->obmc.block_max_depth < 0){
+        av_log(f->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large\n", f->obmc.block_max_depth);
+        f->obmc.block_max_depth= 0;
+        return AVERROR_INVALIDDATA;
+    }
     return 0;
 }

@@ -778,7 +977,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     int buf_size        = avpkt->size;
     FFV1Context *f      = avctx->priv_data;
     RangeCoder *const c = &f->slice_context[0]->c;
-    int i, ret;
+    int i, ret, plane_index;
     uint8_t keystate = 128;
     uint8_t *buf_p;
     AVFrame *p;
@@ -801,8 +1000,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);

     p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. P
+    f->obmc.current_picture->pict_type = AV_PICTURE_TYPE_I;
     if (get_rac(c, &keystate)) {
         p->key_frame    = 1;
+        f->obmc.keyframe = f->key_frame = 1;
         f->key_frame_ok = 0;
         if ((ret = read_header(f)) < 0)
             return ret;
@@ -814,6 +1015,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
             return AVERROR_INVALIDDATA;
         }
         p->key_frame = 0;
+        f->obmc.keyframe = f->key_frame = 0;
+    }
+
+    if (f->p_frame) {
+        if ((ret = decode_p_header(f)) < 0)
+            return ret;
+
+        p->pict_type = p->key_frame ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+        if ((ret=ff_obmc_common_init_after_header(&f->obmc)) < 0)
+            return ret;
     }

     if ((ret = ff_thread_get_buffer(avctx, &f->picture, AV_GET_BUFFER_FLAG_REF)) < 0)
@@ -823,6 +1035,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
         av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n",
                f->version, p->key_frame, f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample);

+    if (f->p_frame) {
+        if ((ret = ff_obmc_predecode_frame(&f->obmc)) < 0)
+            return ret;
+
+        if ((ret = decode_blocks(f)) < 0)
+            return ret;
+    }
+
     ff_thread_finish_setup(avctx);

     buf_p = buf + buf_size;
@@ -899,6 +1119,33 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
                           fs->slice_height);
         }
     }
+
+    if (f->p_frame) {
+        ff_thread_await_progress(&f->last_picture, INT_MAX, 0);
+
+        av_frame_copy(f->obmc.last_pictures[1], f->last_picture.f);
+
+        for (plane_index=0; plane_index < f->obmc.nb_planes; plane_index++) {
+            PlaneObmc *pc = &f->obmc.plane[plane_index];
+            int w = pc->width;
+            int h = pc->height;
+
+            if(!p->key_frame){
+                memset(f->obmc.spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
+                predict_plane(&f->obmc, f->obmc.spatial_idwt_buffer, plane_index, 1);
+            }
+        }
+
+        if (!p->key_frame) {
+            if ((ret = ff_predict_frame(avctx, f)) < 0) {
+                ff_thread_report_progress(&f->picture, INT_MAX, 0);
+                return ret;
+            }
+        }
+        av_frame_copy(f->obmc.current_picture, f->picture.f);
+    }
+    ff_obmc_release_buffer(&f->obmc);
+
     ff_thread_report_progress(&f->picture, INT_MAX, 0);

     f->picture_number++;
@@ -922,10 +1169,18 @@ static int init_thread_copy(AVCodecContext *avctx)

     f->picture.f      = NULL;
     f->last_picture.f = NULL;
+    f->residual.f     = NULL;
     f->sample_buffer  = NULL;
     f->max_slice_count = 0;
     f->slice_count = 0;

+    f->obmc.current_picture = NULL;
+    for (i = 0; i < MAX_REF_FRAMES; i++)
+        f->obmc.last_pictures[i] = NULL;
+
+    f->p_image_line_buf = NULL;
+    f->c_image_line_buf = NULL;
+
     for (i = 0; i < f->quant_table_count; i++) {
         av_assert0(f->version > 1);
         f->initial_states[i] = av_memdup(f->initial_states[i],
@@ -934,11 +1189,47 @@ static int init_thread_copy(AVCodecContext *avctx)

     f->picture.f      = av_frame_alloc();
     f->last_picture.f = av_frame_alloc();
+    f->residual.f     = av_frame_alloc();
+
+    if (!f->picture.f || !f->last_picture.f || !f->residual.f)
+        goto fail;
+
+    f->obmc.current_picture = av_frame_alloc();
+    f->obmc.mconly_picture = av_frame_alloc();
+
+    f->width  = avctx->width;
+    f->height = avctx->height;
+
+    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, f->obmc.spatial_idwt_buffer, f->width, f->height * sizeof(IDWTELEM), fail);
+
+    for (i = 0; i < MAX_REF_FRAMES; i++)
+        f->obmc.last_pictures[i] = av_frame_alloc();
+
+    int w= AV_CEIL_RSHIFT(avctx->width,  LOG2_MB_SIZE);
+    int h= AV_CEIL_RSHIFT(avctx->height, LOG2_MB_SIZE);
+
+    f->obmc.b_width = w;
+    f->obmc.b_height= h;
+
+    f->obmc.block = av_mallocz_array(w * h,  sizeof(BlockNode) << 2); // FIXME Maybe large
+
+    f->obmc.avctx = avctx;
+
+    f->obmc.chroma_h_shift = f->chroma_h_shift;
+    f->obmc.chroma_v_shift = f->chroma_v_shift;
+
+    f->p_image_line_buf = av_mallocz_array(sizeof(*f->p_image_line_buf), 2 * f->width);
+    f->c_image_line_buf = av_mallocz_array(sizeof(*f->c_image_line_buf), 2 * f->width);
+
+    if (!f->p_image_line_buf || !f->c_image_line_buf)
+        goto fail;

     if ((ret = ff_ffv1_init_slice_contexts(f)) < 0)
         return ret;

     return 0;
+fail:
+    return AVERROR(ENOMEM);
 }
 #endif

@@ -955,6 +1246,7 @@ static void copy_fields(FFV1Context *fsdst, FFV1Context *fssrc, FFV1Context *fsr
     fsdst->colorspace          = fsrc->colorspace;

     fsdst->ec                  = fsrc->ec;
+    fsdst->p_frame             = fsrc->p_frame;
     fsdst->intra               = fsrc->intra;
     fsdst->slice_damaged       = fssrc->slice_damaged;
     fsdst->key_frame_ok        = fsrc->key_frame_ok;
@@ -975,23 +1267,48 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
 {
     FFV1Context *fsrc = src->priv_data;
     FFV1Context *fdst = dst->priv_data;
-    int i, ret;
+    int i, j, ret;

     if (dst == src)
         return 0;

     {
-        ThreadFrame picture = fdst->picture, last_picture = fdst->last_picture;
+        ThreadFrame picture = fdst->picture, last_picture = fdst->last_picture, residual = fdst->residual;
+        uint16_t *c_image_line_buf = fdst->c_image_line_buf, *p_image_line_buf = fdst->p_image_line_buf;
         uint8_t (*initial_states[MAX_QUANT_TABLES])[32];
         struct FFV1Context *slice_context[MAX_SLICES];
         memcpy(initial_states, fdst->initial_states, sizeof(fdst->initial_states));
         memcpy(slice_context,  fdst->slice_context , sizeof(fdst->slice_context));
+        AVFrame *current_picture = fdst->obmc.current_picture, *mconly_picture = fdst->obmc.mconly_picture;
+        AVFrame *last_pictures[MAX_REF_FRAMES];
+        BlockNode *block = fdst->obmc.block;
+        uint8_t *scratchbuf = fdst->obmc.scratchbuf;
+        uint8_t *emu_edge_buffer = fdst->obmc.emu_edge_buffer;
+        IDWTELEM *spatial_idwt_buffer = fdst->obmc.spatial_idwt_buffer;
+        for (i = 0; i < MAX_REF_FRAMES; i++)
+            last_pictures[i] = fdst->obmc.last_pictures[i];

         memcpy(fdst, fsrc, sizeof(*fdst));
         memcpy(fdst->initial_states, initial_states, sizeof(fdst->initial_states));
         memcpy(fdst->slice_context,  slice_context , sizeof(fdst->slice_context));
         fdst->picture      = picture;
         fdst->last_picture = last_picture;
+        fdst->residual     = residual;
+
+        fdst->p_image_line_buf = p_image_line_buf;
+        fdst->c_image_line_buf = c_image_line_buf;
+
+        fdst->obmc.current_picture   = current_picture;
+        fdst->obmc.mconly_picture    = mconly_picture;
+        for (i = 0; i < MAX_REF_FRAMES; i++)
+            fdst->obmc.last_pictures[i] = last_pictures[i];
+        fdst->obmc.block = block;
+        fdst->obmc.scratchbuf = scratchbuf;
+        fdst->obmc.emu_edge_buffer = emu_edge_buffer;
+        fdst->obmc.spatial_idwt_buffer = spatial_idwt_buffer;
+
+        fdst->obmc.avctx = dst;
+
         for (i = 0; i<fdst->num_h_slices * fdst->num_v_slices; i++) {
             FFV1Context *fssrc = fsrc->slice_context[i];
             FFV1Context *fsdst = fdst->slice_context[i];
@@ -1010,6 +1327,35 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
             return ret;
     }

+    for (i = 0; i < MAX_REF_FRAMES; i++)
+        av_frame_ref(fdst->obmc.last_pictures[i], fsrc->obmc.last_pictures[i]);
+
+    av_frame_ref(fdst->obmc.current_picture, fsrc->obmc.current_picture);
+
+    for (i = 0; i < MAX_REF_FRAMES; i++) {
+        for (j=0; j<9; j++) {
+            int is_chroma = !!(j%3);
+            int h = is_chroma ? AV_CEIL_RSHIFT(fsrc->avctx->height, fsrc->chroma_v_shift) : fsrc->avctx->height;
+            int ls = fdst->obmc.last_pictures[i]->linesize[j%3];
+            if (fsrc->obmc.halfpel_plane[i][1+j/3][j%3]) {
+                fdst->obmc.halfpel_plane[i][1+j/3][j%3] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
+                memcpy(
+                    fdst->obmc.halfpel_plane[i][1+j/3][j%3],
+                    fsrc->obmc.halfpel_plane[i][1+j/3][j%3] - EDGE_WIDTH*(1+fsrc->obmc.last_pictures[i]->linesize[j%3]),
+                    ls * (h + 2 * EDGE_WIDTH) * sizeof(*fdst->obmc.halfpel_plane[i][1+j/3][j%3])
+                );
+                fdst->obmc.halfpel_plane[i][1+j/3][j%3] += EDGE_WIDTH * (1 + ls);
+            }
+            fdst->obmc.halfpel_plane[i][0][j%3] = fdst->obmc.last_pictures[i]->data[j%3];
+        }
+    }
+
+    memcpy(
+        fdst->obmc.block,
+        fsrc->obmc.block,
+        (fsrc->obmc.b_width * fsrc->obmc.b_height * (sizeof(BlockNode) << (fsrc->obmc.block_max_depth*2)))
+    );
+
     fdst->fsrc = fsrc;

     return 0;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index dae68ae..4596585 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -41,6 +41,8 @@
 #include "mathops.h"
 #include "ffv1.h"

+#include "obme.h"
+
 static const int8_t quant5_10bit[256] = {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
@@ -136,6 +138,75 @@ static const uint8_t ver2_state[256] = {
     241, 243, 242, 244, 245, 246, 247, 248, 249, 250, 251, 252, 252, 253, 254, 255,
 };

+static int ff_frame_diff(FFV1Context *f, const AVFrame *pict)
+{
+    /*
+     * Replace f->residual.f with a writable version of pict whose samples
+     * hold the biased, wrapped difference against f->obmc.current_picture,
+     * then make f->picture.f a new reference to that residual.
+     * Returns 0 on success or the negative error code of the failing
+     * av_frame_ref() / av_frame_make_writable() call.
+     */
+    AVFrame *residual = f->residual.f;
+    AVFrame *prev     = f->obmc.current_picture;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(prev->format);
+    int has_plane[4] = { 0 };
+    int width  = f->width, height = f->height;
+    const int chroma_w = AV_CEIL_RSHIFT(width,  desc->log2_chroma_w);
+    const int chroma_h = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
+    int comp, row, col, ret;
+
+    if (f->picture.f)
+        av_frame_unref(f->picture.f);
+    if (residual)
+        av_frame_unref(residual);
+    /* reference the input frame, then make sure its data may be overwritten */
+    ret = av_frame_ref(residual, pict);
+    if (ret < 0)
+        return ret;
+    ret = av_frame_make_writable(residual);
+    if (ret < 0) {
+        av_frame_unref(residual);
+        return ret;
+    }
+
+    for (comp = 0; comp < desc->nb_components; comp++)
+        has_plane[desc->comp[comp].plane] = 1;
+    /* zero the leading plane buffers; stops at the first unused plane index */
+    for (comp = 0; comp < desc->nb_components && has_plane[comp]; comp++)
+        memset(residual->buf[comp]->data, 0, residual->buf[comp]->size * sizeof(*residual->buf[comp]->data));
+
+    for (comp = 0; comp < desc->nb_components; comp++) {
+        /* components 1 and 2 use the chroma dimensions */
+        const int is_chroma = comp == 1 || comp == 2;
+        const int plane_w   = is_chroma ? chroma_w : width;
+        const int plane_h   = is_chroma ? chroma_h : height;
+        const int range     = 1 << desc->comp[comp].depth;
+
+        memset(f->p_image_line_buf, 0, 2 * width * sizeof(*f->p_image_line_buf));
+        memset(f->c_image_line_buf, 0, 2 * width * sizeof(*f->c_image_line_buf));
+        for (row = 0; row < plane_h; row++) {
+            memset(f->p_image_line_buf, 0, width * sizeof(*f->p_image_line_buf));
+            memset(f->c_image_line_buf, 0, width * sizeof(*f->c_image_line_buf));
+            av_read_image_line(f->c_image_line_buf, (void *)pict->data, pict->linesize,
+                               desc, 0, row, comp, plane_w, 0);
+            av_read_image_line(f->p_image_line_buf, (void *)prev->data, prev->linesize,
+                               desc, 0, row, comp, plane_w, 0);
+            /* (current - previous + range/4) modulo range */
+            for (col = 0; col < plane_w; col++)
+                f->c_image_line_buf[col] = (f->c_image_line_buf[col] - f->p_image_line_buf[col] + (range >> 2)) & (range - 1);
+            av_write_image_line(f->c_image_line_buf, residual->data, residual->linesize,
+                                desc, 0, row, comp, plane_w);
+        }
+    }
+
+    ret = av_frame_ref(f->picture.f, residual);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
 static void find_best_state(uint8_t best_state[256][256],
                             const uint8_t one_state[256])
 {
@@ -268,6 +339,162 @@ static inline void put_vlc_symbol(PutBitContext *pb, VlcState *const state,
     update_vlc_state(state, v);
 }

+typedef struct RangeEncoderContext {
+    RangeCoder c;                /* private coder; init_frame_encoder() copies slice 0's coder into it */
+    uint8_t buffer[1024];        /* output area the private coder is pointed at */
+    uint8_t state[128 + 32*128]; /* working copy of FFV1Context.block_state */
+    uint8_t *pbbak;              /* slice 0 coder's bytestream pointer saved at init time */
+    uint8_t *pbbak_start;        /* slice 0 coder's bytestream_start saved at init time */
+    int base_bits;               /* get_rac_count() minus 8 * bytes already written, taken at init time */
+} RangeEncoderContext;
+
+static void put_encoder_rac(ObmcCoderContext *c, int ctx, int v)
+{
+    /* Code bit v with context index ctx: through the private coder when
+     * c->priv_data is set, otherwise through slice 0's coder and block_state. */
+    RangeEncoderContext *enc = c->priv_data;
+    FFV1Context *ffv1 = c->avctx->priv_data;
+
+    if (enc)
+        put_rac(&enc->c, enc->state + ctx, v);
+    else
+        put_rac(&ffv1->slice_context[0]->c, ffv1->block_state + ctx, v);
+}
+
+static void put_encoder_symbol(ObmcCoderContext *c, int ctx, int v, int sign)
+{
+    /* Code symbol v starting at context index ctx: through the private coder
+     * when c->priv_data is set, otherwise through slice 0's coder. */
+    RangeEncoderContext *enc = c->priv_data;
+    FFV1Context *ffv1 = c->avctx->priv_data;
+
+    if (enc)
+        put_symbol(&enc->c, enc->state + ctx, v, sign);
+    else
+        put_symbol(&ffv1->slice_context[0]->c, ffv1->block_state + ctx, v, sign);
+}
+
+static void ff_ffv1_init_encode_callbacks(ObmcCoderContext *, AVCodecContext *);
+
+static void init_frame_encoder(AVCodecContext *avctx, ObmcCoderContext *c)
+{
+    /* Attach to c a private coder that continues from slice 0's coder state but
+     * writes into its own buffer. NOTE(review): the allocation is not checked. */
+    FFV1Context *ffv1 = avctx->priv_data;
+    RangeCoder *const main_rc = &ffv1->slice_context[0]->c;
+    RangeEncoderContext *enc = av_mallocz(sizeof(*enc));
+    c->priv_data     = enc;
+    enc->pbbak       = main_rc->bytestream;
+    enc->pbbak_start = main_rc->bytestream_start;
+    enc->base_bits   = get_rac_count(main_rc) - 8*(main_rc->bytestream - main_rc->bytestream_start);
+    enc->c           = *main_rc;
+    enc->c.bytestream_start = enc->c.bytestream = enc->buffer; //FIXME end/start? and at the others too
+    memcpy(enc->state, ffv1->block_state, sizeof(ffv1->block_state));
+    ff_ffv1_init_encode_callbacks(c, avctx);
+}
+
+static void free_coder(ObmcCoderContext *c)
+{
+    av_freep(&c->priv_data); /* frees the private coder and resets c->priv_data to NULL */
+}
+
+static void copy_coder(ObmcCoderContext *c)
+{
+    /* Commit: copy the private coder's bytes to the saved output position and adopt its state. */
+    RangeEncoderContext *enc = c->priv_data;
+    FFV1Context *ffv1 = c->avctx->priv_data;
+    RangeCoder *const main_rc = &ffv1->slice_context[0]->c;
+    const int written = enc->c.bytestream - enc->c.bytestream_start;
+
+    memcpy(enc->pbbak, enc->buffer, written);
+    *main_rc                  = enc->c;
+    main_rc->bytestream       = enc->pbbak + written;
+    main_rc->bytestream_start = enc->pbbak_start;
+    memcpy(ffv1->block_state, enc->state, sizeof(ffv1->block_state));
+}
+
+static void reset_coder(ObmcCoderContext *c)
+{
+    /* Load the private coder's state into slice 0's coder, with output rewound to the saved position. */
+    RangeEncoderContext *enc = c->priv_data;
+    FFV1Context *ffv1 = c->avctx->priv_data;
+    RangeCoder *const main_rc = &ffv1->slice_context[0]->c;
+    *main_rc                  = enc->c;
+    main_rc->bytestream       = enc->pbbak;
+    main_rc->bytestream_start = enc->pbbak_start;
+    memcpy(ffv1->block_state, enc->state, sizeof(ffv1->block_state));
+}
+
+static void put_level_break(ObmcCoderContext *c, int ctx, int v)
+{
+    put_encoder_rac(c, ctx, v); /* callback: forwards v to put_encoder_rac() with context index ctx */
+}
+
+static void put_block_type  (struct ObmcCoderContext *c, int ctx, int type)
+{
+    put_encoder_rac(c, ctx, type); /* callback: type is passed to put_encoder_rac() as the bit to code */
+}
+
+static void put_best_ref    (struct ObmcCoderContext *c, int ctx, int best_ref)
+{
+    put_encoder_symbol(c, ctx, best_ref, 0); /* callback: sign argument 0, unlike the mv/color callbacks */
+}
+
+static void put_block_mv    (struct ObmcCoderContext *c, int ctx_mx, int ctx_my, int mx, int my)
+{
+    put_encoder_symbol(c, ctx_mx, mx, 1); /* mx is written first, with sign argument 1 */
+    put_encoder_symbol(c, ctx_my, my, 1); /* then my, using its own context index */
+}
+
+static void put_block_color (struct ObmcCoderContext *c, int ctx_l, int ctx_cb, int ctx_cr, int l, int cb, int cr)
+{
+    const FFV1Context *ffv1 = c->avctx->priv_data;
+    put_encoder_symbol(c, ctx_l, l, 1);   /* l is always written */
+    if (ffv1->obmc.nb_planes <= 2)
+        return;                           /* cb/cr are written only with more than two planes */
+    put_encoder_symbol(c, ctx_cb, cb, 1);
+    put_encoder_symbol(c, ctx_cr, cr, 1);
+}
+
+static int get_coder_bits(ObmcCoderContext *c)
+{
+    RangeEncoderContext *enc = c->priv_data; /* dereferenced unconditionally: needs an initialised private coder */
+    return get_rac_count(&enc->c) - enc->base_bits;
+}
+
+static int get_coder_available_bytes(ObmcCoderContext *c)
+{
+    /* bytestream_end - bytestream of the active coder: the private one when
+     * c->priv_data is set, otherwise slice 0's. */
+    RangeEncoderContext *enc = c->priv_data;
+    FFV1Context *ffv1 = c->avctx->priv_data;
+    const RangeCoder *rc = enc ? &enc->c : &ffv1->slice_context[0]->c;
+
+    return rc->bytestream_end - rc->bytestream;
+}
+
+static void ff_ffv1_init_encode_callbacks(ObmcCoderContext *c, AVCodecContext *avctx)
+{
+    /* Bind c to avctx and install the FFV1 range-coder callbacks.
+     * The codec context is taken from the avctx argument: c->avctx is only
+     * assigned below, so it must not be dereferenced before that point. */
+    FFV1Context *f = (FFV1Context *)avctx->priv_data;
+    av_assert0(sizeof(f->block_state) >= 256);
+
+    c->avctx            = avctx;
+    c->put_level_break  = put_level_break;
+    c->put_block_type   = put_block_type;
+    c->put_block_color  = put_block_color;
+    c->put_best_ref     = put_best_ref;
+    c->put_block_mv     = put_block_mv;
+    c->init_frame_coder = init_frame_encoder; /* private-coder lifecycle */
+    c->reset_coder      = reset_coder;
+    c->copy_coder       = copy_coder;
+    c->free             = free_coder;
+    c->get_bits         = get_coder_bits;     /* size queries */
+    c->available_bytes  = get_coder_available_bytes;
+}
+
 #define TYPE int16_t
 #define RENAME(name) name
 #include "ffv1enc_template.c"
@@ -388,6 +615,32 @@ static void write_header(FFV1Context *f)
     }
 }

+static void write_p_header(FFV1Context *f)
+{
+    /* Emit the per-frame OBMC header through slice 0's range coder, using a
+     * local state array initialised to 128. */
+    RangeCoder *const rc = &f->slice_context[0]->c;
+    uint8_t state[CONTEXT_SIZE];
+    int tap, plane_index;
+
+    memset(state, 128, sizeof(state));
+    if (f->key_frame) {
+        memset(f->block_state, MID_STATE, sizeof(f->block_state));
+        put_symbol(rc, state, f->obmc.max_ref_frames-1, 0);
+    } else { //FIXME update_mc
+        for (plane_index = 0; plane_index < FFMIN(f->obmc.nb_planes, 2); plane_index++) {
+            const PlaneObmc *plane = &f->obmc.plane[plane_index];
+            const int half_taps = plane->htaps/2;
+            put_rac(rc, state, plane->diag_mc);
+            put_symbol(rc, state, half_taps-1, 0);
+            for (tap = half_taps; tap; tap--)
+                put_symbol(rc, state, FFABS(plane->hcoeff[tap]), 0);
+        }
+    }
+    put_symbol(rc, state, f->obmc.mv_scale, 0);
+    put_symbol(rc, state, f->obmc.block_max_depth, 0);
+}
+
 static int write_extradata(FFV1Context *f)
 {
     RangeCoder *const c = &f->c;
@@ -410,9 +663,9 @@ static int write_extradata(FFV1Context *f)
     put_symbol(c, state, f->version, 0);
     if (f->version > 2) {
         if (f->version == 3) {
-            f->micro_version = 4;
+            f->micro_version = 4 + f->p_frame;
         } else if (f->version == 4)
-            f->micro_version = 2;
+            f->micro_version = 2 + f->p_frame;
         put_symbol(c, state, f->micro_version, 0);
     }

@@ -599,6 +852,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             return AVERROR_INVALIDDATA;
         }
         s->version = FFMAX(s->version, 1);
+        s->p_frame = 0;
     case AV_PIX_FMT_GRAY8:
     case AV_PIX_FMT_YA8:
     case AV_PIX_FMT_YUV444P:
@@ -619,6 +873,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             s->bits_per_raw_sample = 8;
         break;
     case AV_PIX_FMT_RGB32:
+        s->p_frame = 0;
         s->colorspace = 1;
         s->transparency = 1;
         s->chroma_planes = 1;
@@ -635,6 +890,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
         }
         break;
     case AV_PIX_FMT_0RGB32:
+        s->p_frame = 0;
         s->colorspace = 1;
         s->chroma_planes = 1;
         s->bits_per_raw_sample = 8;
@@ -666,12 +922,14 @@ FF_ENABLE_DEPRECATION_WARNINGS
             }
         }
         s->version = FFMAX(s->version, 1);
+        s->p_frame = 0;
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "format not supported\n");
         return AVERROR(ENOSYS);
     }
     av_assert0(s->bits_per_raw_sample >= 8);
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);

     if (s->bits_per_raw_sample > 8) {
         if (s->ac == AC_GOLOMB_RICE) {
@@ -704,6 +962,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
             s->state_transition[i] = c.one_state[i];
     }

+    if (avctx->width % 16 || avctx->height % 16) {
+        s->p_frame = 0;
+    }
+
     for (i = 0; i < 256; i++) {
         s->quant_table_count = 2;
         if (s->bits_per_raw_sample <= 8) {
@@ -886,6 +1148,9 @@ slices_ok:
             }
     }

+    ff_obmc_encode_init(&s->obmc, avctx);
+    ff_ffv1_init_encode_callbacks(&s->obmc.obmc_coder, avctx);
+
     return 0;
 }

@@ -1086,12 +1351,20 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *pict, int *got_packet)
 {
     FFV1Context *f      = avctx->priv_data;
+    if (f->p_frame) {
+        if (f->last_picture.f)
+            av_frame_unref(f->last_picture.f);
+        FFSWAP(ThreadFrame, f->picture, f->last_picture);
+    }
     RangeCoder *const c = &f->slice_context[0]->c;
     AVFrame *const p    = f->picture.f;
     int used_count      = 0;
     uint8_t keystate    = 128;
     uint8_t *buf_p;
-    int i, ret;
+    AVFrame *pic = NULL;
+    const int width  = f->avctx->width;
+    const int height = f->avctx->height;
+    int plane_index, i, ret;
     int64_t maxsize =   AV_INPUT_BUFFER_MIN_SIZE
                       + avctx->width*avctx->height*37LL*4;

@@ -1144,12 +1417,35 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     if (f->version > 3)
         maxsize = AV_INPUT_BUFFER_MIN_SIZE + avctx->width*avctx->height*3LL*4;

+    if (f->p_frame) {
+        maxsize += f->obmc.b_width*f->obmc.b_height*MB_SIZE*MB_SIZE*3;
+    }
+
     if ((ret = ff_alloc_packet2(avctx, pkt, maxsize, 0)) < 0)
         return ret;

     ff_init_range_encoder(c, pkt->data, pkt->size);
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);

+    if (f->p_frame) {
+        av_frame_copy(f->obmc.input_picture, pict);
+        for (i=0; i < f->obmc.nb_planes; i++)
+        {
+            int hshift = i ? f->chroma_h_shift : 0;
+            int vshift = i ? f->chroma_v_shift : 0;
+            f->obmc.mpvencdsp.draw_edges(f->obmc.input_picture->data[i], f->obmc.input_picture->linesize[i],
+                                    AV_CEIL_RSHIFT(width, hshift), AV_CEIL_RSHIFT(height, vshift),
+                                    EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
+                                    EDGE_TOP | EDGE_BOTTOM);
+        }
+        emms_c();
+        pic = f->obmc.input_picture;
+        pic->pict_type = pict->pict_type;
+        pic->quality = pict->quality;
+
+        f->obmc.m.picture_number= avctx->frame_number;
+    }
+
     av_frame_unref(p);
     if ((ret = av_frame_ref(p, pict)) < 0)
         return ret;
@@ -1162,11 +1458,64 @@ FF_ENABLE_DEPRECATION_WARNINGS
     if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
         put_rac(c, &keystate, 1);
         f->key_frame = 1;
+        f->obmc.keyframe = 1;
         f->gob_count++;
         write_header(f);
     } else {
         put_rac(c, &keystate, 0);
         f->key_frame = 0;
+        f->obmc.keyframe = 0;
+    }
+
+    if (f->p_frame) {
+        write_p_header(f);
+
+        f->obmc.m.pict_type = pic->pict_type = f->key_frame ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+        ff_obmc_pre_encode_frame(&f->obmc, avctx, pict);
+
+        ff_obmc_common_init_after_header(&f->obmc);
+
+        f->obmc.m.misc_bits = 8*(c->bytestream - c->bytestream_start);
+        ff_obmc_encode_blocks(&f->obmc, 1);
+        f->obmc.m.mv_bits = 8*(c->bytestream - c->bytestream_start) - f->obmc.m.misc_bits;
+
+        for(plane_index=0; plane_index < f->obmc.nb_planes; plane_index++){
+            PlaneObmc *p= &f->obmc.plane[plane_index];
+            int w= p->width;
+            int h= p->height;
+
+            if(pic->pict_type == AV_PICTURE_TYPE_I) {
+                av_frame_copy(f->obmc.current_picture, pict);
+                break;
+            } else {
+                memset(f->obmc.spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
+                predict_plane(&f->obmc, f->obmc.spatial_idwt_buffer, plane_index, 1);
+            }
+        }
+
+        if (!f->key_frame) {
+            if ((ret = ff_frame_diff(f, pict)) < 0) {
+                return ret;
+            }
+            av_frame_copy(f->obmc.current_picture, pict);
+        }
+
+        ff_obmc_release_buffer(&f->obmc);
+
+        f->obmc.current_picture->coded_picture_number = avctx->frame_number;
+        f->obmc.current_picture->pict_type = pic->pict_type;
+        f->obmc.current_picture->quality = pic->quality;
+        f->obmc.m.frame_bits = 8*(c->bytestream - c->bytestream_start);
+        f->obmc.m.p_tex_bits = f->obmc.m.frame_bits - f->obmc.m.misc_bits - f->obmc.m.mv_bits;
+        f->obmc.m.current_picture.f->display_picture_number =
+        f->obmc.m.current_picture.f->coded_picture_number   = avctx->frame_number;
+        f->obmc.m.current_picture.f->quality                = pic->quality;
+        f->obmc.m.total_bits += 8*(c->bytestream - c->bytestream_start);
+
+        f->obmc.m.last_pict_type = f->obmc.m.pict_type;
+
+        emms_c();
     }

     if (f->ac == AC_RANGE_CUSTOM_TAB) {
@@ -1232,19 +1581,34 @@ FF_ENABLE_DEPRECATION_WARNINGS
     pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame;
     *got_packet = 1;

+    if (f->p_frame) {
+        if (f->picture.f)
+            av_frame_unref(f->picture.f);
+        if ((ret = av_frame_ref(f->picture.f, pict)) < 0)
+            return ret;
+        if (f->last_picture.f)
+            av_frame_unref(f->last_picture.f);
+    }
+
     return 0;
 }

 static av_cold int encode_close(AVCodecContext *avctx)
 {
+    FFV1Context *f = avctx->priv_data;
+    /* the shared FFV1 teardown runs first, then the encoder-only OBMC input frame is freed */
     ff_ffv1_close(avctx);
+    av_frame_free(&f->obmc.input_picture); /* av_frame_free() also resets the pointer to NULL */
     return 0;
 }

 #define OFFSET(x) offsetof(FFV1Context, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
+    FF_MPV_COMMON_OPTS
+    { "iter",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ITER }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" },
     { "slicecrc", "Protect slices with CRCs", OFFSET(ec), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
+    { "pframe", "Use P frames", OFFSET(p_frame), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     { "coder", "Coder type", OFFSET(ac), AV_OPT_TYPE_INT,
             { .i64 = 0 }, -2, 2, VE, "coder" },
         { "rice", "Golomb rice", 0, AV_OPT_TYPE_CONST,
@@ -1271,6 +1635,8 @@ static const AVClass ffv1_class = {
 #if FF_API_CODER_TYPE
 static const AVCodecDefault ffv1_defaults[] = {
     { "coder", "-1" },
+    { "me_method", "iter" },
+    { "flags", "+qpel+mv4" },
     { NULL },
 };
 #endif
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index dc3e6f8..5f1a223 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -627,7 +627,9 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
         c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
         c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
 #endif
-        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
+        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) &&
+            avctx->codec_id != AV_CODEC_ID_SNOW &&
+            avctx->codec_id != AV_CODEC_ID_FFV1) {
             c->sad[0]        = ff_sad16_sse2;
             c->pix_abs[0][0] = ff_sad16_sse2;
             c->pix_abs[0][1] = ff_sad16_x2_sse2;
--
2.7.4 (Apple Git-66)


## 0003-median-compare-function-for-ffv1-me.patch
From 1cb7311d8c6b3d120e79c07c4eaf8006f244026d Mon Sep 17 00:00:00 2001
From: Stanislav Dolganov <dolganov@qst.hk>
Date: Thu, 18 Aug 2016 14:40:09 +0300
Subject: [PATCH 3/4] median compare function for ffv1 me

---
 libavcodec/avcodec.h       | 33 ++++++++++----------
 libavcodec/ffv1enc.c       |  2 ++
 libavcodec/me_cmp.c        | 76 ++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/me_cmp.h        |  1 +
 libavcodec/motion_est.c    |  1 +
 libavcodec/mpegvideo.h     |  3 +-
 libavcodec/obme.c          |  1 +
 libavcodec/options_table.h |  1 +
 8 files changed, 101 insertions(+), 17 deletions(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 3b21537..8293b8e 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2093,22 +2093,23 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     int ildct_cmp;
-#define FF_CMP_SAD    0
-#define FF_CMP_SSE    1
-#define FF_CMP_SATD   2
-#define FF_CMP_DCT    3
-#define FF_CMP_PSNR   4
-#define FF_CMP_BIT    5
-#define FF_CMP_RD     6
-#define FF_CMP_ZERO   7
-#define FF_CMP_VSAD   8
-#define FF_CMP_VSSE   9
-#define FF_CMP_NSSE   10
-#define FF_CMP_W53    11
-#define FF_CMP_W97    12
-#define FF_CMP_DCTMAX 13
-#define FF_CMP_DCT264 14
-#define FF_CMP_CHROMA 256
+#define FF_CMP_SAD          0
+#define FF_CMP_SSE          1
+#define FF_CMP_SATD         2
+#define FF_CMP_DCT          3
+#define FF_CMP_PSNR         4
+#define FF_CMP_BIT          5
+#define FF_CMP_RD           6
+#define FF_CMP_ZERO         7
+#define FF_CMP_VSAD         8
+#define FF_CMP_VSSE         9
+#define FF_CMP_NSSE         10
+#define FF_CMP_W53          11
+#define FF_CMP_W97          12
+#define FF_CMP_DCTMAX       13
+#define FF_CMP_DCT264       14
+#define FF_CMP_MEDIAN_SAD   15
+#define FF_CMP_CHROMA       256

     /**
      * ME diamond size & shape
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 4596585..f946602 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -1637,6 +1637,8 @@ static const AVCodecDefault ffv1_defaults[] = {
     { "coder", "-1" },
     { "me_method", "iter" },
     { "flags", "+qpel+mv4" },
+    { "cmp", "msad" },
+    { "subcmp", "msad" },
     { NULL },
 };
 #endif
diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c
index dc76b07..6639b91 100644
--- a/libavcodec/me_cmp.c
+++ b/libavcodec/me_cmp.c
@@ -139,6 +139,45 @@ static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     return s;
 }

+static inline int pix_median_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                             ptrdiff_t stride, int h)
+{
+    /*
+     * Sum of absolute median-prediction errors of the difference signal
+     * D = pix1 - pix2 over a 16-wide, h-high block.
+     *
+     * Predictor for D at column x:
+     *   - first row, x == 0:  0
+     *   - first row, x  > 0:  left neighbour
+     *   - other rows, x == 0: top neighbour
+     *   - other rows, x  > 0: mid_pred(top, left, top + left - topleft)
+     *
+     * v is unused. pix1 and pix2 are stepped with the same stride.
+     */
+    int sum, row, col;
+
+#define V(x) (pix1[x] - pix2[x])
+
+    /* first row */
+    sum = abs(V(0));
+    for (col = 1; col < 16; col++)
+        sum += abs(V(col) - V(col - 1));
+
+    /* remaining rows */
+    for (row = 1; row < h; row++) {
+        pix1 += stride;
+        pix2 += stride;
+        sum  += abs(V(0) - V(-stride));
+        for (col = 1; col < 16; col++) {
+            const int top  = V(col - stride);
+            const int left = V(col - 1);
+            sum += abs(V(col) - mid_pred(top, left, top + left - V(col - stride - 1)));
+        }
+    }
+#undef V
+    return sum;
+}
+
 static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                           ptrdiff_t stride, int h)
 {
@@ -247,6 +286,37 @@ static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     return s;
 }

+static inline int pix_median_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                             ptrdiff_t stride, int h)
+{
+    int s = 0, i, j;
+
+#define V(x) (pix1[x] - pix2[x])
+
+    s    += abs(V(0));
+    s    += abs(V(1) - V(0));
+    s    += abs(V(2) - V(1));
+    s    += abs(V(3) - V(2));
+    s    += abs(V(4) - V(3));
+    s    += abs(V(5) - V(4));
+    s    += abs(V(6) - V(5));
+    s    += abs(V(7) - V(6));
+
+    pix1 += stride;
+    pix2 += stride;
+
+    for (i = 1; i < h; i++) {
+        s    += abs(V(0) - V(-stride));
+        for (j = 1; j < 8; j++)
+            s    += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
+        pix1 += stride;
+        pix2 += stride;
+
+    }
+#undef V
+    return s;
+}
+
 static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          ptrdiff_t stride, int h)
 {
@@ -378,6 +448,9 @@ void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type)
         case FF_CMP_SAD:
             cmp[i] = c->sad[i];
             break;
+        case FF_CMP_MEDIAN_SAD:
+            cmp[i] = c->median_sad[i];
+            break;
         case FF_CMP_SATD:
             cmp[i] = c->hadamard8_diff[i];
             break;
@@ -993,4 +1066,7 @@ av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
         ff_me_cmp_init_x86(c, avctx);
     if (ARCH_MIPS)
         ff_me_cmp_init_mips(c, avctx);
+
+    c->median_sad[0] = pix_median_abs16_c;
+    c->median_sad[1] = pix_median_abs8_c;
 }
diff --git a/libavcodec/me_cmp.h b/libavcodec/me_cmp.h
index a3603ec..5666f59 100644
--- a/libavcodec/me_cmp.h
+++ b/libavcodec/me_cmp.h
@@ -76,6 +76,7 @@ typedef struct MECmpContext {
     me_cmp_func frame_skip_cmp[6]; // only width 8 used

     me_cmp_func pix_abs[2][4];
+    me_cmp_func median_sad[2];
 } MECmpContext;

 void ff_me_cmp_init_static(void);
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 257d00b..25b606f 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -897,6 +897,7 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
     case FF_CMP_NSSE:
         return lambda2>>FF_LAMBDA_SHIFT;
     case FF_CMP_BIT:
+    case FF_CMP_MEDIAN_SAD:
         return 1;
     }
 }
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index a1f3d4b..c82fa3e 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -594,7 +594,8 @@ enum rc_strategy {
 { "nsse",   "Noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
 { "dct264", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT264 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
 { "dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
-{ "chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }
+{ "chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "msad",   "Sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }

 #ifndef FF_MPV_OFFSET
 #define FF_MPV_OFFSET(x) offsetof(MpegEncContext, x)
diff --git a/libavcodec/obme.c b/libavcodec/obme.c
index f442b26..faf943e 100644
--- a/libavcodec/obme.c
+++ b/libavcodec/obme.c
@@ -150,6 +150,7 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
     case FF_CMP_NSSE:
         return lambda2>>FF_LAMBDA_SHIFT;
     case FF_CMP_BIT:
+    case FF_CMP_MEDIAN_SAD:
         return 1;
     }
 }
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index adfbe72..fe35454 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -312,6 +312,7 @@ static const AVOption avcodec_options[] = {
 #endif
 {"dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"msad", "sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"subq", "sub-pel motion estimation quality", OFFSET(me_subpel_quality), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E},
 #if FF_API_AFD
--
2.7.4 (Apple Git-66)


## 0004-FFV1-p-frame-tests.patch
From 55906230ebd107263a748f7db2c34cb39dbac4f0 Mon Sep 17 00:00:00 2001
From: Stanislav Dolganov <dolganov@qst.hk>
Date: Thu, 18 Aug 2016 14:40:39 +0300
Subject: [PATCH 4/4] FFV1 p frame tests

---
 tests/fate/vcodec.mak                          | 3 ++-
 tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p     | 4 ++++
 tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p     | 4 ++++
 tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p     | 4 ++++
 tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p | 4 ++++
 5 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p
 create mode 100644 tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p
 create mode 100644 tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p
 create mode 100644 tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p

diff --git a/tests/fate/vcodec.mak b/tests/fate/vcodec.mak
index c62abe4..0602aca 100644
--- a/tests/fate/vcodec.mak
+++ b/tests/fate/vcodec.mak
@@ -112,7 +112,7 @@ fate-vsynth%-dv-50:              FMT     = dv

 FATE_VCODEC-$(call ENCDEC, FFV1, AVI)   += ffv1 ffv1-v0 \
                                            ffv1-v3-yuv420p ffv1-v3-yuv422p10 ffv1-v3-yuv444p16 \
-                                           ffv1-v3-bgr0
+                                           ffv1-v3-bgr0 ffv1-v3-p-yuv420p
 fate-vsynth%-ffv1:               ENCOPTS = -slices 4
 fate-vsynth%-ffv1-v0:            CODEC   = ffv1
 fate-vsynth%-ffv1-v3-yuv420p:    ENCOPTS = -level 3 -pix_fmt yuv420p
@@ -125,6 +125,7 @@ fate-vsynth%-ffv1-v3-yuv444p16:  DECOPTS = -sws_flags neighbor+bitexact
 fate-vsynth%-ffv1-v3-bgr0:       ENCOPTS = -level 3 -pix_fmt bgr0 \
                                            -sws_flags neighbor+bitexact
 fate-vsynth%-ffv1-v3-bgr0:       DECOPTS = -sws_flags neighbor+bitexact
+fate-vsynth%-ffv1-v3-p-yuv420p:  ENCOPTS = -level 3 -pframe 1 -pix_fmt yuv420p

 FATE_VCODEC-$(call ENCDEC, FFVHUFF, AVI) += ffvhuff ffvhuff444 ffvhuff420p12 ffvhuff422p10left ffvhuff444p16
 fate-vsynth%-ffvhuff444:         ENCOPTS = -vcodec ffvhuff -pix_fmt yuv444p
diff --git a/tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p b/tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p
new file mode 100644
index 0000000..dd0fad9
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p
@@ -0,0 +1,4 @@
+fbd29126ad4fe3d1b253888609aaea85 *tests/data/fate/vsynth1-ffv1-v3-p-yuv420p.avi
+2855892 tests/data/fate/vsynth1-ffv1-v3-p-yuv420p.avi
+c5ccac874dbf808e9088bc3107860042 *tests/data/fate/vsynth1-ffv1-v3-p-yuv420p.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p b/tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p
new file mode 100644
index 0000000..491c06c
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p
@@ -0,0 +1,4 @@
+974c9ffcca655e1e0e3027f41e99144d *tests/data/fate/vsynth2-ffv1-v3-p-yuv420p.avi
+2737038 tests/data/fate/vsynth2-ffv1-v3-p-yuv420p.avi
+36d7ca943916e1743cefa609eba0205c *tests/data/fate/vsynth2-ffv1-v3-p-yuv420p.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p b/tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p
new file mode 100644
index 0000000..59131a0
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p
@@ -0,0 +1,4 @@
+f969ca8542c8384c27233f362b661f8a *tests/data/fate/vsynth3-ffv1-v3-p-yuv420p.avi
+62194 tests/data/fate/vsynth3-ffv1-v3-p-yuv420p.avi
+a038ad7c3c09f776304ef7accdea9c74 *tests/data/fate/vsynth3-ffv1-v3-p-yuv420p.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p b/tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p
new file mode 100644
index 0000000..2da1afa
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p
@@ -0,0 +1,4 @@
+3863d79a350948e75c355b184ed8f6fc *tests/data/fate/vsynth_lena-ffv1-v3-p-yuv420p.avi
+2658554 tests/data/fate/vsynth_lena-ffv1-v3-p-yuv420p.avi
+dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffv1-v3-p-yuv420p.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
--
2.7.4 (Apple Git-66)


## README.txt
The GSoC 2016 FFV1 p frames support project consists of 4 patches developed for ffmpeg:
1) Factoring the OBMC context out of the Snow codec so that its motion estimation and compensation code can be used for FFV1 as well
2) Developing p frame support as part of FFV1 based on factored code, tuning compression ratio by
 -) enabling halfpixel precision,
 -) 4 motion vector search
 -) iterative motion search
3) Implementing a motion estimation compare function that more closely approximates the way the residual frame is encoded by FFV1
4) Adding new tests

All these patches can be found in the last 4 commits from here:
https://github.com/theacetoace/FFMPEG-FFV1-P-FRAMES/commits/GSOC-ffv1-submission?author=theacetoace
	From 1cb7311d8c6b3d120e79c07c4eaf8006f244026d Mon Sep 17 00:00:00 2001
	From: Stanislav Dolganov <dolganov@qst.hk>
	Date: Thu, 18 Aug 2016 14:40:09 +0300
	Subject: [PATCH 3/4] median compare function for ffv1 me

	---
	libavcodec/avcodec.h \| 33 ++++++++++----------
	libavcodec/ffv1enc.c \| 2 ++
	libavcodec/me_cmp.c \| 76 ++++++++++++++++++++++++++++++++++++++++++++++
	libavcodec/me_cmp.h \| 1 +
	libavcodec/motion_est.c \| 1 +
	libavcodec/mpegvideo.h \| 3 +-
	libavcodec/obme.c \| 1 +
	libavcodec/options_table.h \| 1 +
	8 files changed, 101 insertions(+), 17 deletions(-)

	diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
	index 3b21537..8293b8e 100644
	--- a/libavcodec/avcodec.h
	+++ b/libavcodec/avcodec.h
	@@ -2093,22 +2093,23 @@ typedef struct AVCodecContext {
	* - decoding: unused
	*/
	int ildct_cmp;
	-#define FF_CMP_SAD 0
	-#define FF_CMP_SSE 1
	-#define FF_CMP_SATD 2
	-#define FF_CMP_DCT 3
	-#define FF_CMP_PSNR 4
	-#define FF_CMP_BIT 5
	-#define FF_CMP_RD 6
	-#define FF_CMP_ZERO 7
	-#define FF_CMP_VSAD 8
	-#define FF_CMP_VSSE 9
	-#define FF_CMP_NSSE 10
	-#define FF_CMP_W53 11
	-#define FF_CMP_W97 12
	-#define FF_CMP_DCTMAX 13
	-#define FF_CMP_DCT264 14
	-#define FF_CMP_CHROMA 256
	+#define FF_CMP_SAD 0
	+#define FF_CMP_SSE 1
	+#define FF_CMP_SATD 2
	+#define FF_CMP_DCT 3
	+#define FF_CMP_PSNR 4
	+#define FF_CMP_BIT 5
	+#define FF_CMP_RD 6
	+#define FF_CMP_ZERO 7
	+#define FF_CMP_VSAD 8
	+#define FF_CMP_VSSE 9
	+#define FF_CMP_NSSE 10
	+#define FF_CMP_W53 11
	+#define FF_CMP_W97 12
	+#define FF_CMP_DCTMAX 13
	+#define FF_CMP_DCT264 14
	+#define FF_CMP_MEDIAN_SAD 15
	+#define FF_CMP_CHROMA 256

	/**
	* ME diamond size & shape
	diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
	index 4596585..f946602 100644
	--- a/libavcodec/ffv1enc.c
	+++ b/libavcodec/ffv1enc.c
	@@ -1637,6 +1637,8 @@ static const AVCodecDefault ffv1_defaults[] = {
	{ "coder", "-1" },
	{ "me_method", "iter" },
	{ "flags", "+qpel+mv4" },
	+ { "cmp", "msad" },
	+ { "subcmp", "msad" },
	{ NULL },
	};
	#endif
	diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c
	index dc76b07..6639b91 100644
	--- a/libavcodec/me_cmp.c
	+++ b/libavcodec/me_cmp.c
	@@ -139,6 +139,45 @@ static inline int pix_abs16_c(MpegEncContext v, uint8_t pix1, uint8_t *pix2,
	return s;
	}

	+static inline int pix_median_abs16_c(MpegEncContext v, uint8_t pix1, uint8_t *pix2,
	+ ptrdiff_t stride, int h)
	+{
	+ int s = 0, i, j;
	+
	+#define V(x) (pix1[x] - pix2[x])
	+
	+ s += abs(V(0));
	+ s += abs(V(1) - V(0));
	+ s += abs(V(2) - V(1));
	+ s += abs(V(3) - V(2));
	+ s += abs(V(4) - V(3));
	+ s += abs(V(5) - V(4));
	+ s += abs(V(6) - V(5));
	+ s += abs(V(7) - V(6));
	+ s += abs(V(8) - V(7));
	+ s += abs(V(9) - V(8));
	+ s += abs(V(10) - V(9));
	+ s += abs(V(11) - V(10));
	+ s += abs(V(12) - V(11));
	+ s += abs(V(13) - V(12));
	+ s += abs(V(14) - V(13));
	+ s += abs(V(15) - V(14));
	+
	+ pix1 += stride;
	+ pix2 += stride;
	+
	+ for (i = 1; i < h; i++) {
	+ s += abs(V(0) - V(-stride));
	+ for (j = 1; j < 16; j++)
	+ s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
	+ pix1 += stride;
	+ pix2 += stride;
	+
	+ }
	+#undef V
	+ return s;
	+}
	+
	static int pix_abs16_x2_c(MpegEncContext v, uint8_t pix1, uint8_t *pix2,
	ptrdiff_t stride, int h)
	{
	@@ -247,6 +286,37 @@ static inline int pix_abs8_c(MpegEncContext v, uint8_t pix1, uint8_t *pix2,
	return s;
	}

	+static inline int pix_median_abs8_c(MpegEncContext v, uint8_t pix1, uint8_t *pix2,
	+ ptrdiff_t stride, int h)
	+{
	+ int s = 0, i, j;
	+
	+#define V(x) (pix1[x] - pix2[x])
	+
	+ s += abs(V(0));
	+ s += abs(V(1) - V(0));
	+ s += abs(V(2) - V(1));
	+ s += abs(V(3) - V(2));
	+ s += abs(V(4) - V(3));
	+ s += abs(V(5) - V(4));
	+ s += abs(V(6) - V(5));
	+ s += abs(V(7) - V(6));
	+
	+ pix1 += stride;
	+ pix2 += stride;
	+
	+ for (i = 1; i < h; i++) {
	+ s += abs(V(0) - V(-stride));
	+ for (j = 1; j < 8; j++)
	+ s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
	+ pix1 += stride;
	+ pix2 += stride;
	+
	+ }
	+#undef V
	+ return s;
	+}
	+
	static int pix_abs8_x2_c(MpegEncContext v, uint8_t pix1, uint8_t *pix2,
	ptrdiff_t stride, int h)
	{
	@@ -378,6 +448,9 @@ void ff_set_cmp(MECmpContext c, me_cmp_func cmp, int type)
	case FF_CMP_SAD:
	cmp[i] = c->sad[i];
	break;
	+ case FF_CMP_MEDIAN_SAD:
	+ cmp[i] = c->median_sad[i];
	+ break;
	case FF_CMP_SATD:
	cmp[i] = c->hadamard8_diff[i];
	break;
	@@ -993,4 +1066,7 @@ av_cold void ff_me_cmp_init(MECmpContext c, AVCodecContext avctx)
	ff_me_cmp_init_x86(c, avctx);
	if (ARCH_MIPS)
	ff_me_cmp_init_mips(c, avctx);
	+
	+ c->median_sad[0] = pix_median_abs16_c;
	+ c->median_sad[1] = pix_median_abs8_c;
	}
	diff --git a/libavcodec/me_cmp.h b/libavcodec/me_cmp.h
	index a3603ec..5666f59 100644
	--- a/libavcodec/me_cmp.h
	+++ b/libavcodec/me_cmp.h
	@@ -76,6 +76,7 @@ typedef struct MECmpContext {
	me_cmp_func frame_skip_cmp[6]; // only width 8 used

	me_cmp_func pix_abs[2][4];
	+ me_cmp_func median_sad[2];
	} MECmpContext;

	void ff_me_cmp_init_static(void);
	diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
	index 257d00b..25b606f 100644
	--- a/libavcodec/motion_est.c
	+++ b/libavcodec/motion_est.c
	@@ -897,6 +897,7 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
	case FF_CMP_NSSE:
	return lambda2>>FF_LAMBDA_SHIFT;
	case FF_CMP_BIT:
	+ case FF_CMP_MEDIAN_SAD:
	return 1;
	}
	}
	diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
	index a1f3d4b..c82fa3e 100644
	--- a/libavcodec/mpegvideo.h
	+++ b/libavcodec/mpegvideo.h
	@@ -594,7 +594,8 @@ enum rc_strategy {
	{ "nsse", "Noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
	{ "dct264", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT264 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
	{ "dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
	-{ "chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }
	+{ "chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
	+{ "msad", "Sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }

	#ifndef FF_MPV_OFFSET
	#define FF_MPV_OFFSET(x) offsetof(MpegEncContext, x)
	diff --git a/libavcodec/obme.c b/libavcodec/obme.c
	index f442b26..faf943e 100644
	--- a/libavcodec/obme.c
	+++ b/libavcodec/obme.c
	@@ -150,6 +150,7 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
	case FF_CMP_NSSE:
	return lambda2>>FF_LAMBDA_SHIFT;
	case FF_CMP_BIT:
	+ case FF_CMP_MEDIAN_SAD:
	return 1;
	}
	}
	diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
	index adfbe72..fe35454 100644
	--- a/libavcodec/options_table.h
	+++ b/libavcodec/options_table.h
	@@ -312,6 +312,7 @@ static const AVOption avcodec_options[] = {
	#endif
	{"dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V\|E, "cmp_func"},
	{"chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, V\|E, "cmp_func"},
	+{"msad", "sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, V\|E, "cmp_func"},
	{"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V\|E},
	{"subq", "sub-pel motion estimation quality", OFFSET(me_subpel_quality), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V\|E},
	#if FF_API_AFD
	--
	2.7.4 (Apple Git-66)
	From 55906230ebd107263a748f7db2c34cb39dbac4f0 Mon Sep 17 00:00:00 2001
	From: Stanislav Dolganov <dolganov@qst.hk>
	Date: Thu, 18 Aug 2016 14:40:39 +0300
	Subject: [PATCH 4/4] FFV1 p frame tests

	---
	tests/fate/vcodec.mak \| 3 ++-
	tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p \| 4 ++++
	tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p \| 4 ++++
	tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p \| 4 ++++
	tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p \| 4 ++++
	5 files changed, 18 insertions(+), 1 deletion(-)
	create mode 100644 tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p
	create mode 100644 tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p
	create mode 100644 tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p
	create mode 100644 tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p

	diff --git a/tests/fate/vcodec.mak b/tests/fate/vcodec.mak
	index c62abe4..0602aca 100644
	--- a/tests/fate/vcodec.mak
	+++ b/tests/fate/vcodec.mak
	@@ -112,7 +112,7 @@ fate-vsynth%-dv-50: FMT = dv

	FATE_VCODEC-$(call ENCDEC, FFV1, AVI) += ffv1 ffv1-v0 \
	ffv1-v3-yuv420p ffv1-v3-yuv422p10 ffv1-v3-yuv444p16 \
	- ffv1-v3-bgr0
	+ ffv1-v3-bgr0 ffv1-v3-p-yuv420p
	fate-vsynth%-ffv1: ENCOPTS = -slices 4
	fate-vsynth%-ffv1-v0: CODEC = ffv1
	fate-vsynth%-ffv1-v3-yuv420p: ENCOPTS = -level 3 -pix_fmt yuv420p
	@@ -125,6 +125,7 @@ fate-vsynth%-ffv1-v3-yuv444p16: DECOPTS = -sws_flags neighbor+bitexact
	fate-vsynth%-ffv1-v3-bgr0: ENCOPTS = -level 3 -pix_fmt bgr0 \
	-sws_flags neighbor+bitexact
	fate-vsynth%-ffv1-v3-bgr0: DECOPTS = -sws_flags neighbor+bitexact
	+fate-vsynth%-ffv1-v3-p-yuv420p: ENCOPTS = -level 3 -pframe 1 -pix_fmt yuv420p

	FATE_VCODEC-$(call ENCDEC, FFVHUFF, AVI) += ffvhuff ffvhuff444 ffvhuff420p12 ffvhuff422p10left ffvhuff444p16
	fate-vsynth%-ffvhuff444: ENCOPTS = -vcodec ffvhuff -pix_fmt yuv444p
	diff --git a/tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p b/tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p
	new file mode 100644
	index 0000000..dd0fad9
	--- /dev/null
	+++ b/tests/ref/vsynth/vsynth1-ffv1-v3-p-yuv420p
	@@ -0,0 +1,4 @@
	+fbd29126ad4fe3d1b253888609aaea85 *tests/data/fate/vsynth1-ffv1-v3-p-yuv420p.avi
	+2855892 tests/data/fate/vsynth1-ffv1-v3-p-yuv420p.avi
	+c5ccac874dbf808e9088bc3107860042 *tests/data/fate/vsynth1-ffv1-v3-p-yuv420p.out.rawvideo
	+stddev: 0.00 PSNR:999.99 MAXDIFF: 0 bytes: 7603200/ 7603200
	diff --git a/tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p b/tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p
	new file mode 100644
	index 0000000..491c06c
	--- /dev/null
	+++ b/tests/ref/vsynth/vsynth2-ffv1-v3-p-yuv420p
	@@ -0,0 +1,4 @@
	+974c9ffcca655e1e0e3027f41e99144d *tests/data/fate/vsynth2-ffv1-v3-p-yuv420p.avi
	+2737038 tests/data/fate/vsynth2-ffv1-v3-p-yuv420p.avi
	+36d7ca943916e1743cefa609eba0205c *tests/data/fate/vsynth2-ffv1-v3-p-yuv420p.out.rawvideo
	+stddev: 0.00 PSNR:999.99 MAXDIFF: 0 bytes: 7603200/ 7603200
	diff --git a/tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p b/tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p
	new file mode 100644
	index 0000000..59131a0
	--- /dev/null
	+++ b/tests/ref/vsynth/vsynth3-ffv1-v3-p-yuv420p
	@@ -0,0 +1,4 @@
	+f969ca8542c8384c27233f362b661f8a *tests/data/fate/vsynth3-ffv1-v3-p-yuv420p.avi
	+62194 tests/data/fate/vsynth3-ffv1-v3-p-yuv420p.avi
	+a038ad7c3c09f776304ef7accdea9c74 *tests/data/fate/vsynth3-ffv1-v3-p-yuv420p.out.rawvideo
	+stddev: 0.00 PSNR:999.99 MAXDIFF: 0 bytes: 86700/ 86700
	diff --git a/tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p b/tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p
	new file mode 100644
	index 0000000..2da1afa
	--- /dev/null
	+++ b/tests/ref/vsynth/vsynth_lena-ffv1-v3-p-yuv420p
	@@ -0,0 +1,4 @@
	+3863d79a350948e75c355b184ed8f6fc *tests/data/fate/vsynth_lena-ffv1-v3-p-yuv420p.avi
	+2658554 tests/data/fate/vsynth_lena-ffv1-v3-p-yuv420p.avi
	+dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffv1-v3-p-yuv420p.out.rawvideo
	+stddev: 0.00 PSNR:999.99 MAXDIFF: 0 bytes: 7603200/ 7603200
	--
	2.7.4 (Apple Git-66)
	The GSoC 2016 FFV1 p frames support project consists of 4 patches developed for ffmpeg:
	1) Factoring the OBMC context out of the Snow codec so that its motion estimation and compensation code can be used for FFV1 as well
	2) Developing p frame support as part of FFV1 based on factored code, tuning compression ratio by
	-) enabling halfpixel precision,
	-) 4 motion vector search
	-) iterative motion search
	3) Implementing a motion estimation compare function that more closely approximates the way the residual frame is encoded by FFV1
	4) Adding new tests

	All these patches can be found in the last 4 commits from here:
	https://github.com/theacetoace/FFMPEG-FFV1-P-FRAMES/commits/GSOC-ffv1-submission?author=theacetoace