Commit 18e90d74 authored by Deb Mukherjee's avatar Deb Mukherjee

Supporting high precision 1/8-pel motion vectors

This is the initial patch for supporting 1/8th pel
motion. Currently if we configure with enable-high-precision-mv,
all motion vectors would default to 1/8 pel. Encode and
decode syncs fine with the current code. In the next phase
the code will be refactored so that we can choose the 1/8
pel mode adaptively at a frame/segment/mb level.

Derf results:
http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html
(about 0.83% better than 8-tap interpoaltion)

Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V

Patch 4: HD results.
http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html
Seems impressive (unless I am doing something wrong).

Patch 5: Added mmx/sse for bilateral filtering, as well as enforced
use of c-versions of subpel filters with 8-taps and 1/16th pel;
Also redesigned the 8-tap filters to reduce the cut-off in order to
introduce a denoising effect. There is a new configure option
sixteenth-subpel-uv which will use 1/16 th pel interpolation for
uv, if the motion vectors have 1/8 pel accuracy.

With the fixes the results are promising on the derf set. The enhanced
interpolation option with 8-taps alone gives 3% improvement over thei
derf set:
http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html

Results on high precision mv and on the hd set are to follow.

Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in
vp8/common/x86/x86_systemdependent.c

Patch 7: Cleaning up various debug messages.

Patch 8: Merge conflict

Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04
parent 3c872b6c
......@@ -226,6 +226,8 @@ EXPERIMENT_LIST="
enhanced_interp
superblocks
featureupdates
high_precision_mv
sixteenth_subpel_uv
"
CONFIG_LIST="
external_build
......
......@@ -290,8 +290,8 @@ typedef struct MacroBlockD
/* Delta values have the range +/- MAX_LOOP_FILTER */
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
/* Distance of MB away from frame edges */
int mb_to_left_edge;
......@@ -310,6 +310,9 @@ typedef struct MacroBlockD
vp8_subpix_fn_t subpixel_predict16x16;
vp8_subpix_fn_t subpixel_predict_avg8x8;
vp8_subpix_fn_t subpixel_predict_avg16x16;
#if CONFIG_HIGH_PRECISION_MV
int allow_high_precision_mv;
#endif /* CONFIG_HIGH_PRECISION_MV */
void *current_bc;
......
......@@ -225,6 +225,28 @@ struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS];
struct vp8_token_struct vp8_mv_ref_encoding_array [VP8_MVREFS];
struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];
#if CONFIG_HIGH_PRECISION_MV
const vp8_tree_index vp8_small_mvtree [30] =
{
2, 16,
4, 10,
6, 8,
-0, -1,
-2, -3,
12, 14,
-4, -5,
-6, -7,
18, 24,
20, 22,
-8, -9,
-10, -11,
26, 28,
-12, -13,
-14, -15
};
struct vp8_token_struct vp8_small_mvencodings [16];
#else
const vp8_tree_index vp8_small_mvtree [14] =
{
......@@ -236,9 +258,11 @@ const vp8_tree_index vp8_small_mvtree [14] =
-4, -5,
-6, -7
};
struct vp8_token_struct vp8_small_mvencodings [8];
#endif /* CONFIG_HIGH_PRECISION_MV */
void vp8_init_mbmode_probs(VP8_COMMON *x)
{
unsigned int bct [VP8_YMODES] [2]; /* num Ymodes > num UV modes */
......@@ -489,4 +513,3 @@ void print_mv_ref_cts(VP8_COMMON *pc)
printf("\n");
}
}
......@@ -57,7 +57,11 @@ extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];
extern const vp8_tree_index vp8_small_mvtree[];
#if CONFIG_HIGH_PRECISION_MV
extern struct vp8_token_struct vp8_small_mvencodings [16];
#else
extern struct vp8_token_struct vp8_small_mvencodings [8];
#endif
void vp8_entropy_mode_init(void);
......
......@@ -11,6 +11,40 @@
#include "entropymv.h"
#if CONFIG_HIGH_PRECISION_MV
const MV_CONTEXT vp8_mv_update_probs[2] =
{
{{
237,
246,
253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 250, 250, 252, 254, 254, 254
}},
{{
231,
243,
245, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 251, 251, 254, 254, 254, 254
}}
};
const MV_CONTEXT vp8_default_mv_context[2] =
{
{{
/* row */
162, /* is short */
128, /* sign */
230, 215, 175, 140, 160, 180, 160, 140, 180, 214, 150, 39, 120, 156, 160, /* short tree */
128, 129, 132, 75, 145, 178, 206, 239, 254, 254, 254 /* long bits */
}},
{{
/* same for column */
164, /* is short */
128,
220, 204, 180, 170, 140, 119, 180, 235, 180, 140, 185, 230, 229, 228, 200,
128, 130, 130, 74, 148, 180, 203, 236, 254, 254, 254 /* long bits */
}}
};
#else
const MV_CONTEXT vp8_mv_update_probs[2] =
{
{{
......@@ -35,15 +69,12 @@ const MV_CONTEXT vp8_default_mv_context[2] =
225, 146, 172, 147, 214, 39, 156, /* short tree */
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
}},
{{
/* same for column */
164, /* is short */
128,
204, 170, 119, 235, 140, 230, 228,
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
}}
};
#endif /* CONFIG_HIGH_PRECISION_MV */
......@@ -13,16 +13,32 @@
#define __INC_ENTROPYMV_H
#include "treecoder.h"
#include "vpx_config.h"
#if CONFIG_HIGH_PRECISION_MV
#define MV_SHIFT 0
#else
#define MV_SHIFT 1
#endif
enum
{
#if CONFIG_HIGH_PRECISION_MV
mv_max = 2047, /* max absolute value of a MV component */
MVvals = (2 * mv_max) + 1, /* # possible values "" */
mvlong_width = 11, /* Large MVs have 9 bit magnitudes */
mvnum_short = 16, /* magnitudes 0 through 15 */
mvnum_short_bits = 4, /* number of bits for short mvs */
#else
mv_max = 1023, /* max absolute value of a MV component */
MVvals = (2 * mv_max) + 1, /* # possible values "" */
mvfp_max = 255, /* max absolute value of a full pixel MV component */
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
mvnum_short = 8, /* magnitudes 0 through 7 */
mvnum_short_bits = 3, /* number of bits for short mvs */
#endif
mvfp_max = 255, /* max absolute value of a full pixel MV component */
MVfpvals = (2 * mvfp_max) + 1, /* # possible full pixel MV values */
/* probability offsets for coding each MV component */
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -18,15 +18,21 @@
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SUBPEL_SHIFTS 16
#else
#define SUBPEL_SHIFTS 8
#endif
extern const short vp8_bilinear_filters[SUBPEL_SHIFTS][2];
extern const short vp8_sub_pel_filters[SUBPEL_SHIFTS][INTERP_EXTEND*2];
/* whether to use a special filter for edge pixels */
#define EDGE_PIXEL_FILTER 0
extern const short vp8_bilinear_filters[8][2];
extern const short vp8_sub_pel_filters[8][INTERP_EXTEND*2];
#if EDGE_PIXEL_FILTER > 0
#define EDGE_PIXEL_FILTER_EXTEND 2
extern const short vp8_sub_pel_filters_ns[64][4*EDGE_PIXEL_FILTER_EXTEND*EDGE_PIXEL_FILTER_EXTEND];
extern const short vp8_sub_pel_filters_ns[SUBPEL_SHIFTS*SUBPEL_SHIFTS][4*EDGE_PIXEL_FILTER_EXTEND*EDGE_PIXEL_FILTER_EXTEND];
#endif
#endif //FILTER_H
......@@ -180,7 +180,11 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
#if CONFIG_SIXTEENTH_SUBPEL_UV
sppf(ptr, d->pre_stride, (d->bmi.mv.as_mv.col & 7)<<1, (d->bmi.mv.as_mv.row & 7)<<1, pred_ptr, pitch);
#else
sppf(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
#endif
}
else
{
......@@ -214,7 +218,11 @@ static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict8x8(ptr, d->pre_stride, (d->bmi.mv.as_mv.col & 7)<<1, (d->bmi.mv.as_mv.row & 7)<<1, pred_ptr, pitch);
#else
x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
#endif
}
else
{
......@@ -233,7 +241,11 @@ static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict8x4(ptr, d->pre_stride, (d->bmi.mv.as_mv.col & 7)<<1, (d->bmi.mv.as_mv.row & 7)<<1, pred_ptr, pitch);
#else
x->subpixel_predict8x4(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
#endif
}
else
{
......@@ -249,8 +261,10 @@ void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
unsigned char *upred_ptr = &x->predictor[256];
unsigned char *vpred_ptr = &x->predictor[320];
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int omv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int omv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int mv_row = omv_row;
int mv_col = omv_col;
int offset;
int pre_stride = x->block[16].pre_stride;
......@@ -275,11 +289,19 @@ void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
#if CONFIG_SIXTEENTH_SUBPEL_UV
if ((omv_row | omv_col) & 15)
{
x->subpixel_predict8x8(uptr, pre_stride, omv_col & 15, omv_row & 15, upred_ptr, 8);
x->subpixel_predict8x8(vptr, pre_stride, omv_col & 15, omv_row & 15, vpred_ptr, 8);
}
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
}
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
......@@ -361,7 +383,11 @@ void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
if ((mv_row | mv_col) & 7)
{
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict16x16(ptr, pre_stride, (mv_col & 7)<<1, (mv_row & 7)<<1, pred_ptr, 16);
#else
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
#endif
}
else
{
......@@ -418,6 +444,7 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
unsigned char *ptr;
unsigned char *uptr, *vptr;
int_mv _o16x16mv;
int_mv _16x16mv;
unsigned char *ptr_base = x->pre.y_buffer;
......@@ -434,13 +461,18 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
if ( _16x16mv.as_int & 0x00070007)
{
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict16x16(ptr, pre_stride, (_16x16mv.as_mv.col & 7)<<1, (_16x16mv.as_mv.row & 7)<<1, dst_y, dst_ystride);
#else
x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
#endif
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, dst_ystride);
}
_o16x16mv = _16x16mv;
/* calc uv motion vectors */
if ( _16x16mv.as_mv.row < 0)
_16x16mv.as_mv.row -= 1;
......@@ -463,11 +495,19 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
#if CONFIG_SIXTEENTH_SUBPEL_UV
if ( _o16x16mv.as_int & 0x000f000f)
{
x->subpixel_predict8x8(uptr, pre_stride, _o16x16mv.as_mv.col & 15, _o16x16mv.as_mv.row & 15, dst_u, dst_uvstride);
x->subpixel_predict8x8(vptr, pre_stride, _o16x16mv.as_mv.col & 15, _o16x16mv.as_mv.row & 15, dst_v, dst_uvstride);
}
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
if ( _16x16mv.as_int & 0x00070007)
{
x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
}
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, dst_u, dst_uvstride);
......@@ -503,6 +543,7 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
int mv_row = x->mode_info_context->mbmi.second_mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.second_mv.as_mv.col;
int omv_row, omv_col;
unsigned char *ptr_base = x->second_pre.y_buffer;
int pre_stride = x->block[0].pre_stride;
......@@ -511,7 +552,11 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
if ((mv_row | mv_col) & 7)
{
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict_avg16x16(ptr, pre_stride, (mv_col & 7)<<1, (mv_row & 7)<<1, dst_y, dst_ystride);
#else
x->subpixel_predict_avg16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride);
#endif
}
else
{
......@@ -519,6 +564,8 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
}
/* calc uv motion vectors */
omv_row = mv_row;
omv_col = mv_col;
mv_row = (mv_row + (mv_row > 0)) >> 1;
mv_col = (mv_col + (mv_col > 0)) >> 1;
......@@ -530,11 +577,19 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
uptr = x->second_pre.u_buffer + offset;
vptr = x->second_pre.v_buffer + offset;
#if CONFIG_SIXTEENTH_SUBPEL_UV
if ((omv_row | omv_col) & 15)
{
x->subpixel_predict_avg8x8(uptr, pre_stride, omv_col & 15, omv_row & 15, dst_u, dst_uvstride);
x->subpixel_predict_avg8x8(vptr, pre_stride, omv_col & 15, omv_row & 15, dst_v, dst_uvstride);
}
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict_avg8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, dst_u, dst_uvstride);
x->subpixel_predict_avg8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, dst_v, dst_uvstride);
}
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
else
{
RECON_INVOKE(&x->rtcd->recon, avg8x8)(uptr, pre_stride, dst_u, dst_uvstride);
......
......@@ -1495,13 +1495,33 @@ k2_k4:
times 8 db 36, -11
times 8 db 12, -6
align 16
%if CONFIG_SIXTEENTH_SUBPEL_UV
vp8_bilinear_filters_ssse3:
times 8 db 128, 0
times 8 db 120, 8
times 8 db 112, 16
times 8 db 104, 24
times 8 db 96, 32
times 8 db 88, 40
times 8 db 80, 48
times 8 db 72, 56
times 8 db 64, 64
times 8 db 56, 72
times 8 db 48, 80
times 8 db 40, 88
times 8 db 32, 96
times 8 db 24, 104
times 8 db 16, 112
times 8 db 8, 120
%else
vp8_bilinear_filters_ssse3:
times 8 db 128, 0
times 8 db 112, 16
times 8 db 96, 32
times 8 db 80, 48
times 8 db 64, 64
times 8 db 48, 80
times 8 db 32, 96
times 8 db 16, 112
%endif
......@@ -13,8 +13,15 @@
#include "vpx_ports/mem.h"
#include "vp8/common/subpixel.h"
#if CONFIG_SIXTEENTH_SUBPEL_UV
extern const short vp8_six_tap_mmx[16][6*8];
extern const short vp8_bilinear_filters_mmx[16][2*8];
#else
extern const short vp8_six_tap_mmx[8][6*8];
extern const short vp8_bilinear_filters_mmx[8][2*8];
#endif
//#define ANNOUNCE_FUNCTION
extern void vp8_filter_block1d_h6_mmx
(
......@@ -128,6 +135,9 @@ void vp8_sixtap_predict4x4_mmx
int dst_pitch
)
{
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict4x4_mmx\n");
#endif
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
HFilter = vp8_six_tap_mmx[xoffset];
......@@ -149,6 +159,9 @@ void vp8_sixtap_predict16x16_mmx
)
{
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict16x16_mmx\n");
#endif
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
......@@ -181,6 +194,9 @@ void vp8_sixtap_predict8x8_mmx
)
{
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict8x8_mmx\n");
#endif
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
......@@ -206,7 +222,9 @@ void vp8_sixtap_predict8x4_mmx
int dst_pitch
)
{
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict8x4_mmx\n");
#endif
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
......@@ -256,6 +274,9 @@ void vp8_sixtap_predict16x16_sse2
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict16x16_sse2\n");
#endif
if (xoffset)
{
......@@ -295,6 +316,9 @@ void vp8_sixtap_predict8x8_sse2
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict8x8_sse2\n");
#endif
if (xoffset)
{
......@@ -333,6 +357,9 @@ void vp8_sixtap_predict8x4_sse2
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
const short *HFilter, *VFilter;
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict8x4_sse2\n");
#endif
if (xoffset)
{
......@@ -434,6 +461,9 @@ void vp8_sixtap_predict16x16_ssse3
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict16x16_ssse3\n");
#endif
if (xoffset)
{
......@@ -466,6 +496,9 @@ void vp8_sixtap_predict8x8_ssse3
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict8x8_ssse3\n");
#endif
if (xoffset)
{
......@@ -498,6 +531,9 @@ void vp8_sixtap_predict8x4_ssse3
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict8x4_ssse3\n");
#endif
if (xoffset)
{
......@@ -530,6 +566,9 @@ void vp8_sixtap_predict4x4_ssse3
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
#ifdef ANNOUNCE_FUNCTION
printf("vp8_sixtap_predict4x4_ssse3\n");
#endif
if (xoffset)
{
......
......@@ -43,17 +43,17 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx;
rtcd->recon.recon = vp8_recon_b_mmx;
rtcd->recon.copy8x8 = vp8_copy_mem8x8_mmx;
rtcd->recon.copy8x4 = vp8_copy_mem8x4_mmx;
rtcd->recon.copy16x16 = vp8_copy_mem16x16_mmx;
#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0 && CONFIG_SIXTEENTH_SUBPEL_UV == 0
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_mmx;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_mmx;
rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_mmx;
rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_mmx;
#endif
rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_mmx;
rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_mmx;
rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_mmx;
......@@ -91,9 +91,11 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2;
#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0 && CONFIG_SIXTEENTH_SUBPEL_UV == 0
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_sse2;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_sse2;
rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_sse2;
#endif
rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_sse2;
rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_sse2;
......@@ -120,12 +122,14 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
if (flags & HAS_SSSE3)
{
#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_ssse3;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_ssse3;
rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_ssse3;
rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3;
rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3;
rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_ssse3;
#endif
rtcd->recon.build_intra_predictors_mbuv =
vp8_build_intra_predictors_mbuv_ssse3;
......
......@@ -21,6 +21,12 @@
#if CONFIG_DEBUG
#include <assert.h>
#endif
//#define DEBUG_DEC_MV
#ifdef DEBUG_DEC_MV
int dec_mvcount = 0;
#endif
static int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
{
const int i = vp8_treed_read(bc, vp8_bmode_tree, p);
......@@ -173,7 +179,7 @@ static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
{
x += vp8_read(r, p [MVPbits + i]) << i;
}
while (++i < 3);
while (++i < mvnum_short_bits);
i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
......@@ -181,10 +187,10 @@ static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
{
x += vp8_read(r, p [MVPbits + i]) << i;
}
while (--i > 3);
while (--i > mvnum_short_bits);
if (!(x & 0xFFF0) || vp8_read(r, p [MVPbits + 3]))
x += 8;
if (!(x & ~((2<<mvnum_short_bits)-1)) || vp8_read(r, p [MVPbits + mvnum_short_bits]))
x += (mvnum_short);
}
else /* small */
x = vp8_treed_read(r, vp8_small_mvtree, p + MVPshort);
......@@ -197,8 +203,14 @@ static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc)
{
mv->row = (short)(read_mvcomponent(r, mvc) << 1);
mv->col = (short)(read_mvcomponent(r, ++mvc) << 1);
mv->row = (short)(read_mvcomponent(r, mvc) << MV_SHIFT);
mv->col = (short)(read_mvcomponent(r, ++mvc) << MV_SHIFT);
#ifdef DEBUG_DEC_MV
int i;
printf("%d: %d %d\n", dec_mvcount++, mv->row, mv->col);
for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[-1])->prob[i]); printf("\n");
for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[0])->prob[i]); printf("\n");