Commit 18e90d74 authored by Deb Mukherjee's avatar Deb Mukherjee
Browse files

Supporting high precision 1/8-pel motion vectors

This is the initial patch for supporting 1/8th pel
motion. Currently if we configure with enable-high-precision-mv,
all motion vectors would default to 1/8 pel. Encode and
decode syncs fine with the current code. In the next phase
the code will be refactored so that we can choose the 1/8
pel mode adaptively at a frame/segment/mb level.

Derf results:
http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html
(about 0.83% better than 8-tap interpolation)

Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V

Patch 4: HD results.
http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html
Seems impressive (unless I am doing something wrong).

Patch 5: Added mmx/sse for bilateral filtering, as well as enforced
use of c-versions of subpel filters with 8-taps and 1/16th pel;
Also redesigned the 8-tap filters to reduce the cut-off in order to
introduce a denoising effect. There is a new configure option
sixteenth-subpel-uv which will use 1/16th pel interpolation for
uv, if the motion vectors have 1/8 pel accuracy.

With the fixes the results are promising on the derf set. The enhanced
interpolation option with 8-taps alone gives 3% improvement over the
derf set:
http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html

Results on high precision mv and on the hd set are to follow.

Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in
vp8/common/x86/x86_systemdependent.c

Patch 7: Cleaning up various debug messages.

Patch 8: Merge conflict

Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04
parent 3c872b6c
...@@ -226,6 +226,8 @@ EXPERIMENT_LIST=" ...@@ -226,6 +226,8 @@ EXPERIMENT_LIST="
enhanced_interp enhanced_interp
superblocks superblocks
featureupdates featureupdates
high_precision_mv
sixteenth_subpel_uv
" "
CONFIG_LIST=" CONFIG_LIST="
external_build external_build
......
...@@ -290,8 +290,8 @@ typedef struct MacroBlockD ...@@ -290,8 +290,8 @@ typedef struct MacroBlockD
/* Delta values have the range +/- MAX_LOOP_FILTER */ /* Delta values have the range +/- MAX_LOOP_FILTER */
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
/* Distance of MB away from frame edges */ /* Distance of MB away from frame edges */
int mb_to_left_edge; int mb_to_left_edge;
...@@ -310,6 +310,9 @@ typedef struct MacroBlockD ...@@ -310,6 +310,9 @@ typedef struct MacroBlockD
vp8_subpix_fn_t subpixel_predict16x16; vp8_subpix_fn_t subpixel_predict16x16;
vp8_subpix_fn_t subpixel_predict_avg8x8; vp8_subpix_fn_t subpixel_predict_avg8x8;
vp8_subpix_fn_t subpixel_predict_avg16x16; vp8_subpix_fn_t subpixel_predict_avg16x16;
#if CONFIG_HIGH_PRECISION_MV
int allow_high_precision_mv;
#endif /* CONFIG_HIGH_PRECISION_MV */
void *current_bc; void *current_bc;
......
...@@ -225,6 +225,28 @@ struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS]; ...@@ -225,6 +225,28 @@ struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS];
struct vp8_token_struct vp8_mv_ref_encoding_array [VP8_MVREFS]; struct vp8_token_struct vp8_mv_ref_encoding_array [VP8_MVREFS];
struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS]; struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];
#if CONFIG_HIGH_PRECISION_MV
/*
 * Token tree for "short" motion-vector magnitudes in the
 * CONFIG_HIGH_PRECISION_MV build: 15 pairs of entries with 16 leaves,
 * written -0 .. -15.
 * NOTE(review): positive entries look like the index of the next pair
 * within this array (the usual vp8_tree_index layout) -- confirm against
 * treecoder.h.
 */
const vp8_tree_index vp8_small_mvtree [30] =
{
2, 16,
4, 10,
6, 8,
-0, -1,
-2, -3,
12, 14,
-4, -5,
-6, -7,
18, 24,
20, 22,
-8, -9,
-10, -11,
26, 28,
-12, -13,
-14, -15
};
/* 16 entries: one slot per leaf of the tree above. */
struct vp8_token_struct vp8_small_mvencodings [16];
#else
const vp8_tree_index vp8_small_mvtree [14] = const vp8_tree_index vp8_small_mvtree [14] =
{ {
...@@ -236,9 +258,11 @@ const vp8_tree_index vp8_small_mvtree [14] = ...@@ -236,9 +258,11 @@ const vp8_tree_index vp8_small_mvtree [14] =
-4, -5, -4, -5,
-6, -7 -6, -7
}; };
struct vp8_token_struct vp8_small_mvencodings [8]; struct vp8_token_struct vp8_small_mvencodings [8];
#endif /* CONFIG_HIGH_PRECISION_MV */
void vp8_init_mbmode_probs(VP8_COMMON *x) void vp8_init_mbmode_probs(VP8_COMMON *x)
{ {
unsigned int bct [VP8_YMODES] [2]; /* num Ymodes > num UV modes */ unsigned int bct [VP8_YMODES] [2]; /* num Ymodes > num UV modes */
...@@ -489,4 +513,3 @@ void print_mv_ref_cts(VP8_COMMON *pc) ...@@ -489,4 +513,3 @@ void print_mv_ref_cts(VP8_COMMON *pc)
printf("\n"); printf("\n");
} }
} }
...@@ -57,7 +57,11 @@ extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS]; ...@@ -57,7 +57,11 @@ extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];
extern const vp8_tree_index vp8_small_mvtree[]; extern const vp8_tree_index vp8_small_mvtree[];
#if CONFIG_HIGH_PRECISION_MV
extern struct vp8_token_struct vp8_small_mvencodings [16];
#else
extern struct vp8_token_struct vp8_small_mvencodings [8]; extern struct vp8_token_struct vp8_small_mvencodings [8];
#endif
void vp8_entropy_mode_init(void); void vp8_entropy_mode_init(void);
......
...@@ -11,6 +11,40 @@ ...@@ -11,6 +11,40 @@
#include "entropymv.h" #include "entropymv.h"
#if CONFIG_HIGH_PRECISION_MV
/*
 * MV probability-update table for the CONFIG_HIGH_PRECISION_MV build.
 * Each of the two entries holds 28 values: 1 + 1 + 15 + 11.
 * NOTE(review): the grouping matches vp8_default_mv_context (is-short,
 * sign, short tree, long bits; row first, then column) -- presumably the
 * same layout applies here; confirm against the MV_CONTEXT definition.
 */
const MV_CONTEXT vp8_mv_update_probs[2] =
{
{{
237,
246,
253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 250, 250, 252, 254, 254, 254
}},
{{
231,
243,
245, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 251, 251, 254, 254, 254, 254
}}
};
/*
 * Default MV coding probabilities for the CONFIG_HIGH_PRECISION_MV build.
 * Entry 0 is for the row component, entry 1 for the column component.
 * Each entry lists: is-short, sign, 15 short-tree values, 11 long-bit values.
 */
const MV_CONTEXT vp8_default_mv_context[2] =
{
{{
/* row */
162, /* is short */
128, /* sign */
230, 215, 175, 140, 160, 180, 160, 140, 180, 214, 150, 39, 120, 156, 160, /* short tree */
128, 129, 132, 75, 145, 178, 206, 239, 254, 254, 254 /* long bits */
}},
{{
/* same for column */
164, /* is short */
128, /* sign */
220, 204, 180, 170, 140, 119, 180, 235, 180, 140, 185, 230, 229, 228, 200, /* short tree */
128, 130, 130, 74, 148, 180, 203, 236, 254, 254, 254 /* long bits */
}}
};
#else
const MV_CONTEXT vp8_mv_update_probs[2] = const MV_CONTEXT vp8_mv_update_probs[2] =
{ {
{{ {{
...@@ -35,15 +69,12 @@ const MV_CONTEXT vp8_default_mv_context[2] = ...@@ -35,15 +69,12 @@ const MV_CONTEXT vp8_default_mv_context[2] =
225, 146, 172, 147, 214, 39, 156, /* short tree */ 225, 146, 172, 147, 214, 39, 156, /* short tree */
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */ 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
}}, }},
{{ {{
/* same for column */ /* same for column */
164, /* is short */ 164, /* is short */
128, 128,
204, 170, 119, 235, 140, 230, 228, 204, 170, 119, 235, 140, 230, 228,
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */ 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
}} }}
}; };
#endif /* CONFIG_HIGH_PRECISION_MV */
...@@ -13,16 +13,32 @@ ...@@ -13,16 +13,32 @@
#define __INC_ENTROPYMV_H #define __INC_ENTROPYMV_H
#include "treecoder.h" #include "treecoder.h"
#include "vpx_config.h"
/*
 * MV_SHIFT is 0 when CONFIG_HIGH_PRECISION_MV is enabled and 1 otherwise.
 * NOTE(review): the places that apply this shift are not visible here --
 * presumably it converts between stored MV units and coded MV units;
 * confirm at the usage sites.
 */
#if CONFIG_HIGH_PRECISION_MV
#define MV_SHIFT 0
#else
#define MV_SHIFT 1
#endif
enum enum
{ {
#if CONFIG_HIGH_PRECISION_MV
mv_max = 2047, /* max absolute value of a MV component */
MVvals = (2 * mv_max) + 1, /* # possible values "" */
mvlong_width = 11, /* Large MVs have 9 bit magnitudes */
mvnum_short = 16, /* magnitudes 0 through 15 */
mvnum_short_bits = 4, /* number of bits for short mvs */
#else
mv_max = 1023, /* max absolute value of a MV component */ mv_max = 1023, /* max absolute value of a MV component */
MVvals = (2 * mv_max) + 1, /* # possible values "" */ MVvals = (2 * mv_max) + 1, /* # possible values "" */
mvfp_max = 255, /* max absolute value of a full pixel MV component */
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */ mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
mvnum_short = 8, /* magnitudes 0 through 7 */ mvnum_short = 8, /* magnitudes 0 through 7 */
mvnum_short_bits = 3, /* number of bits for short mvs */
#endif
mvfp_max = 255, /* max absolute value of a full pixel MV component */
MVfpvals = (2 * mvfp_max) + 1, /* # possible full pixel MV values */
/* probability offsets for coding each MV component */ /* probability offsets for coding each MV component */
......
This diff is collapsed.
...@@ -18,15 +18,21 @@ ...@@ -18,15 +18,21 @@
#define VP8_FILTER_WEIGHT 128 #define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7 #define VP8_FILTER_SHIFT 7
/*
 * SUBPEL_SHIFTS is the number of rows in the sub-pixel filter tables
 * declared below: 16 when CONFIG_SIXTEENTH_SUBPEL_UV is enabled,
 * 8 otherwise.
 */
#if CONFIG_SIXTEENTH_SUBPEL_UV
#define SUBPEL_SHIFTS 16
#else
#define SUBPEL_SHIFTS 8
#endif
/* Filter tables: one row of coefficients per SUBPEL_SHIFTS entry. */
extern const short vp8_bilinear_filters[SUBPEL_SHIFTS][2];
extern const short vp8_sub_pel_filters[SUBPEL_SHIFTS][INTERP_EXTEND*2];
/* whether to use a special filter for edge pixels */ /* whether to use a special filter for edge pixels */
#define EDGE_PIXEL_FILTER 0 #define EDGE_PIXEL_FILTER 0
extern const short vp8_bilinear_filters[8][2];
extern const short vp8_sub_pel_filters[8][INTERP_EXTEND*2];
#if EDGE_PIXEL_FILTER > 0 #if EDGE_PIXEL_FILTER > 0
#define EDGE_PIXEL_FILTER_EXTEND 2 #define EDGE_PIXEL_FILTER_EXTEND 2
extern const short vp8_sub_pel_filters_ns[64][4*EDGE_PIXEL_FILTER_EXTEND*EDGE_PIXEL_FILTER_EXTEND]; extern const short vp8_sub_pel_filters_ns[SUBPEL_SHIFTS*SUBPEL_SHIFTS][4*EDGE_PIXEL_FILTER_EXTEND*EDGE_PIXEL_FILTER_EXTEND];
#endif #endif
#endif //FILTER_H #endif //FILTER_H
...@@ -180,7 +180,11 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf) ...@@ -180,7 +180,11 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{ {
ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3); ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
#if CONFIG_SIXTEENTH_SUBPEL_UV
sppf(ptr, d->pre_stride, (d->bmi.mv.as_mv.col & 7)<<1, (d->bmi.mv.as_mv.row & 7)<<1, pred_ptr, pitch);
#else
sppf(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch); sppf(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
#endif
} }
else else
{ {
...@@ -214,7 +218,11 @@ static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch) ...@@ -214,7 +218,11 @@ static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{ {
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict8x8(ptr, d->pre_stride, (d->bmi.mv.as_mv.col & 7)<<1, (d->bmi.mv.as_mv.row & 7)<<1, pred_ptr, pitch);
#else
x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch); x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
#endif
} }
else else
{ {
...@@ -233,7 +241,11 @@ static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch) ...@@ -233,7 +241,11 @@ static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{ {
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict8x4(ptr, d->pre_stride, (d->bmi.mv.as_mv.col & 7)<<1, (d->bmi.mv.as_mv.row & 7)<<1, pred_ptr, pitch);
#else
x->subpixel_predict8x4(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch); x->subpixel_predict8x4(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
#endif
} }
else else
{ {
...@@ -249,8 +261,10 @@ void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x) ...@@ -249,8 +261,10 @@ void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
unsigned char *upred_ptr = &x->predictor[256]; unsigned char *upred_ptr = &x->predictor[256];
unsigned char *vpred_ptr = &x->predictor[320]; unsigned char *vpred_ptr = &x->predictor[320];
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; int omv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; int omv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int mv_row = omv_row;
int mv_col = omv_col;
int offset; int offset;
int pre_stride = x->block[16].pre_stride; int pre_stride = x->block[16].pre_stride;
...@@ -275,11 +289,19 @@ void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x) ...@@ -275,11 +289,19 @@ void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
uptr = x->pre.u_buffer + offset; uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset; vptr = x->pre.v_buffer + offset;
#if CONFIG_SIXTEENTH_SUBPEL_UV
if ((omv_row | omv_col) & 15)
{
x->subpixel_predict8x8(uptr, pre_stride, omv_col & 15, omv_row & 15, upred_ptr, 8);
x->subpixel_predict8x8(vptr, pre_stride, omv_col & 15, omv_row & 15, vpred_ptr, 8);
}
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
if ((mv_row | mv_col) & 7) if ((mv_row | mv_col) & 7)
{ {
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8); x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8); x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
} }
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
else else
{ {
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8); RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
...@@ -361,7 +383,11 @@ void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x) ...@@ -361,7 +383,11 @@ void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
if ((mv_row | mv_col) & 7) if ((mv_row | mv_col) & 7)
{ {
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict16x16(ptr, pre_stride, (mv_col & 7)<<1, (mv_row & 7)<<1, pred_ptr, 16);
#else
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16); x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
#endif
} }
else else
{ {
...@@ -418,6 +444,7 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, ...@@ -418,6 +444,7 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
unsigned char *ptr; unsigned char *ptr;
unsigned char *uptr, *vptr; unsigned char *uptr, *vptr;
int_mv _o16x16mv;
int_mv _16x16mv; int_mv _16x16mv;
unsigned char *ptr_base = x->pre.y_buffer; unsigned char *ptr_base = x->pre.y_buffer;
...@@ -434,13 +461,18 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, ...@@ -434,13 +461,18 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
if ( _16x16mv.as_int & 0x00070007) if ( _16x16mv.as_int & 0x00070007)
{ {
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict16x16(ptr, pre_stride, (_16x16mv.as_mv.col & 7)<<1, (_16x16mv.as_mv.row & 7)<<1, dst_y, dst_ystride);
#else
x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride); x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
#endif
} }
else else
{ {
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, dst_ystride); RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, dst_ystride);
} }
_o16x16mv = _16x16mv;
/* calc uv motion vectors */ /* calc uv motion vectors */
if ( _16x16mv.as_mv.row < 0) if ( _16x16mv.as_mv.row < 0)
_16x16mv.as_mv.row -= 1; _16x16mv.as_mv.row -= 1;
...@@ -463,11 +495,19 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, ...@@ -463,11 +495,19 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
uptr = x->pre.u_buffer + offset; uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset; vptr = x->pre.v_buffer + offset;
#if CONFIG_SIXTEENTH_SUBPEL_UV
if ( _o16x16mv.as_int & 0x000f000f)
{
x->subpixel_predict8x8(uptr, pre_stride, _o16x16mv.as_mv.col & 15, _o16x16mv.as_mv.row & 15, dst_u, dst_uvstride);
x->subpixel_predict8x8(vptr, pre_stride, _o16x16mv.as_mv.col & 15, _o16x16mv.as_mv.row & 15, dst_v, dst_uvstride);
}
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
if ( _16x16mv.as_int & 0x00070007) if ( _16x16mv.as_int & 0x00070007)
{ {
x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride); x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride); x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
} }
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
else else
{ {
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, dst_u, dst_uvstride); RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, dst_u, dst_uvstride);
...@@ -503,6 +543,7 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x, ...@@ -503,6 +543,7 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
int mv_row = x->mode_info_context->mbmi.second_mv.as_mv.row; int mv_row = x->mode_info_context->mbmi.second_mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.second_mv.as_mv.col; int mv_col = x->mode_info_context->mbmi.second_mv.as_mv.col;
int omv_row, omv_col;
unsigned char *ptr_base = x->second_pre.y_buffer; unsigned char *ptr_base = x->second_pre.y_buffer;
int pre_stride = x->block[0].pre_stride; int pre_stride = x->block[0].pre_stride;
...@@ -511,7 +552,11 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x, ...@@ -511,7 +552,11 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
if ((mv_row | mv_col) & 7) if ((mv_row | mv_col) & 7)
{ {
#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict_avg16x16(ptr, pre_stride, (mv_col & 7)<<1, (mv_row & 7)<<1, dst_y, dst_ystride);
#else
x->subpixel_predict_avg16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride); x->subpixel_predict_avg16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride);
#endif
} }
else else
{ {
...@@ -519,6 +564,8 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x, ...@@ -519,6 +564,8 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
} }
/* calc uv motion vectors */ /* calc uv motion vectors */
omv_row = mv_row;
omv_col = mv_col;
mv_row = (mv_row + (mv_row > 0)) >> 1; mv_row = (mv_row + (mv_row > 0)) >> 1;
mv_col = (mv_col + (mv_col > 0)) >> 1; mv_col = (mv_col + (mv_col > 0)) >> 1;
...@@ -530,11 +577,19 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x, ...@@ -530,11 +577,19 @@ void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
uptr = x->second_pre.u_buffer + offset; uptr = x->second_pre.u_buffer + offset;
vptr = x->second_pre.v_buffer + offset; vptr = x->second_pre.v_buffer + offset;
#if CONFIG_SIXTEENTH_SUBPEL_UV
if ((omv_row | omv_col) & 15)
{
x->subpixel_predict_avg8x8(uptr, pre_stride, omv_col & 15, omv_row & 15, dst_u, dst_uvstride);
x->subpixel_predict_avg8x8(vptr, pre_stride, omv_col & 15, omv_row & 15, dst_v, dst_uvstride);
}
#else /* CONFIG_SIXTEENTH_SUBPEL_UV */
if ((mv_row | mv_col) & 7) if ((mv_row | mv_col) & 7)
{ {
x->subpixel_predict_avg8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, dst_u, dst_uvstride); x->subpixel_predict_avg8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, dst_u, dst_uvstride);
x->subpixel_predict_avg8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, dst_v, dst_uvstride); x->subpixel_predict_avg8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, dst_v, dst_uvstride);
} }
#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
else else
{ {
RECON_INVOKE(&x->rtcd->recon, avg8x8)(uptr, pre_stride, dst_u, dst_uvstride); RECON_INVOKE(&x->rtcd->recon, avg8x8)(uptr, pre_stride, dst_u, dst_uvstride);
......
...@@ -1495,13 +1495,33 @@ k2_k4: ...@@ -1495,13 +1495,33 @@ k2_k4:
times 8 db 36, -11 times 8 db 36, -11
times 8 db 12, -6 times 8 db 12, -6
align 16 align 16
%if CONFIG_SIXTEENTH_SUBPEL_UV
vp8_bilinear_filters_ssse3: vp8_bilinear_filters_ssse3:
times 8 db 128, 0 times 8 db 128, 0
times 8 db 120, 8
times 8 db 112, 16 times 8 db 112, 16
times 8 db 104, 24
times 8 db 96, 32 times 8 db 96, 32
times 8 db 88, 40
times 8 db 80, 48 times 8 db 80, 48
times 8 db 72, 56
times 8 db 64, 64 times 8 db 64, 64
times 8 db 56, 72
times 8 db 48, 80 times 8 db 48, 80
times 8 db 40, 88
times 8 db 32, 96 times 8 db 32, 96
times 8 db 24, 104
times 8 db 16, 112 times 8 db 16, 112
times 8 db 8, 120
%else
vp8_bilinear_filters_ssse3:
times 8 db 128, 0
times 8 db 112, 16
times 8 db 96, 32
times 8 db 80, 48
times 8 db 64, 64
times 8 db 48, 80
times 8 db 32, 96
times 8 db 16, 112
%endif
...@@ -13,8 +13,15 @@ ...@@ -13,8 +13,15 @@
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
#include "vp8/common/subpixel.h" #include "vp8/common/subpixel.h"
#if CONFIG_SIXTEENTH_SUBPEL_UV
extern const short vp8_six_tap_mmx[16][6*8];
extern const short vp8_bilinear_filters_mmx[16][2*8];
#else
extern const short vp8_six_tap_mmx[8][6*8]; extern const short vp8_six_tap_mmx[8][6*8];
extern const short vp8_bilinear_filters_mmx[8][2*8]; extern const short vp8_bilinear_filters_mmx[8][2*8];
#endif
//#define ANNOUNCE_FUNCTION