Commit 6fa47a5f by Deb Mukherjee

### Adds support for enhanced interpolation for subpel motion using an 8-tap filter

```
The results with 3 different 8-tap filters on the derf set are in:
(link not preserved in this copy)
The one that gives the most gain achieves an overall gain of about
0.6%. The results for a set of 12 HD (720p) videos are in:
(link not preserved in this copy)
with max gain of 0.55% with the same filter. The best filter apparently
achieves the best trade-off between pass band ripple and stop band
attenuation.

Change-Id: I919e28ae245c0493147fa0864f8c9d048a9dd530
```
parent 5a5d24ee
 ... ... @@ -227,6 +227,7 @@ EXPERIMENT_LIST=" uvintra newnear newlpf enhanced_interp " CONFIG_LIST=" external_build ... ...
 ... ... @@ -25,6 +25,50 @@ DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = { 16, 112 } }; #if CONFIG_ENHANCED_INTERP #define FILTER_ALPHA 75 DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][2*INTERP_EXTEND]) = { /* Generated using MATLAB: * alpha = 0.75; * b=intfilt(8,4,alpha); * bi=round(128*b); * ba=flipud(reshape([bi 0], 8, 8)); * disp(num2str(ba, '%d,')) */ #if FILTER_ALPHA == 75 /* alpha = 0.75 */ { 0, 0, 0, 128, 0, 0, 0, 0}, {-1, 4, -11, 123, 18, -7, 3, -1}, {-2, 7, -19, 113, 38, -14, 6, -2}, {-3, 9, -22, 98, 59, -19, 8, -3}, {-3, 9, -22, 80, 80, -22, 9, -3}, {-3, 8, -19, 59, 98, -22, 9, -3}, {-2, 6, -14, 38, 113, -19, 7, -2}, {-1, 3, -7, 18, 123, -11, 4, -1} #elif FILTER_ALPHA == 625 /* alpha = 0.625 */ { 0, 0, 0, 128, 0, 0, 0, 0}, {-1, 3, -10, 123, 18, -6, 2, -1}, {-1, 5, -17, 112, 38, -12, 4, -1}, {-1, 6, -20, 97, 58, -17, 6, -1}, {-2, 7, -20, 79, 79, -20, 7, -2}, {-1, 6, -17, 58, 97, -20, 6, -1}, {-1, 4, -12, 38, 112, -17, 5, -1}, {-1, 2, -6, 18, 123, -10, 3, -1} #elif FILTER_ALPHA == 50 /* alpha = 0.5 */ { 0, 0, 0, 128, 0, 0, 0, 0}, { 0, 2, -10, 122, 18, -6, 2, 0}, {-1, 4, -16, 112, 37, -11, 3, -1}, {-1, 5, -18, 96, 58, -16, 4, -1}, {-1, 5, -18, 78, 78, -18, 5, -1}, {-1, 4, -16, 58, 96, -18, 5, -1}, {-1, 3, -11, 37, 112, -16, 4, -1}, { 0, 2, -6, 18, 122, -10, 2, 0} #endif }; #else DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = { ... ... @@ -37,6 +81,7 @@ DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ { 0, -1, 12, 123, -6, 0 }, }; #endif static void filter_block2d_first_pass ( ... ... @@ -56,6 +101,7 @@ static void filter_block2d_first_pass { for (j = 0; j < output_width; j++) { #if INTERP_EXTEND == 3 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[0] * vp8_filter[2]) + ... ... 
@@ -63,6 +109,29 @@ static void filter_block2d_first_pass ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ #elif INTERP_EXTEND == 4 Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) + ((int)src_ptr[0] * vp8_filter[3]) + ((int)src_ptr[pixel_step] * vp8_filter[4]) + ((int)src_ptr[2 * pixel_step] * vp8_filter[5]) + ((int)src_ptr[3 * pixel_step] * vp8_filter[6]) + ((int)src_ptr[4 * pixel_step] * vp8_filter[7]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ #elif INTERP_EXTEND == 5 Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) + ((int)src_ptr[0] * vp8_filter[4]) + ((int)src_ptr[pixel_step] * vp8_filter[5]) + ((int)src_ptr[2 * pixel_step] * vp8_filter[6]) + ((int)src_ptr[3 * pixel_step] * vp8_filter[7]) + ((int)src_ptr[4 * pixel_step] * vp8_filter[8]) + ((int)src_ptr[5 * pixel_step] * vp8_filter[9]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ #endif /* Normalize back to 0-255 */ Temp = Temp >> VP8_FILTER_SHIFT; ... ... @@ -102,6 +171,7 @@ static void filter_block2d_second_pass for (j = 0; j < output_width; j++) { /* Apply filter */ #if INTERP_EXTEND == 3 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[0] * vp8_filter[2]) + ... ... 
@@ -109,6 +179,29 @@ static void filter_block2d_second_pass ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ #elif INTERP_EXTEND == 4 Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) + ((int)src_ptr[0] * vp8_filter[3]) + ((int)src_ptr[pixel_step] * vp8_filter[4]) + ((int)src_ptr[2 * pixel_step] * vp8_filter[5]) + ((int)src_ptr[3 * pixel_step] * vp8_filter[6]) + ((int)src_ptr[4 * pixel_step] * vp8_filter[7]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ #elif INTERP_EXTEND == 5 Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) + ((int)src_ptr[0] * vp8_filter[4]) + ((int)src_ptr[pixel_step] * vp8_filter[5]) + ((int)src_ptr[2 * pixel_step] * vp8_filter[6]) + ((int)src_ptr[3 * pixel_step] * vp8_filter[7]) + ((int)src_ptr[4 * pixel_step] * vp8_filter[8]) + ((int)src_ptr[5 * pixel_step] * vp8_filter[9]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ #endif /* Normalize back to 0-255 */ Temp = Temp >> VP8_FILTER_SHIFT; ... ... @@ -157,6 +250,7 @@ static void filter_block2d_second_pass_avg for (j = 0; j < output_width; j++) { /* Apply filter */ #if INTERP_EXTEND == 3 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[0] * vp8_filter[2]) + ... ... 
@@ -164,6 +258,29 @@ static void filter_block2d_second_pass_avg ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ #elif INTERP_EXTEND == 4 Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) + ((int)src_ptr[0] * vp8_filter[3]) + ((int)src_ptr[pixel_step] * vp8_filter[4]) + ((int)src_ptr[2 * pixel_step] * vp8_filter[5]) + ((int)src_ptr[3 * pixel_step] * vp8_filter[6]) + ((int)src_ptr[4 * pixel_step] * vp8_filter[7]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ #elif INTERP_EXTEND == 5 Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) + ((int)src_ptr[0] * vp8_filter[4]) + ((int)src_ptr[pixel_step] * vp8_filter[5]) + ((int)src_ptr[2 * pixel_step] * vp8_filter[6]) + ((int)src_ptr[3 * pixel_step] * vp8_filter[7]) + ((int)src_ptr[4 * pixel_step] * vp8_filter[8]) + ((int)src_ptr[5 * pixel_step] * vp8_filter[9]) + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ #endif /* Normalize back to 0-255 */ Temp = Temp >> VP8_FILTER_SHIFT; ... ... @@ -194,13 +311,14 @@ static void filter_block2d const short *VFilter ) { int FData[9*4]; /* Temp data buffer used in filtering */ int FData[(3+INTERP_EXTEND*2)*4]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... */ filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, 3+INTERP_EXTEND*2, 4, HFilter); /* then filter verticaly... 
*/ filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); filter_block2d_second_pass(FData + 4*(INTERP_EXTEND-1), output_ptr, output_pitch, 4, 4, 4, 4, VFilter); } ... ... @@ -234,17 +352,19 @@ void vp8_sixtap_predict8x8_c { const short *HFilter; const short *VFilter; int FData[13*16]; /* Temp data buffer used in filtering */ // int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ int FData[(7+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, 7+INTERP_EXTEND*2, 8, HFilter); /* then filter verticaly... */ filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); } ... ... @@ -261,16 +381,18 @@ void vp8_sixtap_predict_avg8x8_c { const short *HFilter; const short *VFilter; int FData[13*16]; /* Temp data buffer used in filtering */ // int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ int FData[(7+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, 7+INTERP_EXTEND*2, 8, HFilter); /* then filter verticaly... 
*/ filter_block2d_second_pass_avg(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); filter_block2d_second_pass_avg(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); } #endif /* CONFIG_DUALPRED */ ... ... @@ -286,17 +408,19 @@ void vp8_sixtap_predict8x4_c { const short *HFilter; const short *VFilter; int FData[13*16]; /* Temp data buffer used in filtering */ // int FData[(7+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ int FData[(3+INTERP_EXTEND*2)*8]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, 3+INTERP_EXTEND*2, 8, HFilter); /* then filter verticaly... */ filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); } ... ... @@ -312,17 +436,19 @@ void vp8_sixtap_predict16x16_c { const short *HFilter; const short *VFilter; int FData[21*24]; /* Temp data buffer used in filtering */ // int FData[(15+INTERP_EXTEND*2)*24]; /* Temp data buffer used in filtering */ int FData[(15+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, 15+INTERP_EXTEND*2, 16, HFilter); /* then filter verticaly... 
*/ filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); filter_block2d_second_pass(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); } ... ... @@ -339,17 +465,18 @@ void vp8_sixtap_predict_avg16x16_c { const short *HFilter; const short *VFilter; int FData[21*24]; /* Temp data buffer used in filtering */ // int FData[(15+INTERP_EXTEND*2)*24]; /* Temp data buffer used in filtering */ int FData[(15+INTERP_EXTEND*2)*16]; /* Temp data buffer used in filtering */ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1, 15+INTERP_EXTEND*2, 16, HFilter); /* then filter verticaly... */ filter_block2d_second_pass_avg(FData + 32, dst_ptr, dst_pitch, filter_block2d_second_pass_avg(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); } #endif /* CONFIG_DUALPRED */ ... ...
 ... ... @@ -8,15 +8,17 @@ * be found in the AUTHORS file in the root of the source tree. */ #ifndef FILTER_H #define FILTER_H #include "vpx_config.h" #include "vpx_scale/yv12config.h" #define BLOCK_HEIGHT_WIDTH 4 #define VP8_FILTER_WEIGHT 128 #define VP8_FILTER_SHIFT 7 extern const short vp8_bilinear_filters[8][2]; extern const short vp8_sub_pel_filters[8][6]; extern const short vp8_sub_pel_filters[8][INTERP_EXTEND*2]; #endif //FILTER_H
 ... ... @@ -335,8 +335,8 @@ static void update_mbgraph_frame_stats // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_top_mv.as_int = 0; gld_top_mv.as_int = 0; x->mv_row_min = -(VP8BORDERINPIXELS - 19); x->mv_row_max = (cm->mb_rows - 1) * 16 + VP8BORDERINPIXELS - 19; x->mv_row_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND); x->mv_row_max = (cm->mb_rows - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND; xd->up_available = 0; xd->dst.y_stride = buf->y_stride; xd->pre.y_stride = buf->y_stride; ... ... @@ -353,8 +353,8 @@ static void update_mbgraph_frame_stats // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_left_mv.as_int = arf_top_mv.as_int; gld_left_mv.as_int = gld_top_mv.as_int; x->mv_col_min = -(VP8BORDERINPIXELS - 19); x->mv_col_max = (cm->mb_cols - 1) * 16 + VP8BORDERINPIXELS - 19; x->mv_col_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND); x->mv_col_max = (cm->mb_cols - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND; xd->left_available = 0; for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) ... ...
 ... ... @@ -268,6 +268,7 @@ void vp8_initialize() { static int init_done = 0; if (!init_done) { vp8_scale_machine_specific_config(); ... ...
 ... ... @@ -17,7 +17,14 @@ extern "C" #endif #define VP7BORDERINPIXELS 48 #if CONFIG_ENHANCED_INTERP #define VP8BORDERINPIXELS 64 #define INTERP_EXTEND 4 #else #define VP8BORDERINPIXELS 32 #define INTERP_EXTEND 3 #endif /************************************* For INT_YUV: ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!