Commit 8e3d0e4d authored by Jingning Han

Add building blocks for 4x8/8x4 rd search

These building blocks enable rate-distortion optimization search
over the 8x4 and 4x8 block sizes. They still need to be converted
into mmx/sse forms.

Change-Id: I570ea2d22d14ceec3fe3575128d7dfa172a577de
parent c0f70cca
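For orientation: the encoder keeps a per-block-size table of SAD/variance function pointers (cpi->fn_ptr[], see the BFP() assignments below), and rate-distortion search costs a candidate partition by calling through the entry for its size. A hedged sketch of how the new 8x4 entry would be consumed; the fragment is illustrative only, not the actual libvpx search code:

/* Illustrative fragment (not actual vp9_mcomp.c code): cost an 8x4
 * candidate through the per-size table populated by this commit.
 * The fields sdf (full-pel SAD) and vf (variance) follow the BFP()
 * argument order used later in the diff. */
const vp9_variance_fn_ptr_t *fn = &cpi->fn_ptr[BLOCK_8X4];
unsigned int sse;
unsigned int sad = fn->sdf(src, src_stride, pred, pred_stride, 0x7fffffff);
unsigned int var = fn->vf(src, src_stride, pred, pred_stride, &sse);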
@@ -272,7 +272,7 @@ typedef struct {
typedef struct {
  MB_MODE_INFO mbmi;
-  union b_mode_info bmi[4];
+  union b_mode_info bmi[16];
} MODE_INFO;

struct scale_factors {
...
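Growing bmi[] from 4 to 16 entries gives MODE_INFO room for one b_mode_info per 4x4 unit, which the finer 4x8/8x4 partitioning needs. A hedged illustration of the kind of per-unit access this enables; the row-major indexing helper below is an assumption made for the example, not a helper that exists in the tree:

/* Hypothetical accessor, assuming the sixteen 4x4 units covered by one
 * MODE_INFO are laid out row-major. Illustration only. */
static union b_mode_info *bmi_at(MODE_INFO *mi, int row4x4, int col4x4) {
  return &mi->bmi[row4x4 * 4 + col4x4];
}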
@@ -367,6 +367,19 @@ vp9_sub_pixel_variance8x8_sse2=vp9_sub_pixel_variance8x8_wmt
prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance8x8

# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
prototype unsigned int vp9_sub_pixel_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance8x4
prototype unsigned int vp9_sub_pixel_avg_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance8x4
prototype unsigned int vp9_sub_pixel_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance4x8
prototype unsigned int vp9_sub_pixel_avg_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance4x8

prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance4x4 sse2 mmx
vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt

@@ -404,6 +417,13 @@ specialize vp9_sad8x16 mmx sse2
prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad8x8 mmx sse2

# TODO(jingning): need to convert these functions into mmx/sse2 form
prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad8x4
prototype unsigned int vp9_sad4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad4x8

prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad4x4 mmx sse

@@ -509,6 +529,13 @@ specialize vp9_sad8x16x4d sse2
prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad8x8x4d sse2

# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad8x4x4d
prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x8x4d

prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x4x4d sse

prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
...
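The prototype/specialize pairs above feed libvpx's run-time CPU dispatch (RTCD) generator. For a function with no SIMD specializations yet, such as the new vp9_sad8x4, the generated header simply aliases the generic name to the C implementation, roughly as sketched below (an approximation of the generated vp9_rtcd.h, not a verbatim copy); adding sse2/mmx variants later only changes this mapping:

/* Approximate shape of the generated RTCD entry for an unspecialized
 * function: the generic symbol resolves straight to the C version. */
unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride,
                          const uint8_t *ref_ptr, int ref_stride,
                          unsigned int max_sad);
#define vp9_sad8x4 vp9_sad8x4_c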
@@ -1597,11 +1597,15 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
      vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL,
      vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
-  BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL,
-      NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-  BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL,
-      NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+  BFP(BLOCK_8X4, vp9_sad8x4, vp9_variance8x4, vp9_sub_pixel_variance8x4,
+      vp9_sub_pixel_avg_variance8x4, NULL, NULL,
+      NULL, NULL, NULL,
+      vp9_sad8x4x4d)
+  BFP(BLOCK_4X8, vp9_sad4x8, vp9_variance4x8, vp9_sub_pixel_variance4x8,
+      vp9_sub_pixel_avg_variance4x8, NULL, NULL,
+      NULL, NULL, NULL,
+      vp9_sad4x8x4d)
  BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
      vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
...
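BFP() is a helper macro local to this file that fills in the cpi->fn_ptr[] entry for one block size; its arguments map, in order, onto the SAD, variance, sub-pixel variance, sub-pixel average variance, half-pel variance, multi-SAD and 4-reference SAD slots. Approximately, the new BLOCK_8X4 call expands to the assignments below (field names recalled from vp9_variance.h, so treat this as a close sketch rather than a verbatim expansion):

cpi->fn_ptr[BLOCK_8X4].sdf            = vp9_sad8x4;                     /* full-pel SAD */
cpi->fn_ptr[BLOCK_8X4].vf             = vp9_variance8x4;                /* variance */
cpi->fn_ptr[BLOCK_8X4].svf            = vp9_sub_pixel_variance8x4;      /* sub-pel variance */
cpi->fn_ptr[BLOCK_8X4].svaf           = vp9_sub_pixel_avg_variance8x4;  /* sub-pel avg variance */
cpi->fn_ptr[BLOCK_8X4].svf_halfpix_h  = NULL;                           /* half-pel shortcuts unused */
cpi->fn_ptr[BLOCK_8X4].svf_halfpix_v  = NULL;
cpi->fn_ptr[BLOCK_8X4].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X4].sdx3f          = NULL;                           /* x3/x8 SAD batches not wired up */
cpi->fn_ptr[BLOCK_8X4].sdx8f          = NULL;
cpi->fn_ptr[BLOCK_8X4].sdx4df         = vp9_sad8x4x4d;                  /* 4-reference SAD */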
@@ -1096,6 +1096,50 @@ static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
  return r;
}

static enum BlockSize get_block_size(int bw, int bh) {
  if (bw == 4 && bh == 4)
    return BLOCK_4X4;

  if (bw == 4 && bh == 8)
    return BLOCK_4X8;

  if (bw == 8 && bh == 4)
    return BLOCK_8X4;

  if (bw == 8 && bh == 8)
    return BLOCK_8X8;

  if (bw == 8 && bh == 16)
    return BLOCK_8X16;

  if (bw == 16 && bh == 8)
    return BLOCK_16X8;

  if (bw == 16 && bh == 16)
    return BLOCK_16X16;

  if (bw == 32 && bh == 32)
    return BLOCK_32X32;

  if (bw == 32 && bh == 16)
    return BLOCK_32X16;

  if (bw == 16 && bh == 32)
    return BLOCK_16X32;

  if (bw == 64 && bh == 32)
    return BLOCK_64X32;

  if (bw == 32 && bh == 64)
    return BLOCK_32X64;

  if (bw == 64 && bh == 64)
    return BLOCK_64X64;

  assert(0);
  return -1;
}

static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                    BEST_SEG_INFO *bsi,
                                    int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {

@@ -1111,6 +1155,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
  int sbr = 0, sbd = 0;
  int segmentyrate = 0;
  int best_eobs[4] = { 0 };
#if CONFIG_AB4X4
  BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
  int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
#endif
  vp9_variance_fn_ptr_t *v_fn_ptr;

@@ -1120,7 +1168,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
  vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));

#if CONFIG_AB4X4
  v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];
#else
  v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4];
#endif

  // 64 makes this threshold really big effectively
  // making it so that we very rarely check mvs on

@@ -1670,51 +1722,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
              frame_type, block_size);
}

-static enum BlockSize get_block_size(int bw, int bh) {
-  if (bw == 4 && bh == 4)
-    return BLOCK_4X4;
-  if (bw == 4 && bh == 8)
-    return BLOCK_4X8;
-  if (bw == 8 && bh == 4)
-    return BLOCK_8X4;
-  if (bw == 8 && bh == 8)
-    return BLOCK_8X8;
-  if (bw == 8 && bh == 16)
-    return BLOCK_8X16;
-  if (bw == 16 && bh == 8)
-    return BLOCK_16X8;
-  if (bw == 16 && bh == 16)
-    return BLOCK_16X16;
-  if (bw == 32 && bh == 32)
-    return BLOCK_32X32;
-  if (bw == 32 && bh == 16)
-    return BLOCK_32X16;
-  if (bw == 16 && bh == 32)
-    return BLOCK_16X32;
-  if (bw == 64 && bh == 32)
-    return BLOCK_64X32;
-  if (bw == 32 && bh == 64)
-    return BLOCK_32X64;
-  if (bw == 64 && bh == 64)
-    return BLOCK_64X64;
-  assert(0);
-  return -1;
-}

static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int *dist) {
  // This function models the rate and distortion for a Laplacian
...
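Under CONFIG_AB4X4, rd_check_segment_txsize() no longer hard-codes the BLOCK_4X4 function table: it recovers the partition's pixel dimensions from b_width_log2()/b_height_log2() (which count in 4-sample units) and maps them through the new get_block_size() helper. A worked example of that lookup for an 8x4 partition; the fragment is illustrative, and only get_block_size(), BLOCK_8X4 and cpi->fn_ptr are taken from the diff above:

/* Worked example: an 8x4 partition has bwl == 1 and bhl == 0, so the
 * CONFIG_AB4X4 path picks the newly populated BLOCK_8X4 table instead of
 * falling back to the 4x4 one. */
int bwl = 1, bhl = 0;                                     /* width 8, height 4 */
enum BlockSize bs = get_block_size(4 << bwl, 4 << bhl);   /* get_block_size(8, 4) */
assert(bs == BLOCK_8X4);
v_fn_ptr = &cpi->fn_ptr[bs];                              /* 8x4 SAD/variance table */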
@@ -156,6 +156,21 @@ unsigned int vp9_sad8x16_c(const uint8_t *src_ptr,
  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16);
}

unsigned int vp9_sad8x4_c(const uint8_t *src_ptr,
                          int src_stride,
                          const uint8_t *ref_ptr,
                          int ref_stride,
                          unsigned int max_sad) {
  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 4);
}

unsigned int vp9_sad4x8_c(const uint8_t *src_ptr,
                          int src_stride,
                          const uint8_t *ref_ptr,
                          int ref_stride,
                          unsigned int max_sad) {
  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 8);
}

unsigned int vp9_sad4x4_c(const uint8_t *src_ptr,
                          int src_stride,

@@ -563,6 +578,36 @@ void vp9_sad8x16x4d_c(const uint8_t *src_ptr,
                             ref_ptr[3], ref_stride, 0x7fffffff);
}

void vp9_sad8x4x4d_c(const uint8_t *src_ptr,
                     int src_stride,
                     const uint8_t* const ref_ptr[],
                     int ref_stride,
                     unsigned int *sad_array) {
  sad_array[0] = vp9_sad8x4(src_ptr, src_stride,
                            ref_ptr[0], ref_stride, 0x7fffffff);
  sad_array[1] = vp9_sad8x4(src_ptr, src_stride,
                            ref_ptr[1], ref_stride, 0x7fffffff);
  sad_array[2] = vp9_sad8x4(src_ptr, src_stride,
                            ref_ptr[2], ref_stride, 0x7fffffff);
  sad_array[3] = vp9_sad8x4(src_ptr, src_stride,
                            ref_ptr[3], ref_stride, 0x7fffffff);
}

void vp9_sad4x8x4d_c(const uint8_t *src_ptr,
                     int src_stride,
                     const uint8_t* const ref_ptr[],
                     int ref_stride,
                     unsigned int *sad_array) {
  sad_array[0] = vp9_sad4x8(src_ptr, src_stride,
                            ref_ptr[0], ref_stride, 0x7fffffff);
  sad_array[1] = vp9_sad4x8(src_ptr, src_stride,
                            ref_ptr[1], ref_stride, 0x7fffffff);
  sad_array[2] = vp9_sad4x8(src_ptr, src_stride,
                            ref_ptr[2], ref_stride, 0x7fffffff);
  sad_array[3] = vp9_sad4x8(src_ptr, src_stride,
                            ref_ptr[3], ref_stride, 0x7fffffff);
}

void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
                     int src_stride,
                     const uint8_t* const ref_ptr[],
...
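All of the scalar SAD entry points above delegate to the file's existing m-by-n helper; note that the x4d wrappers pass 0x7fffffff as max_sad, effectively disabling any early exit. A self-contained sketch of the computation (the in-tree sad_mx_n_c may differ in details such as early termination on max_sad):

#include <stdint.h>
#include <stdlib.h>

/* Sum of absolute differences over an m-wide, n-high block; this is what
 * the new 8x4/4x8 wrappers request from the shared helper. */
static unsigned int sad_m_by_n(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               int m, int n) {
  unsigned int sad = 0;
  int r, c;

  for (r = 0; r < n; ++r) {
    for (c = 0; c < m; ++c)
      sad += abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}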
@@ -820,3 +820,91 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
  comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
  return vp9_variance8x16_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);

  return vp9_variance8x4_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
  comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);

  return vp9_variance8x4_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  // First pass needs block height + 1 = 9 intermediate rows of width 4.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 4, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);

  return vp9_variance4x8_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  // First pass needs block height + 1 = 9 intermediate rows of width 4.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 4, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
  comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);

  return vp9_variance4x8_c(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
}
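The four sub-pixel variance routines above share one pattern: a first pass filters horizontally with the 2-tap bilinear kernel and must produce block height + 1 intermediate rows (5 rows of 8 samples for 8x4, 9 rows of 4 samples for 4x8, which the fdata3[8 * 5] and fdata3[5 * 8] scratch buffers are sized to hold), and a second pass filters vertically between consecutive intermediate rows before the plain variance function is applied. A self-contained sketch of the first pass under those assumptions (the in-tree var_filter_block2d_bil_first_pass may differ in parameter names and rounding details):

#include <stdint.h>

#define BIL_FILTER_BITS 7  /* assumed precision: the 2-tap filter taps sum to 128 */

/* Horizontal (first) pass: blend each sample with its neighbour one
 * pixel_step away, writing output_height x output_width intermediate
 * values at 16-bit precision. */
static void bil_first_pass(const uint8_t *src, uint16_t *dst,
                           int src_stride, int pixel_step,
                           int output_height, int output_width,
                           const int16_t *filter) {
  int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      dst[j] = (uint16_t)((src[j] * filter[0] + src[j + pixel_step] * filter[1] +
                           (1 << (BIL_FILTER_BITS - 1))) >> BIL_FILTER_BITS);
    }
    src += src_stride;
    dst += output_width;
  }
}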