Commit 294777b9 authored by Johann's avatar Johann Committed by Gerrit Code Review
Browse files

Merge "Reduce partial frame copy in encoder's pick_filter_level_fast"

parents efa69d26 de828094
...@@ -506,7 +506,8 @@ void vp8_loop_filter_partial_frame ...@@ -506,7 +506,8 @@ void vp8_loop_filter_partial_frame
unsigned char *y_ptr; unsigned char *y_ptr;
int mb_row; int mb_row;
int mb_col; int mb_col;
int mb_cols = post->y_width >> 4; int mb_cols = post->y_width >> 4;
int mb_rows = post->y_height >> 4;
int linestocopy, i; int linestocopy, i;
...@@ -521,15 +522,9 @@ void vp8_loop_filter_partial_frame ...@@ -521,15 +522,9 @@ void vp8_loop_filter_partial_frame
int lvl_seg[MAX_MB_SEGMENTS]; int lvl_seg[MAX_MB_SEGMENTS];
mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* number of MB rows to use in partial filtering */
linestocopy = mb_rows / PARTIAL_FRAME_FRACTION;
/* 3 is a magic number. 4 is probably magic too */ linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
linestocopy = (post->y_height >> (4 + 3));
if (linestocopy < 1)
linestocopy = 1;
linestocopy <<= 4;
/* Note the baseline filter values for each segment */ /* Note the baseline filter values for each segment */
/* See vp8_loop_filter_frame_init. Rather than call that for each change /* See vp8_loop_filter_frame_init. Rather than call that for each change
...@@ -554,8 +549,9 @@ void vp8_loop_filter_partial_frame ...@@ -554,8 +549,9 @@ void vp8_loop_filter_partial_frame
} }
} }
/* Set up the buffer pointers */ /* Set up the buffer pointers; partial image starts at ~middle of frame */
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride; y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride;
mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
/* vp8_filter each macro block */ /* vp8_filter each macro block */
for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++) for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
......
...@@ -15,7 +15,10 @@ ...@@ -15,7 +15,10 @@
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
#include "vpx_config.h" #include "vpx_config.h"
#define MAX_LOOP_FILTER 63 #define MAX_LOOP_FILTER 63
/* fraction of total macroblock rows to be used in fast filter level picking */
/* has to be > 2 */
#define PARTIAL_FRAME_FRACTION 8
typedef enum typedef enum
{ {
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
#include "vp8/encoder/variance.h" #include "vp8/encoder/variance.h"
#include "vp8/encoder/onyx_int.h" #include "vp8/encoder/onyx_int.h"
extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); extern void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
void vp8_arch_arm_encoder_init(VP8_COMP *cpi) void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
{ {
...@@ -123,15 +123,15 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) ...@@ -123,15 +123,15 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;
cpi->rtcd.quantize.fastquantb_pair = vp8_fast_quantize_b_pair_neon; cpi->rtcd.quantize.fastquantb_pair = vp8_fast_quantize_b_pair_neon;
} }
#endif #endif /* HAVE_ARMV7 */
#endif /* CONFIG_RUNTIME_CPU_DETECT */
#if HAVE_ARMV7 #if HAVE_ARMV7
#if CONFIG_RUNTIME_CPU_DETECT #if CONFIG_RUNTIME_CPU_DETECT
if (flags & HAS_NEON) if (flags & HAS_NEON)
#endif #endif
{ {
vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon; vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame_neon;
} }
#endif #endif
#endif
} }
...@@ -8,20 +8,16 @@ ...@@ -8,20 +8,16 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "vp8/common/loopfilter.h"
#include "vpx_scale/yv12config.h"
#include "vp8/common/onyxc_int.h" extern void vp8_memcpy_partial_neon(unsigned char *dst_ptr,
#include "vp8/encoder/onyx_int.h" unsigned char *src_ptr,
#include "vp8/encoder/quantize.h" int sz);
#include "vpx_mem/vpx_mem.h"
#include "vpx_scale/yv12extend.h"
#include "vpx_scale/vpxscale.h"
#include "vp8/common/alloccommon.h"
extern void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc,
void YV12_BUFFER_CONFIG *dst_ybc)
vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction)
{ {
unsigned char *src_y, *dst_y; unsigned char *src_y, *dst_y;
int yheight; int yheight;
...@@ -34,17 +30,19 @@ vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG ...@@ -34,17 +30,19 @@ vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG
yheight = src_ybc->y_height; yheight = src_ybc->y_height;
ystride = src_ybc->y_stride; ystride = src_ybc->y_stride;
linestocopy = (yheight >> (Fraction + 4)); /* number of MB rows to use in partial filtering */
linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION;
if (linestocopy < 1) linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
linestocopy = 1;
/* Copy extra 4 so that full filter context is available if filtering done
linestocopy <<= 4; * on the copied partial frame and not original. Partial filter does mb
* filtering for top row also, which can modify 3 pixels above.
yoffset = ystride * ((yheight >> 5) * 16 - 8); */
linestocopy += 4;
/* partial image starts at ~middle of frame (macroblock border) */
yoffset = ystride * (((yheight >> 5) * 16) - 4);
src_y = src_ybc->y_buffer + yoffset; src_y = src_ybc->y_buffer + yoffset;
dst_y = dst_ybc->y_buffer + yoffset; dst_y = dst_ybc->y_buffer + yoffset;
//vpx_memcpy (dst_y, src_y, ystride * (linestocopy +16)); vp8_memcpy_partial_neon(dst_y, src_y, ystride * linestocopy);
vp8_memcpy_neon((unsigned char *)dst_y, (unsigned char *)src_y, (int)(ystride *(linestocopy + 16)));
} }
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
; ;
EXPORT |vp8_memcpy_neon| EXPORT |vp8_memcpy_partial_neon|
ARM ARM
REQUIRE8 REQUIRE8
...@@ -17,8 +17,10 @@ ...@@ -17,8 +17,10 @@
AREA ||.text||, CODE, READONLY, ALIGN=2 AREA ||.text||, CODE, READONLY, ALIGN=2
;========================================= ;=========================================
;void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz); ;this is not a full memcpy function!!!
|vp8_memcpy_neon| PROC ;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
; int sz);
|vp8_memcpy_partial_neon| PROC
;pld [r1] ;preload pred data ;pld [r1] ;preload pred data
;pld [r1, #128] ;pld [r1, #128]
;pld [r1, #256] ;pld [r1, #256]
......
...@@ -17,8 +17,10 @@ ...@@ -17,8 +17,10 @@
void vp8_arch_x86_encoder_init(VP8_COMP *cpi); void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
void vp8_arch_arm_encoder_init(VP8_COMP *cpi); void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); YV12_BUFFER_CONFIG *dst_ybc);
extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
YV12_BUFFER_CONFIG *dst_ybc);
void vp8_cmachine_specific_config(VP8_COMP *cpi) void vp8_cmachine_specific_config(VP8_COMP *cpi)
{ {
......
...@@ -29,12 +29,11 @@ extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, ...@@ -29,12 +29,11 @@ extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
#define IF_RTCD(x) NULL #define IF_RTCD(x) NULL
#endif #endif
extern void extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
(*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
YV12_BUFFER_CONFIG *dst_ybc,
int Fraction); void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
void YV12_BUFFER_CONFIG *dst_ybc)
vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction)
{ {
unsigned char *src_y, *dst_y; unsigned char *src_y, *dst_y;
int yheight; int yheight;
...@@ -47,21 +46,26 @@ vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst ...@@ -47,21 +46,26 @@ vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst
yheight = src_ybc->y_height; yheight = src_ybc->y_height;
ystride = src_ybc->y_stride; ystride = src_ybc->y_stride;
linestocopy = (yheight >> (Fraction + 4)); /* number of MB rows to use in partial filtering */
linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION;
if (linestocopy < 1) linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
linestocopy = 1;
/* Copy extra 4 so that full filter context is available if filtering done
linestocopy <<= 4; * on the copied partial frame and not original. Partial filter does mb
* filtering for top row also, which can modify 3 pixels above.
yoffset = ystride * ((yheight >> 5) * 16 - 8); */
linestocopy += 4;
/* partial image starts at ~middle of frame (macroblock border)*/
yoffset = ystride * (((yheight >> 5) * 16) - 4);
src_y = src_ybc->y_buffer + yoffset; src_y = src_ybc->y_buffer + yoffset;
dst_y = dst_ybc->y_buffer + yoffset; dst_y = dst_ybc->y_buffer + yoffset;
vpx_memcpy(dst_y, src_y, ystride *(linestocopy + 16)); vpx_memcpy(dst_y, src_y, ystride * linestocopy);
} }
static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, int Fraction, const vp8_variance_rtcd_vtable_t *rtcd) static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest,
const vp8_variance_rtcd_vtable_t *rtcd)
{ {
int i, j; int i, j;
int Total = 0; int Total = 0;
...@@ -69,17 +73,16 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF ...@@ -69,17 +73,16 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
unsigned char *src = source->y_buffer; unsigned char *src = source->y_buffer;
unsigned char *dst = dest->y_buffer; unsigned char *dst = dest->y_buffer;
int linestocopy = (source->y_height >> (Fraction + 4)); int linestocopy;
(void)rtcd;
if (linestocopy < 1)
linestocopy = 1;
linestocopy <<= 4; /* number of MB rows to use in partial filtering */
linestocopy = (source->y_height >> 4) / PARTIAL_FRAME_FRACTION;
linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
srcoffset = source->y_stride * (dest->y_height >> 5) * 16; /* partial image starts at ~middle of frame (macroblock border)*/
dstoffset = dest->y_stride * (dest->y_height >> 5) * 16; srcoffset = source->y_stride * ((dest->y_height >> 5) * 16);
dstoffset = dest->y_stride * ((dest->y_height >> 5) * 16);
src += srcoffset; src += srcoffset;
dst += dstoffset; dst += dstoffset;
...@@ -90,7 +93,9 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF ...@@ -90,7 +93,9 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
for (j = 0; j < source->y_width; j += 16) for (j = 0; j < source->y_width; j += 16)
{ {
unsigned int sse; unsigned int sse;
Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse); Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride,
dst + j, dest->y_stride,
&sse);
} }
src += 16 * source->y_stride; src += 16 * source->y_stride;
...@@ -105,7 +110,8 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) ...@@ -105,7 +110,8 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex)
{ {
int min_filter_level; int min_filter_level;
if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame && !cpi->common.refresh_alt_ref_frame) if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame &&
!cpi->common.refresh_alt_ref_frame)
min_filter_level = 0; min_filter_level = 0;
else else
{ {
...@@ -148,7 +154,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) ...@@ -148,7 +154,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
int best_filt_val = cm->filter_level; int best_filt_val = cm->filter_level;
// Make a copy of the unfiltered / processed recon buffer // Make a copy of the unfiltered / processed recon buffer
vp8_yv12_copy_partial_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf, 3); vp8_yv12_copy_partial_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
if (cm->frame_type == KEY_FRAME) if (cm->frame_type == KEY_FRAME)
cm->sharpness_level = 0; cm->sharpness_level = 0;
...@@ -173,10 +179,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) ...@@ -173,10 +179,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Get the err using the previous frame's filter value. // Get the err using the previous frame's filter value.
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance)); best_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
// Re-instate the unfiltered frame // Re-instate the unfiltered frame
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
filt_val -= (1 + ((filt_val > 10) ? 1 : 0)); filt_val -= (1 + ((filt_val > 10) ? 1 : 0));
...@@ -187,11 +193,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) ...@@ -187,11 +193,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
// Get the err for filtered frame // Get the err for filtered frame
filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance)); filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
// Re-instate the unfiltered frame // Re-instate the unfiltered frame
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
// Update the best case record or exit loop. // Update the best case record or exit loop.
if (filt_err < best_err) if (filt_err < best_err)
...@@ -220,10 +225,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) ...@@ -220,10 +225,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
// Get the err for filtered frame // Get the err for filtered frame
filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance)); filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
// Re-instate the unfiltered frame // Re-instate the unfiltered frame
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
// Update the best case record or exit loop. // Update the best case record or exit loop.
if (filt_err < best_err) if (filt_err < best_err)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment