OSUOSL/Nero are experiencing Internet connectivity problems. This affects us as we're hosted with OSUOSL. We apologize for the inconvenience.

Commit 14b04437 authored by Yaowu Xu

Merge branch 'master' into nextgenv2

parents 7c6144bc c84d3abe
......@@ -141,7 +141,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_tm_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32, 8),
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
......@@ -155,14 +155,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 8),
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
......@@ -176,7 +176,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 8),
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
......@@ -194,7 +194,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
10),
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
......@@ -211,14 +211,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32,
10),
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
......@@ -233,7 +233,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 10),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 10),
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
......@@ -251,7 +251,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
12),
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
......@@ -268,14 +268,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32,
12),
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
......@@ -290,7 +290,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 12),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 12),
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
......
......@@ -190,7 +190,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
BLOCK_SIZE bsize,
int64_t rate,
int64_t dist,
int skip) {
int skip,
struct macroblock_plane *const p) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int bw = num_8x8_blocks_wide_lookup[bsize];
......@@ -198,12 +199,33 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_cols + mi_col;
const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist,
bsize);
int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
int x = 0; int y = 0;
int is_skin = 0;
if (refresh_this_block == 0 &&
bsize <= BLOCK_16X16 &&
cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
// Take center pixel in block to determine is_skin.
const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
const int uv_width_shift = y_width_shift >> 1;
const int uv_height_shift = y_height_shift >> 1;
const int stride = p[0].src.stride;
const int strideuv = p[1].src.stride;
const uint8_t ysource =
p[0].src.buf[y_height_shift * stride + y_width_shift];
const uint8_t usource =
p[1].src.buf[uv_height_shift * strideuv + uv_width_shift];
const uint8_t vsource =
p[2].src.buf[uv_height_shift * strideuv + uv_width_shift];
is_skin = vp9_skin_pixel(ysource, usource, vsource);
if (is_skin)
refresh_this_block = 1;
}
// If this block is labeled for refresh, check if we should reset the
// segment_id.
if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
......
......@@ -14,6 +14,8 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_block.h"
#include "vp9/encoder/vp9_skin_detection.h"
#ifdef __cplusplus
extern "C" {
......@@ -93,7 +95,8 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const struct VP9_COMP *cpi, int i,
void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi,
MB_MODE_INFO *const mbmi,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip);
int64_t rate, int64_t dist, int skip,
struct macroblock_plane *const p);
void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi,
const MB_MODE_INFO *const mbmi,
......
......@@ -1045,7 +1045,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row,
mi_col, bsize, ctx->rate, ctx->dist,
x->skip);
x->skip, p);
}
}
......@@ -1705,6 +1705,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
struct macroblock_plane *const p = x->plane;
const struct segmentation *const seg = &cm->seg;
const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
......@@ -1725,7 +1726,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
} else {
// Setting segmentation map for cyclic_refresh.
vp9_cyclic_refresh_update_segment(cpi, mbmi, mi_row, mi_col, bsize,
ctx->rate, ctx->dist, x->skip);
ctx->rate, ctx->dist, x->skip, p);
}
vp9_init_plane_quantizers(cpi, x);
}
......
......@@ -1349,11 +1349,25 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
for (ref = 0; ref < 1 + is_compound; ++ref) {
const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->pre[ref].stride)];
const int bw = b_width_log2_lookup[BLOCK_8X8];
const int h = 4 * (i >> bw);
const int w = 4 * (i & ((1 << bw) - 1));
const struct scale_factors *sf = &xd->block_refs[ref]->sf;
int y_stride = pd->pre[ref].stride;
uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);
if (vp9_is_scaled(sf)) {
const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
y_stride = xd->block_refs[ref]->buf->y_stride;
pre = xd->block_refs[ref]->buf->y_buffer;
pre += scaled_buffer_offset(x_start + w, y_start + h,
y_stride, sf);
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
vp9_highbd_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height,
......@@ -1361,7 +1375,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
} else {
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
vp9_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
......@@ -1370,7 +1384,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
mi_row * MI_SIZE + 4 * (i / 2));
}
#else
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
vp9_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
......
......@@ -291,10 +291,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_v_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc";
specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_4x4/;
......
......@@ -17,24 +17,20 @@ pw_16: times 4 dd 16
pw_32: times 4 dd 32
SECTION .text
INIT_MMX sse
INIT_XMM sse2
cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
GET_GOT goffsetq
movq m0, [aboveq]
movq m2, [leftq]
DEFINE_ARGS dst, stride, one
mov oned, 0x0001
pxor m1, m1
movd m3, oned
pshufw m3, m3, 0x0
paddw m0, m2
pmaddwd m0, m3
packssdw m0, m1
pmaddwd m0, m3
pshuflw m1, m0, 0xe
paddw m0, m1
pshuflw m1, m0, 0x1
paddw m0, m1
paddw m0, [GLOBAL(pw_4)]
psraw m0, 3
pshufw m0, m0, 0x0
pshuflw m0, m0, 0x0
movq [dstq ], m0
movq [dstq+strideq*2], m0
lea dstq, [dstq+strideq*4]
......@@ -261,43 +257,44 @@ cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
jnz .loop
REP_RET
INIT_MMX sse
cglobal highbd_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one
INIT_XMM sse2
cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps
movd m1, [aboveq-2]
movq m0, [aboveq]
pshufw m1, m1, 0x0
pshuflw m1, m1, 0x0
movlhps m0, m0 ; t1 t2 t3 t4 t1 t2 t3 t4
movlhps m1, m1 ; tl tl tl tl tl tl tl tl
; Get the values to compute the maximum value at this bit depth
mov oned, 1
movd m3, oned
pcmpeqw m3, m3
movd m4, bpsd
pshufw m3, m3, 0x0
DEFINE_ARGS dst, stride, line, left
mov lineq, -2
mova m2, m3
psubw m0, m1 ; t1-tl t2-tl t3-tl t4-tl
psllw m3, m4
add leftq, 8
psubw m3, m2 ; max possible value
pxor m4, m4 ; min possible value
psubw m0, m1
.loop:
movq m1, [leftq+lineq*4]
movq m2, [leftq+lineq*4+2]
pshufw m1, m1, 0x0
pshufw m2, m2, 0x0
paddw m1, m0
pcmpeqw m2, m2
pxor m4, m4 ; min possible value
pxor m3, m2 ; max possible value
mova m1, [leftq]
pshuflw m2, m1, 0x0
pshuflw m5, m1, 0x55
movlhps m2, m5 ; l1 l1 l1 l1 l2 l2 l2 l2
paddw m2, m0
;Clamp to the bit-depth
pminsw m1, m3
pminsw m2, m3
pmaxsw m1, m4
pmaxsw m2, m4
;Store the values
movq [dstq ], m1
movq [dstq+strideq*2], m2
movq [dstq ], m2
movhpd [dstq+strideq*2], m2
lea dstq, [dstq+strideq*4]
inc lineq
jnz .loop
REP_RET
pshuflw m2, m1, 0xaa
pshuflw m5, m1, 0xff
movlhps m2, m5
paddw m2, m0
;Clamp to the bit-depth
pminsw m2, m3
pmaxsw m2, m4
;Store the values
movq [dstq ], m2
movhpd [dstq+strideq*2], m2
RET
INIT_XMM sse2
cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment