Commit 46639567 authored by Scott LaVarnway, committed by Gerrit Code Review
Browse files

Merge "Change use of eob in the encoder"

parents 8002c318 e4f2ec7a
...@@ -187,7 +187,7 @@ typedef struct ...@@ -187,7 +187,7 @@ typedef struct
int dst; int dst;
int dst_stride; int dst_stride;
int eob; char *eob;
union b_mode_info bmi; union b_mode_info bmi;
} BLOCKD; } BLOCKD;
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b,
int pitch) int pitch)
{ {
if (b->eob > 1) if (*b->eob > 1)
{ {
IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, pitch, IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, pitch,
*(b->base_dst) + b->dst, b->dst_stride); *(b->base_dst) + b->dst, b->dst_stride);
...@@ -65,6 +65,3 @@ void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD ...@@ -65,6 +65,3 @@ void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD
} }
} }
...@@ -118,6 +118,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x) ...@@ -118,6 +118,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
{ {
x->block[r].qcoeff = x->qcoeff + r * 16; x->block[r].qcoeff = x->qcoeff + r * 16;
x->block[r].dqcoeff = x->dqcoeff + r * 16; x->block[r].dqcoeff = x->dqcoeff + r * 16;
x->block[r].eob = x->eobs + r;
} }
} }
......
...@@ -102,9 +102,10 @@ loop ...@@ -102,9 +102,10 @@ loop
bne loop bne loop
; PART 2: check position for eob... ; PART 2: check position for eob...
ldr r11, [sp, #0] ; restore BLOCKD pointer
mov lr, #0 ; init eob mov lr, #0 ; init eob
cmp r1, #0 ; coeffs after quantization? cmp r1, #0 ; coeffs after quantization?
ldr r11, [sp, #0] ; restore BLOCKD pointer ldr r12, [r11, #vp8_blockd_eob]
beq end ; skip eob calculations if all zero beq end ; skip eob calculations if all zero
ldr r0, [r11, #vp8_blockd_qcoeff] ldr r0, [r11, #vp8_blockd_qcoeff]
...@@ -212,7 +213,7 @@ quant_coeff_1_0 ...@@ -212,7 +213,7 @@ quant_coeff_1_0
mov lr, #1 ; rc=0, i=0 mov lr, #1 ; rc=0, i=0
end end
str lr, [r11, #vp8_blockd_eob] strb lr, [r12]
ldmfd sp!, {r1, r4-r11, pc} ldmfd sp!, {r1, r4-r11, pc}
ENDP ENDP
......
...@@ -135,17 +135,16 @@ ...@@ -135,17 +135,16 @@
vmovl.u16 q0, d0 vmovl.u16 q0, d0
vmovl.u16 q10, d20 vmovl.u16 q10, d20
vmax.u32 d0, d0, d1 vmax.u32 d0, d0, d1
vmax.u32 d20, d20, d21 vmax.u32 d20, d20, d21
vpmax.u32 d0, d0, d0 vpmax.u32 d0, d0, d0
vpmax.u32 d20, d20, d20 vpmax.u32 d20, d20, d20
add r4, r2, #vp8_blockd_eob ldr r4, [r2, #vp8_blockd_eob]
add r5, r3, #vp8_blockd_eob ldr r5, [r3, #vp8_blockd_eob]
vst1.32 {d0[0]}, [r4@32] vst1.8 {d0[0]}, [r4] ; store eob
vst1.32 {d20[0]}, [r5@32] vst1.8 {d20[0]}, [r5] ; store eob
vldmia sp!, {q4-q7} vldmia sp!, {q4-q7}
ldmfd sp!, {r4-r9} ldmfd sp!, {r4-r9}
...@@ -196,6 +195,8 @@ ...@@ -196,6 +195,8 @@
vshr.s16 q12, #1 ; right shift 1 after vqdmulh vshr.s16 q12, #1 ; right shift 1 after vqdmulh
vshr.s16 q13, #1 vshr.s16 q13, #1
ldr r5, [r1, #vp8_blockd_eob]
orr r2, r2, r3 ; check if all zero (step 4) orr r2, r2, r3 ; check if all zero (step 4)
cmp r2, #0 ; check if all zero (step 5) cmp r2, #0 ; check if all zero (step 5)
beq zero_output ; check if all zero (step 6) beq zero_output ; check if all zero (step 6)
...@@ -230,14 +231,13 @@ ...@@ -230,14 +231,13 @@
vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant
add r4, r1, #vp8_blockd_eob vst1.8 {d0[0]}, [r5] ; store eob
vst1.32 {d0[0]}, [r4@32]
ldmfd sp!, {r4-r7} ldmfd sp!, {r4-r7}
bx lr bx lr
zero_output zero_output
str r2, [r1, #vp8_blockd_eob] strb r2, [r5] ; store eob
vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0 vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0
vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0 vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0
......
...@@ -46,7 +46,7 @@ void vp8_quantize_mb_neon(MACROBLOCK *x) ...@@ -46,7 +46,7 @@ void vp8_quantize_mb_neon(MACROBLOCK *x)
&x->e_mbd.block[i], &x->e_mbd.block[i+1]); &x->e_mbd.block[i], &x->e_mbd.block[i+1]);
if (has_2nd_order) if (has_2nd_order)
x->quantize_b(&x->block[i], &x->e_mbd.block[i]); x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
} }
......
...@@ -274,7 +274,7 @@ static void optimize_b(MACROBLOCK *mb, int ib, int type, ...@@ -274,7 +274,7 @@ static void optimize_b(MACROBLOCK *mb, int ib, int type,
qcoeff_ptr = d->qcoeff; qcoeff_ptr = d->qcoeff;
dqcoeff_ptr = d->dqcoeff; dqcoeff_ptr = d->dqcoeff;
i0 = !type; i0 = !type;
eob = d->eob; eob = *d->eob;
/* Now set up a Viterbi trellis to evaluate alternative roundings. */ /* Now set up a Viterbi trellis to evaluate alternative roundings. */
rdmult = mb->rdmult * err_mult; rdmult = mb->rdmult * err_mult;
...@@ -466,8 +466,8 @@ static void optimize_b(MACROBLOCK *mb, int ib, int type, ...@@ -466,8 +466,8 @@ static void optimize_b(MACROBLOCK *mb, int ib, int type,
} }
final_eob++; final_eob++;
d->eob = final_eob; *a = *l = (final_eob != !type);
*a = *l = (d->eob != !type); *d->eob = (char)final_eob;
} }
static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
...@@ -650,7 +650,7 @@ static void inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, ...@@ -650,7 +650,7 @@ static void inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd,
{ {
BLOCKD *b = &x->block[i]; BLOCKD *b = &x->block[i];
if (b->eob > 1) if (*b->eob > 1)
{ {
IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, 16, IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, 16,
*(b->base_dst) + b->dst, b->dst_stride); *(b->base_dst) + b->dst, b->dst_stride);
...@@ -667,7 +667,7 @@ static void inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, ...@@ -667,7 +667,7 @@ static void inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd,
{ {
BLOCKD *b = &x->block[i]; BLOCKD *b = &x->block[i];
if (b->eob > 1) if (*b->eob > 1)
{ {
IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, 8, IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, 8,
*(b->base_dst) + b->dst, b->dst_stride); *(b->base_dst) + b->dst, b->dst_stride);
......
...@@ -62,7 +62,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) ...@@ -62,7 +62,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
} }
} }
} }
d->eob = eob + 1; *d->eob = (char)(eob + 1);
} }
#else #else
...@@ -97,7 +97,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) ...@@ -97,7 +97,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
eob = i; // last nonzero coeffs eob = i; // last nonzero coeffs
} }
} }
d->eob = eob + 1; *d->eob = (char)(eob + 1);
} }
#endif #endif
...@@ -152,7 +152,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) ...@@ -152,7 +152,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
} }
} }
d->eob = eob + 1; *d->eob = (char)(eob + 1);
} }
/* Perform regular quantization, with unbiased rounding and no zero bin. */ /* Perform regular quantization, with unbiased rounding and no zero bin. */
...@@ -210,7 +210,7 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d) ...@@ -210,7 +210,7 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
} }
} }
d->eob = eob + 1; *d->eob = (char)(eob + 1);
} }
#else #else
...@@ -264,7 +264,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) ...@@ -264,7 +264,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
} }
} }
d->eob = eob + 1; *d->eob = (char)(eob + 1);
} }
#endif #endif
...@@ -731,4 +731,3 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q) ...@@ -731,4 +731,3 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
vp8cx_init_quantizer(cpi); vp8cx_init_quantizer(cpi);
} }
...@@ -485,7 +485,7 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) ...@@ -485,7 +485,7 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
{ {
int c = !type; /* start at coef 0, unless Y with Y2 */ int c = !type; /* start at coef 0, unless Y with Y2 */
int eob = b->eob; int eob = (int)(*b->eob);
int pt ; /* surrounding block/prev coef predictor */ int pt ; /* surrounding block/prev coef predictor */
int cost = 0; int cost = 0;
short *qcoeff_ptr = b->qcoeff; short *qcoeff_ptr = b->qcoeff;
...@@ -1299,11 +1299,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, ...@@ -1299,11 +1299,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
// store everything needed to come back to this!! // store everything needed to come back to this!!
for (i = 0; i < 16; i++) for (i = 0; i < 16; i++)
{ {
BLOCKD *bd = &x->e_mbd.block[i];
bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
bsi->modes[i] = x->partition_info->bmi[i].mode; bsi->modes[i] = x->partition_info->bmi[i].mode;
bsi->eobs[i] = bd->eob; bsi->eobs[i] = x->e_mbd.eobs[i];
} }
} }
} }
...@@ -1432,7 +1430,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, ...@@ -1432,7 +1430,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
BLOCKD *bd = &x->e_mbd.block[i]; BLOCKD *bd = &x->e_mbd.block[i];
bd->bmi.mv.as_int = bsi.mvs[i].as_int; bd->bmi.mv.as_int = bsi.mvs[i].as_int;
bd->eob = bsi.eobs[i]; *bd->eob = bsi.eobs[i];
} }
*returntotrate = bsi.r; *returntotrate = bsi.r;
...@@ -2271,7 +2269,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int ...@@ -2271,7 +2269,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
for (i = 0; i <= 24; i++) for (i = 0; i <= 24; i++)
{ {
tteob += x->e_mbd.block[i].eob; tteob += x->e_mbd.eobs[i];
} }
if (tteob == 0) if (tteob == 0)
......
...@@ -108,15 +108,16 @@ static void tokenize2nd_order_b ...@@ -108,15 +108,16 @@ static void tokenize2nd_order_b
ENTROPY_CONTEXT * a; ENTROPY_CONTEXT * a;
ENTROPY_CONTEXT * l; ENTROPY_CONTEXT * l;
int band, rc, v, token; int band, rc, v, token;
int eob;
b = x->block + 24; b = x->block + 24;
qcoeff_ptr = b->qcoeff; qcoeff_ptr = b->qcoeff;
a = (ENTROPY_CONTEXT *)x->above_context + 8; a = (ENTROPY_CONTEXT *)x->above_context + 8;
l = (ENTROPY_CONTEXT *)x->left_context + 8; l = (ENTROPY_CONTEXT *)x->left_context + 8;
eob = x->eobs[24];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
if(!b->eob) if(!eob)
{ {
/* c = band for this case */ /* c = band for this case */
t->Token = DCT_EOB_TOKEN; t->Token = DCT_EOB_TOKEN;
...@@ -142,7 +143,7 @@ static void tokenize2nd_order_b ...@@ -142,7 +143,7 @@ static void tokenize2nd_order_b
t++; t++;
c = 1; c = 1;
for (; c < b->eob; c++) for (; c < eob; c++)
{ {
rc = vp8_default_zig_zag1d[c]; rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c]; band = vp8_coef_bands[c];
...@@ -213,7 +214,7 @@ static void tokenize1st_order_b ...@@ -213,7 +214,7 @@ static void tokenize1st_order_b
c = type ? 0 : 1; c = type ? 0 : 1;
if(c >= b->eob) if(c >= *b->eob)
{ {
/* c = band for this case */ /* c = band for this case */
t->Token = DCT_EOB_TOKEN; t->Token = DCT_EOB_TOKEN;
...@@ -240,7 +241,7 @@ static void tokenize1st_order_b ...@@ -240,7 +241,7 @@ static void tokenize1st_order_b
t++; t++;
c++; c++;
for (; c < b->eob; c++) for (; c < *b->eob; c++)
{ {
rc = vp8_default_zig_zag1d[c]; rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c]; band = vp8_coef_bands[c];
...@@ -284,7 +285,7 @@ static void tokenize1st_order_b ...@@ -284,7 +285,7 @@ static void tokenize1st_order_b
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
if(!b->eob) if(!(*b->eob))
{ {
/* c = band for this case */ /* c = band for this case */
t->Token = DCT_EOB_TOKEN; t->Token = DCT_EOB_TOKEN;
...@@ -311,7 +312,7 @@ static void tokenize1st_order_b ...@@ -311,7 +312,7 @@ static void tokenize1st_order_b
t++; t++;
c = 1; c = 1;
for (; c < b->eob; c++) for (; c < *b->eob; c++)
{ {
rc = vp8_default_zig_zag1d[c]; rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c]; band = vp8_coef_bands[c];
...@@ -356,11 +357,11 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) ...@@ -356,11 +357,11 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block)
if (has_y2_block) if (has_y2_block)
{ {
for (i = 0; i < 16; i++) for (i = 0; i < 16; i++)
skip &= (x->block[i].eob < 2); skip &= (x->eobs[i] < 2);
} }
for (; i < 24 + has_y2_block; i++) for (; i < 24 + has_y2_block; i++)
skip &= (!x->block[i].eob); skip &= (!x->eobs[i]);
return skip; return skip;
} }
......
...@@ -194,6 +194,8 @@ ZIGZAG_LOOP 15 ...@@ -194,6 +194,8 @@ ZIGZAG_LOOP 15
movdqa [rdi], xmm0 ; store dqcoeff movdqa [rdi], xmm0 ; store dqcoeff
movdqa [rdi + 16], xmm1 movdqa [rdi + 16], xmm1
mov rcx, [rsi + vp8_blockd_eob]
; select the last value (in zig_zag order) for EOB ; select the last value (in zig_zag order) for EOB
pcmpeqw xmm2, xmm6 pcmpeqw xmm2, xmm6
pcmpeqw xmm3, xmm6 pcmpeqw xmm3, xmm6
...@@ -214,7 +216,8 @@ ZIGZAG_LOOP 15 ...@@ -214,7 +216,8 @@ ZIGZAG_LOOP 15
pmaxsw xmm2, xmm3 pmaxsw xmm2, xmm3
movd eax, xmm2 movd eax, xmm2
and eax, 0xff and eax, 0xff
mov [rsi + vp8_blockd_eob], eax
mov BYTE PTR [rcx], al ; store eob
; begin epilog ; begin epilog
add rsp, stack_size add rsp, stack_size
...@@ -337,6 +340,8 @@ sym(vp8_fast_quantize_b_sse2): ...@@ -337,6 +340,8 @@ sym(vp8_fast_quantize_b_sse2):
pmaxsw xmm1, xmm5 pmaxsw xmm1, xmm5
mov rcx, [rsi + vp8_blockd_eob]
; now down to 8 ; now down to 8
pshufd xmm5, xmm1, 00001110b pshufd xmm5, xmm1, 00001110b
...@@ -354,7 +359,8 @@ sym(vp8_fast_quantize_b_sse2): ...@@ -354,7 +359,8 @@ sym(vp8_fast_quantize_b_sse2):
movd eax, xmm1 movd eax, xmm1
and eax, 0xff and eax, 0xff
mov [rsi + vp8_blockd_eob], eax
mov BYTE PTR [rcx], al ; store eob
; begin epilog ; begin epilog
%if ABI_IS_32BIT %if ABI_IS_32BIT
......
...@@ -208,6 +208,8 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 ...@@ -208,6 +208,8 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
movdqa [rdi], xmm0 movdqa [rdi], xmm0
movdqa [rdi + 16], xmm1 movdqa [rdi + 16], xmm1
mov rcx, [rsi + vp8_blockd_eob]
; select the last value (in zig_zag order) for EOB ; select the last value (in zig_zag order) for EOB
pxor xmm6, xmm6 pxor xmm6, xmm6
pcmpeqw xmm4, xmm6 pcmpeqw xmm4, xmm6
...@@ -225,7 +227,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 ...@@ -225,7 +227,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
add eax, 1 add eax, 1
and eax, edi and eax, edi
mov [rsi + vp8_blockd_eob], eax mov BYTE PTR [rcx], al ; store eob
; begin epilog ; begin epilog
%if ABI_IS_32BIT %if ABI_IS_32BIT
......
...@@ -110,12 +110,14 @@ sym(vp8_fast_quantize_b_ssse3): ...@@ -110,12 +110,14 @@ sym(vp8_fast_quantize_b_ssse3):
movdqa [rcx], xmm2 ;store dqcoeff movdqa [rcx], xmm2 ;store dqcoeff
movdqa [rcx + 16], xmm3 ;store dqcoeff movdqa [rcx + 16], xmm3 ;store dqcoeff
mov rcx, [rsi + vp8_blockd_eob]
sub edi, edx ;check for all zeros in bit mask sub edi, edx ;check for all zeros in bit mask
sar edi, 31 ;0 or -1 sar edi, 31 ;0 or -1
add eax, 1 add eax, 1
and eax, edi ;if the bit mask was all zero, and eax, edi ;if the bit mask was all zero,
;then eob = 0 ;then eob = 0
mov [rsi + vp8_blockd_eob], eax mov BYTE PTR [rcx], al ;store eob
; begin epilog ; begin epilog
%if ABI_IS_32BIT %if ABI_IS_32BIT
......
...@@ -37,17 +37,17 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) ...@@ -37,17 +37,17 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
short *dqcoeff_ptr = d->dqcoeff; short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant; short *dequant_ptr = d->dequant;
d->eob = vp8_fast_quantize_b_impl_mmx( *d->eob = (char)vp8_fast_quantize_b_impl_mmx(
coeff_ptr, coeff_ptr,
zbin_ptr, zbin_ptr,
qcoeff_ptr, qcoeff_ptr,
dequant_ptr, dequant_ptr,
scan_mask, scan_mask,
round_ptr, round_ptr,