Commit 15ea268d authored by Johann, committed by Gerrit Code Review

Merge "Fix encoder partitioned output on ARM"

parents b79879c2 97259b46
......@@ -13,6 +13,7 @@
EXPORT |vp8_encode_bool|
EXPORT |vp8_stop_encode|
EXPORT |vp8_encode_value|
IMPORT |vp8_validate_buffer_arm|
INCLUDE asm_enc_offsets.asm
......@@ -22,6 +23,20 @@
AREA |.text|, CODE, READONLY
; macro for validating write buffer position
; needs vp8_writer in r0
; start shall not be in r1
MACRO
VALIDATE_POS $start, $pos
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
ldr r2, [r0, #vp8_writer_buffer_end]
ldr r3, [r0, #vp8_writer_error]
mov r1, $pos
mov r0, $start
bl vp8_validate_buffer_arm
pop {r0-r3, r12, lr}
MEND
; r0 BOOL_CODER *br
; r1 unsigned char *source
; r2 unsigned char *source_end
......@@ -43,7 +58,7 @@
; r1 int bit
; r2 int probability
|vp8_encode_bool| PROC
push {r4-r9, lr}
push {r4-r10, lr}
mov r4, r2
......@@ -106,6 +121,9 @@ token_high_bit_not_set
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r1, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r9, r1 ; validate_buffer at pos
strb r7, [r9, r4] ; w->buffer[w->pos++]
token_count_lt_zero
......@@ -114,7 +132,7 @@ token_count_lt_zero
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
pop {r4-r9, pc}
pop {r4-r10, pc}
ENDP
; r0 BOOL_CODER *br
......@@ -179,6 +197,9 @@ token_high_bit_not_set_se
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r1, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r9, r1 ; validate_buffer at pos
strb r7, [r9, r4] ; w->buffer[w->pos++]
token_count_lt_zero_se
......@@ -198,7 +219,7 @@ token_count_lt_zero_se
; r1 int data
; r2 int bits
|vp8_encode_value| PROC
push {r4-r11, lr}
push {r4-r12, lr}
mov r10, r2
......@@ -270,6 +291,9 @@ token_high_bit_not_set_ev
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r9, r11 ; validate_buffer at pos
strb r7, [r9, r4] ; w->buffer[w->pos++]
token_count_lt_zero_ev
......@@ -281,7 +305,7 @@ token_count_lt_zero_ev
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
pop {r4-r11, pc}
pop {r4-r12, pc}
ENDP
END
......@@ -10,6 +10,7 @@
EXPORT |vp8cx_pack_tokens_armv5|
IMPORT |vp8_validate_buffer_arm|
INCLUDE asm_enc_offsets.asm
......@@ -19,6 +20,22 @@
AREA |.text|, CODE, READONLY
; macro for validating write buffer position
; Calls vp8_validate_buffer_arm($start, $pos, w->buffer_end, w->error),
; where w is the vp8_writer pointed to by r0.
;
; Caller requirements:
;   - needs vp8_writer in r0 (buffer_end and error are loaded through it)
;   - start shall not be in r1: r1 is overwritten with $pos before $start
;     is copied into r0
;   - neither $start nor $pos may be in r2 or r3: both registers are
;     overwritten by the loads before the operands are moved
;
; r0-r3, r12 and lr are saved and restored around the call, so the value
; returned by vp8_validate_buffer_arm in r0 is discarded.
; NOTE(review): the condition flags are not saved here; presumably the
; callee may change them - confirm no call site relies on flags across
; this macro.
MACRO
VALIDATE_POS $start, $pos
push {r0-r3, r12, lr} ; save caller-saved regs; rest of regs are preserved by subroutine call
ldr r2, [r0, #vp8_writer_buffer_end] ; 3rd argument: w->buffer_end
ldr r3, [r0, #vp8_writer_error] ; 4th argument: w->error
mov r1, $pos ; 2nd argument: position to validate
mov r0, $start ; 1st argument: buffer start; writer pointer in r0 is replaced last
bl vp8_validate_buffer_arm
pop {r0-r3, r12, lr} ; restore saved regs; r0 is the vp8_writer again
MEND
; r0 vp8_writer *w
; r1 const TOKENEXTRA *p
; r2 int xcount
......@@ -26,11 +43,11 @@
; s0 vp8_extra_bits
; s1 vp8_coef_tree
|vp8cx_pack_tokens_armv5| PROC
push {r4-r11, lr}
push {r4-r12, lr}
sub sp, sp, #16
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
; sizeof (TOKENEXTRA) is 8
sub sp, sp, #12
add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
str r2, [sp, #0]
str r3, [sp, #8] ; save vp8_coef_encodings
......@@ -57,7 +74,7 @@ while_p_lt_stop
subne r8, r8, #1 ; --n
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #52] ; vp8_coef_tree
ldr r10, [sp, #60] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
lsl r12, r6, r4 ; r12 = v << 32 - n
......@@ -128,12 +145,15 @@ token_high_bit_not_set
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]
; r10 is used earlier in the loop, but r10 is used as
; temp variable here. So after r10 is used, reload
; vp8_coef_tree_dcd into r10
ldr r10, [sp, #52] ; vp8_coef_tree
ldr r10, [sp, #60] ; vp8_coef_tree
token_count_lt_zero
lsl r2, r2, r6 ; lowvalue <<= shift
......@@ -142,7 +162,7 @@ token_count_lt_zero
bne token_loop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r7, [sp, #48] ; vp8_extra_bits
ldr r7, [sp, #56] ; vp8_extra_bits
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
; element. Here vp8_extra_bit_struct == 16
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
......@@ -223,6 +243,9 @@ extra_high_bit_not_set
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
ldr r10, [sp, #4] ; b->tree
extra_count_lt_zero
......@@ -271,7 +294,10 @@ end_high_bit_not_set
lsr r6, r2, #24 ; lowvalue >> 24
add r12, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r12, [r0, #0x10]
str r12, [r0, #vp8_writer_pos]
VALIDATE_POS r7, r12 ; validate_buffer at pos
strb r6, [r7, r4]
end_count_zero
skip_extra_bits
......@@ -284,8 +310,8 @@ check_p_lt_stop
str r2, [r0, #vp8_writer_lowvalue]
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
add sp, sp, #12
pop {r4-r11, pc}
add sp, sp, #16
pop {r4-r12, pc}
ENDP
END
......@@ -10,6 +10,7 @@
EXPORT |vp8cx_pack_mb_row_tokens_armv5|
IMPORT |vp8_validate_buffer_arm|
INCLUDE asm_enc_offsets.asm
......@@ -19,6 +20,21 @@
AREA |.text|, CODE, READONLY
; macro for validating write buffer position
; Calls vp8_validate_buffer_arm($start, $pos, w->buffer_end, w->error),
; where w is the vp8_writer pointed to by r0.
;
; Caller requirements:
;   - needs vp8_writer in r0 (buffer_end and error are loaded through it)
;   - start shall not be in r1: r1 is overwritten with $pos before $start
;     is copied into r0
;   - neither $start nor $pos may be in r2 or r3: both registers are
;     overwritten by the loads before the operands are moved
;
; r0-r3, r12 and lr are saved and restored around the call, so the value
; returned by vp8_validate_buffer_arm in r0 is discarded.
; NOTE(review): the condition flags are not saved here; presumably the
; callee may change them - confirm no call site relies on flags across
; this macro.
MACRO
VALIDATE_POS $start, $pos
push {r0-r3, r12, lr} ; save caller-saved regs; rest of regs are preserved by subroutine call
ldr r2, [r0, #vp8_writer_buffer_end] ; 3rd argument: w->buffer_end
ldr r3, [r0, #vp8_writer_error] ; 4th argument: w->error
mov r1, $pos ; 2nd argument: position to validate
mov r0, $start ; 1st argument: buffer start; writer pointer in r0 is replaced last
bl vp8_validate_buffer_arm
pop {r0-r3, r12, lr} ; restore saved regs; r0 is the vp8_writer again
MEND
; r0 VP8_COMP *cpi
; r1 vp8_writer *w
; r2 vp8_coef_encodings
......@@ -26,7 +42,7 @@
; s0 vp8_coef_tree
|vp8cx_pack_mb_row_tokens_armv5| PROC
push {r4-r11, lr}
push {r4-r12, lr}
sub sp, sp, #24
; Compute address of cpi->common.mb_rows
......@@ -79,7 +95,7 @@ while_p_lt_stop
subne r8, r8, #1 ; --n
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #60] ; vp8_coef_tree
ldr r10, [sp, #64] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
lsl r12, r6, r4 ; r12 = v << 32 - n
......@@ -150,12 +166,15 @@ token_high_bit_not_set
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]
; r10 is used earlier in the loop, but r10 is used as
; temp variable here. So after r10 is used, reload
; vp8_coef_tree_dcd into r10
ldr r10, [sp, #60] ; vp8_coef_tree
ldr r10, [sp, #64] ; vp8_coef_tree
token_count_lt_zero
lsl r2, r2, r6 ; lowvalue <<= shift
......@@ -245,6 +264,9 @@ extra_high_bit_not_set
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
ldr r10, [sp, #4] ; b->tree
extra_count_lt_zero
......@@ -293,7 +315,10 @@ end_high_bit_not_set
lsr r6, r2, #24 ; lowvalue >> 24
add r12, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r12, [r0, #0x10]
str r12, [r0, #vp8_writer_pos]
VALIDATE_POS r7, r12 ; validate_buffer at pos
strb r6, [r7, r4]
end_count_zero
skip_extra_bits
......@@ -314,7 +339,7 @@ check_p_lt_stop
str r5, [r0, #vp8_writer_range]
str r3, [r0, #vp8_writer_count]
add sp, sp, #24
pop {r4-r11, pc}
pop {r4-r12, pc}
ENDP
_VP8_COMP_common_
......
......@@ -10,6 +10,7 @@
EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
IMPORT |vp8_validate_buffer_arm|
INCLUDE asm_enc_offsets.asm
......@@ -19,17 +20,31 @@
AREA |.text|, CODE, READONLY
; macro for validating write buffer position
; Calls vp8_validate_buffer_arm($start, $pos, w->buffer_end, w->error),
; where w is the vp8_writer pointed to by r0.
;
; Caller requirements:
;   - needs vp8_writer in r0 (buffer_end and error are loaded through it)
;   - start shall not be in r1: r1 is overwritten with $pos before $start
;     is copied into r0
;   - neither $start nor $pos may be in r2 or r3: both registers are
;     overwritten by the loads before the operands are moved
;
; r0-r3, r12 and lr are saved and restored around the call, so the value
; returned by vp8_validate_buffer_arm in r0 is discarded.
; NOTE(review): the condition flags are not saved here; presumably the
; callee may change them - confirm no call site relies on flags across
; this macro.
MACRO
VALIDATE_POS $start, $pos
push {r0-r3, r12, lr} ; save caller-saved regs; rest of regs are preserved by subroutine call
ldr r2, [r0, #vp8_writer_buffer_end] ; 3rd argument: w->buffer_end
ldr r3, [r0, #vp8_writer_error] ; 4th argument: w->error
mov r1, $pos ; 2nd argument: position to validate
mov r0, $start ; 1st argument: buffer start; writer pointer in r0 is replaced last
bl vp8_validate_buffer_arm
pop {r0-r3, r12, lr} ; restore saved regs; r0 is the vp8_writer again
MEND
; r0 VP8_COMP *cpi
; r1 unsigned char *cx_data
; r2 int num_part
; r3 *size
; r2 const unsigned char *cx_data_end
; r3 int num_part
; s0 vp8_coef_encodings
; s1 vp8_extra_bits,
; s2 const vp8_tree_index *,
; s2 const vp8_tree_index *
|vp8cx_pack_tokens_into_partitions_armv5| PROC
push {r4-r11, lr}
sub sp, sp, #44
push {r4-r12, lr}
sub sp, sp, #40
; Compute address of cpi->common.mb_rows
ldr r4, _VP8_COMP_common_
......@@ -39,31 +54,26 @@
ldr r5, [r4, r6] ; load up mb_rows
str r5, [sp, #36] ; save mb_rows
str r1, [sp, #24] ; save cx_data
str r2, [sp, #20] ; save num_part
str r3, [sp, #8] ; save *size
; *size = 3*(num_part -1 );
sub r2, r2, #1 ; num_part - 1
add r2, r2, r2, lsl #1 ; 3*(num_part - 1)
str r2, [r3]
add r2, r2, r1 ; cx_data + *size
str r2, [sp, #40] ; ptr
str r1, [sp, #24] ; save ptr = cx_data
str r3, [sp, #20] ; save num_part
str r2, [sp, #8] ; save cx_data_end
ldr r4, _VP8_COMP_tplist_
add r4, r0, r4
ldr r7, [r4, #0] ; dereference cpi->tp_list
str r7, [sp, #32] ; store start of cpi->tp_list
ldr r11, _VP8_COMP_bc2_ ; load up vp8_writer out of cpi
ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi
add r0, r0, r11
mov r11, #0
str r11, [sp, #28] ; i
numparts_loop
ldr r10, [sp, #40] ; ptr
ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer)
add r0, r2 ; bc[i + 1]
ldr r10, [sp, #24] ; ptr
ldr r5, [sp, #36] ; move mb_rows to the counting section
subs r5, r5, r11 ; move start point with each partition
; mb_rows starts at i
......@@ -72,6 +82,10 @@ numparts_loop
; Reset all of the VP8 Writer data for each partition that
; is processed.
; start_encode
ldr r3, [sp, #8]
str r3, [r0, #vp8_writer_buffer_end]
mov r2, #0 ; vp8_writer_lowvalue
mov r5, #255 ; vp8_writer_range
mvn r3, #23 ; vp8_writer_count
......@@ -182,6 +196,9 @@ token_high_bit_not_set
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]
; r10 is used earlier in the loop, but r10 is used as
......@@ -277,6 +294,9 @@ extra_high_bit_not_set
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
ldr r10, [sp, #4] ; b->tree
extra_count_lt_zero
......@@ -320,12 +340,15 @@ end_high_bit_not_set
bne end_count_zero
ldr r4, [r0, #vp8_writer_pos]
mvn r3, #7
mvn r3, #7 ; count = -8
ldr r7, [r0, #vp8_writer_buffer]
lsr r6, r2, #24 ; lowvalue >> 24
add r12, r4, #1 ; w->pos++
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r12, [r0, #0x10]
str r12, [r0, #vp8_writer_pos]
VALIDATE_POS r7, r12 ; validate_buffer at pos
strb r6, [r7, r4]
end_count_zero
skip_extra_bits
......@@ -401,6 +424,9 @@ token_high_bit_not_set_se
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
str r11, [r0, #vp8_writer_pos]
sub r3, r3, #8 ; count -= 8
VALIDATE_POS r10, r11 ; validate_buffer at pos
strb r7, [r10, r4] ; w->buffer[w->pos++]
token_count_lt_zero_se
......@@ -409,33 +435,10 @@ token_count_lt_zero_se
subs r12, r12, #1
bne stop_encode_loop
ldr r10, [sp, #8] ; *size
ldr r11, [r10]
ldr r4, [r0, #vp8_writer_pos] ; w->pos
add r11, r11, r4 ; *size += w->pos
str r11, [r10]
ldr r9, [sp, #20] ; num_parts
sub r9, r9, #1
ldr r10, [sp, #28] ; i
cmp r10, r9 ; if(i<(num_part - 1))
bge skip_write_partition
ldr r12, [sp, #40] ; ptr
ldr r12, [sp, #24] ; ptr
add r12, r12, r4 ; ptr += w->pos
str r12, [sp, #40]
ldr r9, [sp, #24] ; cx_data
mov r8, r4, asr #8
strb r4, [r9, #0]
strb r8, [r9, #1]
mov r4, r4, asr #16
strb r4, [r9, #2]
add r9, r9, #3 ; cx_data += 3
str r9, [sp, #24]
skip_write_partition
str r12, [sp, #24]
ldr r11, [sp, #28] ; i
ldr r10, [sp, #20] ; num_parts
......@@ -451,9 +454,8 @@ skip_write_partition
cmp r10, r11
bgt numparts_loop
add sp, sp, #44
pop {r4-r11, pc}
add sp, sp, #40
pop {r4-r12, pc}
ENDP
_VP8_COMP_common_
......@@ -462,7 +464,9 @@ _VP8_COMMON_MBrows_
DCD vp8_common_mb_rows
_VP8_COMP_tplist_
DCD vp8_comp_tplist
_VP8_COMP_bc2_
DCD vp8_comp_bc2
_VP8_COMP_bc_
DCD vp8_comp_bc
_vp8_writer_sz_
DCD vp8_writer_sz
END
......@@ -10,7 +10,7 @@
#include "vp8/encoder/boolhuff.h"
#include "vp8/common/blockd.h"
#include "vpx/internal/vpx_codec_internal.h"
const unsigned int vp8_prob_cost[256] =
{
......@@ -32,3 +32,10 @@ const unsigned int vp8_prob_cost[256] =
22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1
};
/* Externally linkable entry point for the ARM assembly code: forwards its
 * four arguments, unchanged and in the same order, to validate_buffer()
 * and returns whatever that call yields.
 */
int vp8_validate_buffer_arm(const unsigned char *start,
                            size_t len,
                            const unsigned char *end,
                            struct vpx_internal_error_info *error)
{
    const int status = validate_buffer(start, len, end, error);

    return status;
}
......@@ -50,6 +50,7 @@ DEFINE(vp8_writer_count, offsetof(vp8_writer, count));
DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos));
DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer));
DEFINE(vp8_writer_buffer_end, offsetof(vp8_writer, buffer_end));
DEFINE(vp8_writer_error, offsetof(vp8_writer, error));
DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token));
DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra));
......@@ -69,7 +70,8 @@ DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, b
DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist));
DEFINE(vp8_comp_common, offsetof(VP8_COMP, common));
DEFINE(vp8_comp_bc2, offsetof(VP8_COMP, bc2));
DEFINE(vp8_comp_bc , offsetof(VP8_COMP, bc));
DEFINE(vp8_writer_sz , sizeof(vp8_writer));
DEFINE(tokenlist_start, offsetof(TOKENLIST, start));
DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop));
......
......@@ -109,7 +109,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi)
{
VP8_COMMON *const x = & cpi->common;
vp8_writer *const w = & cpi->bc;
vp8_writer *const w = cpi->bc;
{
vp8_prob Pnew [VP8_YMODES-1];
......@@ -374,20 +374,21 @@ static void write_partition_size(unsigned char *cx_data, int size)
}
static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, unsigned char * cx_data_end, int num_part, int *size)
static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
unsigned char * cx_data_end,
int num_part)
{
int i;
unsigned char *ptr = cx_data;
unsigned char *ptr_end = cx_data_end;
unsigned int shift;
vp8_writer *w = &cpi->bc2;
*size = 3 * (num_part - 1);
cpi->partition_sz[0] += *size;
ptr = cx_data + (*size);
vp8_writer *w;
ptr = cx_data;
for (i = 0; i < num_part; i++)
{
w = cpi->bc + i + 1;
vp8_start_encode(w, ptr, ptr_end);
{
unsigned int split;
......@@ -597,17 +598,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
}
vp8_stop_encode(w);
*size += w->pos;
/* The first partition size is set earlier */
cpi->partition_sz[i + 1] = w->pos;
if (i < (num_part - 1))
{
write_partition_size(cx_data, w->pos);
cx_data += 3;
ptr += w->pos;
}
ptr += w->pos;
}
}
......@@ -892,7 +883,7 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO
static void pack_inter_mode_mvs(VP8_COMP *const cpi)
{
VP8_COMMON *const pc = & cpi->common;
vp8_writer *const w = & cpi->bc;
vp8_writer *const w = cpi->bc;
const MV_CONTEXT *mvc = pc->fc.mvc;
const int *const rfct = cpi->count_mb_ref_frame_usage;
......@@ -1107,7 +1098,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
static void write_kfmodes(VP8_COMP *cpi)
{
vp8_writer *const bc = & cpi->bc;
vp8_writer *const bc = cpi->bc;
const VP8_COMMON *const c = & cpi->common;
/* const */
MODE_INFO *m = c->mi;
......@@ -1437,7 +1428,7 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi)
static void update_coef_probs(VP8_COMP *cpi)
{
int i = 0;
vp8_writer *const w = & cpi->bc;
vp8_writer *const w = cpi->bc;
int savings = 0;
vp8_clear_system_state(); //__asm emms;
......@@ -1583,7 +1574,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
int i, j;
VP8_HEADER oh;
VP8_COMMON *const pc = & cpi->common;
vp8_writer *const bc = & cpi->bc;
vp8_writer *const bc = cpi->bc;
MACROBLOCKD *const xd = & cpi->mb.e_mbd;
int extra_bytes_packed = 0;
......@@ -1598,8 +1589,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
mb_feature_data_bits = vp8_mb_feature_data_bits;
cpi->bc.error = &pc->error;
cpi->bc2.error = &pc->error;
bc[0].error = &pc->error;
validate_buffer(cx_data, 3, cx_data_end, &cpi->common.error);
cx_data += 3;
......@@ -1879,7 +1869,9 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_stop_encode(bc);