Commit c34d91a8 authored by Scott LaVarnway, committed by Gerrit Code Review
Browse files

Merge "Packing bitstream on-the-fly with delayed context updates"

parents aabae97e 52cf4dca
......@@ -39,6 +39,7 @@ Advanced options:
${toggle_multithread} multithreaded encoding and decoding
${toggle_spatial_resampling} spatial sampling (scaling) support
${toggle_realtime_only} enable this option while building for real-time encoding
${toggle_onthefly_bitpacking} enable on-the-fly bitpacking in real-time encoding
${toggle_error_concealment} enable this option to get a decoder which is able to conceal losses
${toggle_runtime_cpu_detect} runtime cpu detection
${toggle_shared} shared library support
......@@ -253,6 +254,7 @@ CONFIG_LIST="
static_msvcrt
spatial_resampling
realtime_only
onthefly_bitpacking
error_concealment
shared
static
......@@ -297,6 +299,7 @@ CMDLINE_SELECT="
mem_tracker
spatial_resampling
realtime_only
onthefly_bitpacking
error_concealment
shared
static
......
......@@ -24,6 +24,7 @@
#include "bitstream.h"
#include "defaultcoefcounts.h"
#include "vp8/common/common.h"
const int vp8cx_base_skip_false_prob[128] =
{
......@@ -159,7 +160,7 @@ static void write_split(vp8_writer *bc, int x)
);
}
static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
{
const TOKENEXTRA *const stop = p + xcount;
unsigned int split;
......@@ -398,7 +399,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = stop - p;
pack_tokens_c(w, p, tokens);
vp8_pack_tokens_c(w, p, tokens);
}
vp8_stop_encode(w);
......@@ -417,7 +418,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = stop - p;
pack_tokens_c(w, p, tokens);
vp8_pack_tokens_c(w, p, tokens);
}
}
......@@ -783,6 +784,7 @@ static void write_kfmodes(VP8_COMP *cpi)
}
}
#if 0
/* This function is used for debugging probability trees. */
static void print_prob_tree(vp8_prob
coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES])
......@@ -814,6 +816,7 @@ static void print_prob_tree(vp8_prob
fprintf(f, "}\n");
fclose(f);
}
#endif
static void sum_probs_over_prev_coef_context(
const unsigned int probs[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
......@@ -943,7 +946,6 @@ static int default_coef_context_savings(VP8_COMP *cpi)
int t = 0; /* token/prob index */
vp8_tree_probs_from_distribution(
MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
cpi->frame_coef_probs [i][j][k],
......@@ -1048,10 +1050,33 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi)
return savings;
}
static void update_coef_probs(VP8_COMP *cpi)
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
/* Prepares the coefficient counts for the current frame and reports the
 * savings value produced by the matching context-savings helper.
 *
 * On a key frame the accumulated coefficient counts are first overwritten
 * with default_coef_counts. The helper is then chosen by the
 * VPX_ERROR_RESILIENT_PARTITIONS bit of the error resilient mode:
 * independent_coef_context_savings() when it is set,
 * default_coef_context_savings() otherwise.
 *
 * cpi: encoder instance whose coef_counts may be reset.
 * Returns the value handed back by the selected helper.
 */
int vp8_update_coef_context(VP8_COMP *cpi)
{
    if (cpi->common.frame_type == KEY_FRAME)
    {
        /* Key frames start over from the default counts/probabilities. */
        vp8_copy(cpi->coef_counts, default_coef_counts);
    }

    if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
    {
        return independent_coef_context_savings(cpi);
    }

    return default_coef_context_savings(cpi);
}
#endif
void vp8_update_coef_probs(VP8_COMP *cpi)
{
int i = 0;
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_writer *const w = cpi->bc;
#endif
int savings = 0;
vp8_clear_system_state(); //__asm emms;
......@@ -1131,7 +1156,11 @@ static void update_coef_probs(VP8_COMP *cpi)
cpi->common.frame_type == KEY_FRAME && newp != *Pold)
u = 1;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
cpi->update_probs[i][j][k][t] = u;
#else
vp8_write(w, u, upd);
#endif
#ifdef ENTROPY_STATS
......@@ -1143,7 +1172,9 @@ static void update_coef_probs(VP8_COMP *cpi)
/* send/use new probability */
*Pold = newp;
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_write_literal(w, newp, 8);
#endif
savings += s;
......@@ -1172,6 +1203,50 @@ static void update_coef_probs(VP8_COMP *cpi)
while (++i < BLOCK_TYPES);
}
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
/* Writes the coefficient probability updates to the writer at cpi->bc.
 *
 * Every [block type][band][previous context][entropy node] slot is visited
 * in index order. For each slot the update flag recorded earlier in
 * cpi->update_probs is coded with the matching vp8_coef_update_probs
 * probability; when the flag is set, the current probability from
 * cpi->common.fc.coef_probs follows as an 8-bit literal.
 *
 * cpi: encoder instance supplying the writer, flags and probabilities.
 */
static void pack_coef_probs(VP8_COMP *cpi)
{
    vp8_writer *const writer = cpi->bc;
    int block;
    int band;
    int ctx;
    int node; /* token/prob index */

    for (block = 0; block < BLOCK_TYPES; ++block)
    {
        for (band = 0; band < COEF_BANDS; ++band)
        {
            for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx)
            {
                for (node = 0; node < ENTROPY_NODES; ++node)
                {
                    const vp8_prob cur_prob =
                        cpi->common.fc.coef_probs[block][band][ctx][node];
                    const vp8_prob flag_prob =
                        vp8_coef_update_probs[block][band][ctx][node];
                    const char flag =
                        cpi->update_probs[block][band][ctx][node];

                    /* The flag is always coded, updated or not. */
                    vp8_write(writer, flag, flag_prob);

                    if (!flag)
                    {
                        continue;
                    }

                    /* send/use new probability */
                    vp8_write_literal(writer, cur_prob, 8);
                }
            }
        }
    }
}
#endif
#ifdef PACKET_TESTING
FILE *vpxlogc = 0;
#endif
......@@ -1434,6 +1509,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
}
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
{
if (pc->frame_type == KEY_FRAME)
......@@ -1441,6 +1517,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
else
pc->refresh_entropy_probs = 0;
}
#endif
vp8_write_bit(bc, pc->refresh_entropy_probs);
......@@ -1458,13 +1535,17 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_clear_system_state(); //__asm emms;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
pack_coef_probs(cpi);
#else
if (pc->refresh_entropy_probs == 0)
{
// save a copy for later refresh
vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
}
update_coef_probs(cpi);
vp8_update_coef_probs(cpi);
#endif
#ifdef ENTROPY_STATS
active_section = 2;
......@@ -1512,6 +1593,45 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
cpi->partition_sz[0] = *size;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
const int num_part = (1 << pc->multi_token_partition);
unsigned char * dp = cpi->partition_d[0] + cpi->partition_sz[0];
if (num_part > 1)
{
/* write token part sizes (all but last) if more than 1 */
validate_buffer(dp, 3 * (num_part - 1), cpi->partition_d_end[0],
&pc->error);
cpi->partition_sz[0] += 3*(num_part-1);
for(i = 1; i < num_part; i++)
{
write_partition_size(dp, cpi->partition_sz[i]);
dp += 3;
}
}
if (!cpi->output_partition)
{
/* concatenate partition buffers */
for(i = 0; i < num_part; i++)
{
vpx_memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]);
cpi->partition_d[i+1] = dp;
dp += cpi->partition_sz[i+1];
}
}
/* update total size */
*size = 0;
for(i = 0; i < num_part+1; i++)
{
*size += cpi->partition_sz[i];
}
}
#else
if (pc->multi_token_partition != ONE_PARTITION)
{
int num_part = 1 << pc->multi_token_partition;
......@@ -1561,6 +1681,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
*size += cpi->bc[1].pos;
cpi->partition_sz[1] = cpi->bc[1].pos;
}
#endif
}
#ifdef ENTROPY_STATS
......
......@@ -35,7 +35,10 @@ void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
# define pack_mb_row_tokens(a,b) \
vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
#else
# define pack_tokens(a,b,c) pack_tokens_c(a,b,c)
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount);
# define pack_tokens(a,b,c) vp8_pack_tokens_c(a,b,c)
# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b)
#endif
......
......@@ -28,6 +28,9 @@
#include <limits.h>
#include "vp8/common/invtrans.h"
#include "vpx_ports/vpx_timer.h"
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
#include "bitstream.h"
#endif
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
extern void vp8_calc_ref_frame_costs(int *ref_frame_cost,
......@@ -373,10 +376,17 @@ void encode_mb_row(VP8_COMP *cpi,
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int map_index = (mb_row * cpi->common.mb_cols);
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
const int num_part = (1 << cm->multi_token_partition);
TOKENEXTRA * tp_start = cpi->tok;
vp8_writer *w;
#endif
#if CONFIG_MULTITHREAD
const int nsync = cpi->mt_sync_range;
const int rightmost_col = cm->mb_cols - 1;
const int rightmost_col = cm->mb_cols + nsync;
volatile const int *last_row_current_mb_col;
volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
......@@ -384,6 +394,13 @@ void encode_mb_row(VP8_COMP *cpi,
last_row_current_mb_col = &rightmost_col;
#endif
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
if(num_part > 1)
w= &cpi->bc[1 + (mb_row % num_part)];
else
w = &cpi->bc[1];
#endif
// reset above block coeffs
xd->above_context = cm->above_context;
......@@ -411,6 +428,10 @@ void encode_mb_row(VP8_COMP *cpi,
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
*tp = cpi->tok;
#endif
// Distance of Mb to the left & right edges, specified in
// 1/8th pel units as they are always compared to values
// that are in 1/8th pel units
......@@ -435,12 +456,13 @@ void encode_mb_row(VP8_COMP *cpi,
vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
#if CONFIG_MULTITHREAD
if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
if (cpi->b_multi_threaded != 0)
{
*current_mb_col = mb_col - 1; // set previous MB done
if ((mb_col & (nsync - 1)) == 0)
{
while (mb_col > (*last_row_current_mb_col - nsync)
&& (*last_row_current_mb_col) != (cm->mb_cols - 1))
while (mb_col > (*last_row_current_mb_col - nsync))
{
x86_pause_hint();
thread_sleep(0);
......@@ -495,13 +517,13 @@ void encode_mb_row(VP8_COMP *cpi,
#endif
// Count of last ref frame 0,0 useage
// Count of last ref frame 0,0 usage
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
cpi->inter_zz_count ++;
// Special case code for cyclic refresh
// If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
// during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
// during vp8cx_encode_inter_macroblock()) back into the global segmentation map
if ((cpi->current_layer == 0) &&
(cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled))
{
......@@ -525,7 +547,14 @@ void encode_mb_row(VP8_COMP *cpi,
cpi->tplist[mb_row].stop = *tp;
// Increment pointer into gf useage flags structure.
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
/* pack tokens for this MB */
{
int tok_count = *tp - tp_start;
pack_tokens(w, tp_start, tok_count);
}
#endif
// Increment pointer into gf usage flags structure.
x->gf_active_ptr++;
// Increment the activity mask pointers.
......@@ -539,39 +568,29 @@ void encode_mb_row(VP8_COMP *cpi,
recon_yoffset += 16;
recon_uvoffset += 8;
// Keep track of segment useage
// Keep track of segment usage
segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
// skip to next mb
xd->mode_info_context++;
x->partition_info++;
xd->above_context++;
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded != 0)
{
cpi->mt_current_mb_col[mb_row] = mb_col;
}
#endif
}
//extend the recon for intra prediction
vp8_extend_mb_row(
&cm->yv12_fb[dst_fb_idx],
xd->dst.y_buffer + 16,
xd->dst.u_buffer + 8,
xd->dst.v_buffer + 8);
vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx],
xd->dst.y_buffer + 16,
xd->dst.u_buffer + 8,
xd->dst.v_buffer + 8);
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded != 0)
*current_mb_col = rightmost_col;
#endif
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
#if CONFIG_MULTITHREAD
if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
{
sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
}
#endif
}
void init_encode_frame_mb_context(VP8_COMP *cpi)
......@@ -599,7 +618,7 @@ void init_encode_frame_mb_context(VP8_COMP *cpi)
if (cm->frame_type == KEY_FRAME)
vp8_init_mbmode_probs(cm);
// Copy data over into macro block data sturctures.
// Copy data over into macro block data structures.
x->src = * cpi->Source;
xd->pre = cm->yv12_fb[cm->lst_fb_idx];
xd->dst = cm->yv12_fb[cm->new_fb_idx];
......@@ -656,10 +675,13 @@ void vp8_encode_frame(VP8_COMP *cpi)
MACROBLOCK *const x = & cpi->mb;
VP8_COMMON *const cm = & cpi->common;
MACROBLOCKD *const xd = & x->e_mbd;
TOKENEXTRA *tp = cpi->tok;
int segment_counts[MAX_MB_SEGMENTS];
int totalrate;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
BOOL_CODER * bc = &cpi->bc[1]; // bc[0] is for control partition
const int num_part = (1 << cm->multi_token_partition);
#endif
vpx_memset(segment_counts, 0, sizeof(segment_counts));
totalrate = 0;
......@@ -694,6 +716,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
cpi->prediction_error = 0;
cpi->intra_error = 0;
cpi->skip_true_count = 0;
cpi->tok_count = 0;
#if 0
// Experimental code
......@@ -704,6 +727,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
xd->mode_info_context = cm->mi;
vp8_zero(cpi->MVcount);
vp8_zero(cpi->coef_counts);
vp8cx_frame_init_quantizer(cpi);
......@@ -722,9 +746,22 @@ void vp8_encode_frame(VP8_COMP *cpi)
build_activity_map(cpi);
}
// re-initencode frame context.
// re-init encode frame context.
init_encode_frame_mb_context(cpi);
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
int i;
for(i = 0; i < num_part; i++)
{
vp8_start_encode(&bc[i], cpi->partition_d[i + 1],
cpi->partition_d_end[i + 1]);
bc[i].error = &cm->error;
}
}
#endif
{
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
......@@ -748,7 +785,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
{
vp8_zero(cm->left_context)
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
tp = cpi->tok;
#else
tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
#endif
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
......@@ -761,12 +802,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count;
if(mb_row == cm->mb_rows - 1)
{
sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
}
}
sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */
cpi->tok_count = 0;
for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
{
cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
......@@ -799,9 +842,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
// for each macroblock row in image
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
vp8_zero(cm->left_context)
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
tp = cpi->tok;
#endif
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
// adjust to the next row of mbs
......@@ -811,16 +857,25 @@ void vp8_encode_frame(VP8_COMP *cpi)
}
cpi->tok_count = tp - cpi->tok;
}
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
int i;
for(i = 0; i < num_part; i++)
{
vp8_stop_encode(&bc[i]);
cpi->partition_sz[i+1] = bc[i].pos;
}
}
#endif
vpx_usec_timer_mark(&emr_timer);
cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
}
// Work out the segment probabilites if segmentation is enabled
// Work out the segment probabilities if segmentation is enabled
if (xd->segmentation_enabled)
{
int tot_count;
......@@ -908,20 +963,16 @@ void vp8_encode_frame(VP8_COMP *cpi)
}
#endif
// Adjust the projected reference frame useage probability numbers to reflect
// what we have just seen. This may be usefull when we make multiple itterations
#if ! CONFIG_REALTIME_ONLY
// Adjust the projected reference frame usage probability numbers to reflect
// what we have just seen. This may be useful when we make multiple iterations
// of the recode loop rather than continuing to use values from the previous frame.
if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) ||
(!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)))
{
vp8_convert_rfct_to_prob(cpi);
}
#if 0
// Keep record of the total distortion this time around for future use
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
......
......@@ -13,6 +13,8 @@
#include "vp8/common/common.h"
#include "vp8/common/extend.h"
#include "bitstream.h"
#if CONFIG_MULTITHREAD
extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
......@@ -74,6 +76,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
MACROBLOCK *x = &mbri->mb;
MACROBLOCKD *xd = &x->e_mbd;
TOKENEXTRA *tp ;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24);
const int num_part = (1 << cm->multi_token_partition);
#endif
int *segment_counts = mbri->segment_counts;
int *totalrate = &mbri->totalrate;
......@@ -91,9 +97,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int map_index = (mb_row * cm->mb_cols);
volatile int *last_row_current_mb_col;
volatile const int *last_row_current_mb_col;
volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];
#else
tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24));
cpi->tplist[mb_row].start = tp;
#endif
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
......@@ -107,25 +119,27 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * 8);
cpi->tplist[mb_row].start = tp;