Commit 88841f10 authored by John Koleszar's avatar John Koleszar
Browse files

Refactor lookahead ring buffer

This patch cleans up the source buffer storage and copy mechanism to
allow access through a standard push/pop/peek interface. This approach
also avoids an extra copy in the case where the source is not a
multiple of 16, fixing issue #102.

Change-Id: I05808c39f5743625cb4c7af54cc841b9b10fdbd9
parent 538f1104
......@@ -13,10 +13,12 @@
#include "vpx_mem/vpx_mem.h"
static void extend_plane_borders
static void copy_and_extend_plane
(
unsigned char *s, /* source */
int sp, /* pitch */
int sp, /* source pitch */
unsigned char *d, /* destination */
int dp, /* destination pitch */
int h, /* height */
int w, /* width */
int et, /* extend top border */
......@@ -25,7 +27,6 @@ static void extend_plane_borders
int er /* extend right border */
)
{
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
......@@ -34,68 +35,73 @@ static void extend_plane_borders
/* copy the left and right most columns out */
src_ptr1 = s;
src_ptr2 = s + w - 1;
dest_ptr1 = s - el;
dest_ptr2 = s + w;
dest_ptr1 = d - el;
dest_ptr2 = d + w;
for (i = 0; i < h - 0 + 1; i++)
{
/* Some linkers will complain if we call vpx_memset with el set to a
* constant 0.
*/
if (el)
vpx_memset(dest_ptr1, src_ptr1[0], el);
vpx_memset(dest_ptr1, src_ptr1[0], el);
vpx_memcpy(dest_ptr1 + el, src_ptr1, w);
vpx_memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp;
src_ptr2 += sp;
dest_ptr1 += sp;
dest_ptr2 += sp;
dest_ptr1 += dp;
dest_ptr2 += dp;
}
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = s - el;
src_ptr2 = s + sp * (h - 1) - el;
dest_ptr1 = s + sp * (-et) - el;
dest_ptr2 = s + sp * (h) - el;
linesize = el + er + w + 1;
/* Now copy the top and bottom lines into each line of the respective
* borders
*/
src_ptr1 = d - el;
src_ptr2 = d + dp * (h - 1) - el;
dest_ptr1 = d + dp * (-et) - el;
dest_ptr2 = d + dp * (h) - el;
linesize = el + er + w;
for (i = 0; i < (int)et; i++)
for (i = 0; i < et; i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, linesize);
dest_ptr1 += sp;
dest_ptr1 += dp;
}
for (i = 0; i < (int)eb; i++)
for (i = 0; i < eb; i++)
{
vpx_memcpy(dest_ptr2, src_ptr2, linesize);
dest_ptr2 += sp;
dest_ptr2 += dp;
}
}
void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst)
{
int er = 0xf & (16 - (width & 0xf));
int eb = 0xf & (16 - (height & 0xf));
/* check for non multiples of 16 */
if (er != 0 || eb != 0)
{
extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
/* adjust for uv */
height = (height + 1) >> 1;
width = (width + 1) >> 1;
er = 0x7 & (8 - (width & 0x7));
eb = 0x7 & (8 - (height & 0x7));
if (er || eb)
{
extend_plane_borders(ybf->u_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
extend_plane_borders(ybf->v_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
}
}
int et = dst->border;
int el = dst->border;
int eb = dst->border + dst->y_height - src->y_height;
int er = dst->border + dst->y_width - src->y_width;
copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_height, src->y_width,
et, el, eb, er);
et = (et + 1) >> 1;
el = (el + 1) >> 1;
eb = (eb + 1) >> 1;
er = (er + 1) >> 1;
copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
src->uv_height, src->uv_width,
et, el, eb, er);
copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
src->uv_height, src->uv_width,
et, el, eb, er);
}
/* note the extension is only for the last row, for intra prediction purpose */
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
{
......
......@@ -14,8 +14,8 @@
#include "vpx_scale/yv12config.h"
void Extend(YV12_BUFFER_CONFIG *ybf);
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height);
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
#endif
......@@ -786,7 +786,8 @@ void vp8_first_pass(VP8_COMP *cpi)
// TODO: handle the case when duration is set to 0, or something less
// than the full time between subsequent cpi->source_time_stamp s .
fps.duration = cpi->source_end_time_stamp - cpi->source_time_stamp;
fps.duration = cpi->source->ts_end
- cpi->source->ts_start;
// don't want to do output stats with a stack variable!
memcpy(cpi->this_frame_stats,
......
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include <stdlib.h>
#include "vpx_config.h"
#include "lookahead.h"
#include "vp8/common/extend.h"
/* Upper bound on the lookahead depth: 1 when CONFIG_REALTIME_ONLY is
 * non-zero, 25 otherwise. vp8_lookahead_init() clamps its depth argument
 * to this value.
 */
#define MAX_LAG_BUFFERS (CONFIG_REALTIME_ONLY? 1 : 25)
/* State of the lookahead queue: a fixed-size ring of entries addressed by
 * separate read and write indices.
 */
struct lookahead_ctx
{
unsigned int max_sz; /* Absolute size of the queue */
unsigned int sz; /* Number of buffers currently in the queue */
unsigned int read_idx; /* Read index: slot the next pop will return */
unsigned int write_idx; /* Write index: slot the next push will fill */
struct lookahead_entry *buf; /* Buffer list: array of max_sz entries */
};
/* Fetch the entry stored at ring position *idx and advance *idx by one
 * slot, wrapping back to the start once it reaches ctx->max_sz.
 */
static struct lookahead_entry *
pop(struct lookahead_ctx *ctx,
    unsigned int         *idx)
{
    const unsigned int      cur = *idx;
    struct lookahead_entry *entry = &ctx->buf[cur];
    unsigned int            next = cur + 1;

    assert(cur < ctx->max_sz);

    /* One subtraction is enough: cur is expected to be below max_sz. */
    if(next >= ctx->max_sz)
        next -= ctx->max_sz;

    *idx = next;
    return entry;
}
/* Release a lookahead context: every frame buffer in the ring, the ring
 * array, and finally the context itself. A NULL ctx is ignored, and a
 * context whose ring array was never allocated is freed without touching
 * any frame buffers.
 */
void
vp8_lookahead_destroy(struct lookahead_ctx *ctx)
{
    if(ctx)
    {
        if(ctx->buf)
        {
            /* Unsigned, matching max_sz, so the loop condition does not
             * compare a signed value against an unsigned one.
             */
            unsigned int i;

            for(i = 0; i < ctx->max_sz; i++)
                vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img);
            free(ctx->buf);
        }
        free(ctx);
    }
}
/* Create a lookahead queue.
 *
 * width, height: source frame dimensions; each is rounded up to a multiple
 *                of 16 before the frame buffers are allocated.
 * depth:         requested number of entries, clamped to the range
 *                [1, MAX_LAG_BUFFERS].
 *
 * Returns the new context, or NULL if any allocation fails.
 */
struct lookahead_ctx*
vp8_lookahead_init(unsigned int width,
                   unsigned int height,
                   unsigned int depth)
{
    struct lookahead_ctx *ctx = NULL;
    unsigned int i; /* unsigned to match depth in the loop condition */

    /* Clamp the lookahead queue depth */
    if(depth < 1)
        depth = 1;
    else if(depth > MAX_LAG_BUFFERS)
        depth = MAX_LAG_BUFFERS;

    /* Align the buffer dimensions */
    width = (width + 15u) & ~15u;
    height = (height + 15u) & ~15u;

    /* Allocate the lookahead structures */
    ctx = calloc(1, sizeof(*ctx));
    if(ctx)
    {
        /* max_sz is recorded before the buffers exist so that the bail
         * path can hand a partially built context to
         * vp8_lookahead_destroy(). Entries not yet allocated are still
         * zero-filled by calloc at that point.
         */
        ctx->max_sz = depth;
        ctx->buf = calloc(depth, sizeof(*ctx->buf));
        if(!ctx->buf)
            goto bail;
        for(i = 0; i < depth; i++)
            if (vp8_yv12_alloc_frame_buffer(&ctx->buf[i].img, width, height, 16))
                goto bail;
    }
    return ctx;
bail:
    vp8_lookahead_destroy(ctx);
    return NULL;
}
/* Queue a copy of src together with its timestamps and flags.
 * Returns 1 without copying anything when the queue is already full,
 * and 0 once the frame has been stored.
 */
int
vp8_lookahead_push(struct lookahead_ctx *ctx,
                   YV12_BUFFER_CONFIG   *src,
                   int64_t               ts_start,
                   int64_t               ts_end,
                   unsigned int          flags)
{
    struct lookahead_entry *slot;

    if(ctx->max_sz < ctx->sz + 1)
        return 1;

    /* Claim the slot at the write position, then account for it. */
    slot = pop(ctx, &ctx->write_idx);
    ctx->sz += 1;

    vp8_copy_and_extend_frame(src, &slot->img);
    slot->flags    = flags;
    slot->ts_end   = ts_end;
    slot->ts_start = ts_start;

    return 0;
}
/* Remove and return the entry at the read position.
 * Yields NULL when the queue is empty, or when drain is zero and the queue
 * has not yet filled to max_sz entries.
 */
struct lookahead_entry*
vp8_lookahead_pop(struct lookahead_ctx *ctx,
                  int                   drain)
{
    struct lookahead_entry *head;

    if(!ctx->sz)
        return NULL;

    /* Without drain, frames are only released once the queue is full. */
    if(!drain && ctx->sz != ctx->max_sz)
        return NULL;

    head = pop(ctx, &ctx->read_idx);
    ctx->sz -= 1;
    return head;
}
/* Look at a queued entry without removing it.
 *
 * index: position relative to the read index, 0 being the entry the next
 *        pop would return.
 *
 * Returns the entry, or NULL when index is not below the number of queued
 * frames.
 */
struct lookahead_entry*
vp8_lookahead_peek(struct lookahead_ctx *ctx,
                   int                   index)
{
    /* Convert once, explicitly, rather than relying on implicit
     * int-to-unsigned conversion in every comparison below. A negative
     * index becomes a very large unsigned value, so it trips the assert in
     * debug builds and otherwise falls through to the NULL return.
     */
    const unsigned int offset = (unsigned int)index;
    unsigned int pos;

    assert(offset < ctx->max_sz);
    if(offset >= ctx->sz)
        return NULL;

    /* Translate the relative offset into a ring position, wrapping once. */
    pos = ctx->read_idx + offset;
    if(pos >= ctx->max_sz)
        pos -= ctx->max_sz;

    return ctx->buf + pos;
}
/* Report how many frames are currently held in the lookahead queue. */
unsigned int
vp8_lookahead_depth(struct lookahead_ctx *ctx)
{
    const unsigned int queued = ctx->sz;

    return queued;
}
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LOOKAHEAD_H
#define LOOKAHEAD_H
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
/* One queued source frame plus the metadata supplied when it was pushed. */
struct lookahead_entry
{
YV12_BUFFER_CONFIG img; /* Frame buffer that receives the copied source image */
int64_t ts_start; /* Timestamp for the start of this frame */
int64_t ts_end; /* Timestamp for the end of this frame */
unsigned int flags; /* Flags set on this frame by the caller of push */
};
struct lookahead_ctx;
/**\brief Initializes the lookahead stage
 *
 * The lookahead stage is a queue of frame buffers on which some analysis
 * may be done when buffers are enqueued.
 *
 * \param[in] width     Width of the source frames; the queue's buffers are
 *                      allocated with this rounded up to a multiple of 16
 * \param[in] height    Height of the source frames; rounded up to a
 *                      multiple of 16 in the same way
 * \param[in] depth     Requested number of queue entries; values below 1
 *                      are raised to 1 and values above the compile-time
 *                      maximum are reduced to it
 *
 * \return Pointer to the newly allocated lookahead context
 * \retval NULL, if any allocation failed
 */
struct lookahead_ctx* vp8_lookahead_init(unsigned int width,
unsigned int height,
unsigned int depth
);
/**\brief Destroys the lookahead stage
 *
 * Releases every frame buffer held by the queue, then the queue itself.
 *
 * \param[in] ctx       Pointer to the lookahead context; NULL is accepted
 *                      and ignored
 */
void vp8_lookahead_destroy(struct lookahead_ctx *ctx);
/**\brief Enqueue a source buffer
 *
 * This function will copy the source image into a new framebuffer with
 * the expected stride/border.
 *
 * \param[in] ctx       Pointer to the lookahead context
 * \param[in] src       Pointer to the image to enqueue
 * \param[in] ts_start  Timestamp for the start of this frame
 * \param[in] ts_end    Timestamp for the end of this frame
 * \param[in] flags     Flags set on this frame
 *
 * \retval 0, if the frame was copied into the queue
 * \retval 1, if the queue was already full; nothing is copied in that case
 */
int
vp8_lookahead_push(struct lookahead_ctx *ctx,
YV12_BUFFER_CONFIG *src,
int64_t ts_start,
int64_t ts_end,
unsigned int flags);
/**\brief Get the next source buffer to encode
 *
 * Removes the oldest queued frame from the queue and returns it. The
 * returned entry points into the queue's own storage rather than being a
 * separate allocation.
 *
 * \param[in] ctx       Pointer to the lookahead context
 * \param[in] drain     Flag indicating the buffer should be drained
 *                      (return a buffer regardless of the current queue depth)
 *
 * \return Pointer to the dequeued entry
 * \retval NULL, if drain set and queue is empty
 * \retval NULL, if drain not set and queue not of the configured depth
 *
 */
struct lookahead_entry*
vp8_lookahead_pop(struct lookahead_ctx *ctx,
int drain);
/**\brief Get a future source buffer to encode
 *
 * The queue is left unchanged; the entry stays queued.
 *
 * \param[in] ctx       Pointer to the lookahead context
 * \param[in] index     Index of the frame to be returned, 0 == next frame
 *                      (the one the next pop would return)
 *
 * \return Pointer to the entry at the requested position
 * \retval NULL, if no buffer exists at the specified index
 *
 */
struct lookahead_entry*
vp8_lookahead_peek(struct lookahead_ctx *ctx,
int index);
/**\brief Get the number of frames currently in the lookahead queue
 *
 * \param[in] ctx       Pointer to the lookahead context
 *
 * \return Number of frames that have been pushed but not yet popped
 */
unsigned int
vp8_lookahead_depth(struct lookahead_ctx *ctx);
#endif
......@@ -71,7 +71,7 @@ extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_
int vp8_estimate_entropy_savings(VP8_COMP *cpi);
int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi);
extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi, int distance);
static void set_default_lf_deltas(VP8_COMP *cpi);
......@@ -287,16 +287,9 @@ static void dealloc_compressor_data(VP8_COMP *cpi)
vp8_yv12_de_alloc_frame_buffer(&cpi->last_frame_uf);
vp8_yv12_de_alloc_frame_buffer(&cpi->scaled_source);
#if VP8_TEMPORAL_ALT_REF
vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer.source_buffer);
vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer);
#endif
{
int i;
for (i = 0; i < MAX_LAG_BUFFERS; i++)
vp8_yv12_de_alloc_frame_buffer(&cpi->src_buffer[i].source_buffer);
cpi->source_buffer_count = 0;
}
vp8_lookahead_destroy(cpi->lookahead);
vpx_free(cpi->tok);
cpi->tok = 0;
......@@ -1252,35 +1245,23 @@ void vp8_set_speed_features(VP8_COMP *cpi)
}
static void alloc_raw_frame_buffers(VP8_COMP *cpi)
{
int i, buffers;
/* allocate source_buffer to be multiples of 16 */
int width = (cpi->oxcf.Width + 15) & ~15;
int height = (cpi->oxcf.Height + 15) & ~15;
buffers = cpi->oxcf.lag_in_frames;
if (buffers > MAX_LAG_BUFFERS)
buffers = MAX_LAG_BUFFERS;
if (buffers < 1)
buffers = 1;
for (i = 0; i < buffers; i++)
if (vp8_yv12_alloc_frame_buffer(&cpi->src_buffer[i].source_buffer,
width, cpi->oxcf.Height,
16))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate lag buffer");
cpi->lookahead = vp8_lookahead_init(cpi->oxcf.Width, cpi->oxcf.Height,
cpi->oxcf.lag_in_frames);
if(!cpi->lookahead)
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate lag buffers");
#if VP8_TEMPORAL_ALT_REF
if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer.source_buffer,
width, cpi->oxcf.Height, 16))
if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer,
width, height, 16))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate altref buffer");
#endif
cpi->source_buffer_count = 0;
}
static int vp8_alloc_partition_data(VP8_COMP *cpi)
......@@ -1772,7 +1753,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
// YX Temp
cpi->last_alt_ref_sei = -1;
cpi->alt_ref_source = NULL;
cpi->is_src_frame_alt_ref = 0;
cpi->is_next_src_alt_ref = 0;
......@@ -2120,8 +2101,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->ready_for_new_frame = 1;
cpi->source_encode_index = 0;
// make sure frame 1 is okay
cpi->error_bins[0] = cpi->common.MBs;
......@@ -2169,7 +2148,8 @@ void vp8_remove_compressor(VP8_PTR *ptr)
if (cpi->pass != 1)
{
FILE *f = fopen("opsnr.stt", "a");
double time_encoded = (cpi->source_end_time_stamp - cpi->first_time_stamp_ever) / 10000000.000;
double time_encoded = (cpi->last_end_time_stamp_seen
- cpi->first_time_stamp_ever) / 10000000.000;
double total_encode_time = (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
double dr = (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
......@@ -2624,37 +2604,13 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
vp8_scale_frame(sd, &cpi->scaled_source, cm->temp_scale_frame.y_buffer,
tmp_height, hs, hr, vs, vr, 0);
vp8_yv12_extend_frame_borders(&cpi->scaled_source);
cpi->Source = &cpi->scaled_source;
#endif
}
// we may need to copy to a buffer so we can extend the image...
else if (cm->Width != cm->yv12_fb[cm->lst_fb_idx].y_width ||
cm->Height != cm->yv12_fb[cm->lst_fb_idx].y_height)
{
//vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
#if HAVE_ARMV7
#if CONFIG_RUNTIME_CPU_DETECT
if (cm->rtcd.flags & HAS_NEON)
#endif
{
vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
}
#if CONFIG_RUNTIME_CPU_DETECT
else
#endif
#endif
#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
{
vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
}
#endif
cpi->Source = &cpi->scaled_source;
}
}
vp8_extend_to_multiple_of16(cpi->Source, cm->Width, cm->Height);
}
static void resize_key_frame(VP8_COMP *cpi)
{
#if CONFIG_SPATIAL_RESAMPLING
......@@ -3567,7 +3523,6 @@ static void encode_frame_to_data_rate
if (Adjustment)
{
int buff_lvl_step;
int tmp_lvl = cpi->buffer_level;
if (cpi->buffer_level < cpi->oxcf.maximum_buffer_size)
{
......@@ -4668,17 +4623,17 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
#endif
int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
{
#if HAVE_ARMV7
INT64 store_reg[8];
#endif
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
struct vpx_usec_timer timer;
if (!cpi)
return -1;
int res = 0;
#if HAVE_ARMV7
#if CONFIG_RUNTIME_CPU_DETECT
......@@ -4690,75 +4645,10 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
#endif
vpx_usec_timer_start(&timer);
// no more room for frames;
if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames)
{
#if HAVE_ARMV7
#if CONFIG_RUNTIME_CPU_DETECT
if (cm->rtcd.flags & HAS_NEON)
#endif
{
vp8_pop_neon(store_reg);
}
#endif
return -1;
}
//printf("in-cpi->source_buffer_count: %d\n", cpi->source_buffer_count);
if(vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
frame_flags))
res = -1;
cm->clr_type = sd->clrtype;
// make a copy of the frame for use later...
#if !(CONFIG_REALTIME_ONLY)
if (cpi->oxcf.allow_lag)
{
int which_buffer = cpi->source_encode_index - 1;
SOURCE_SAMPLE *s;
if (which_buffer == -1)