Commit d115cd8b authored by James Zern, committed by Gerrit Code Review

Merge changes I082959ab,Ib6932640

* changes:
  vp9/decoder: threaded row-based loop filter
  vp9/decoder: add thread worker
parents f4837579 a0ffa279
......@@ -89,6 +89,7 @@ LIBVPX_TEST_SRCS-yes += tile_independence_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
......
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/decoder/vp9_thread.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/md5_helper.h"
#include "test/webm_video_source.h"
namespace {
// Test fixture that owns a single VP9Worker. The worker is put into its
// initial state before each test and shut down after each test, so every
// TEST_F starts with a worker on which vp9_worker_reset() has not been called.
class VP9WorkerThreadTest : public ::testing::Test {
protected:
virtual ~VP9WorkerThreadTest() {}
// Runs before each test: vp9_worker_init() must precede any other worker call.
virtual void SetUp() {
vp9_worker_init(&worker_);
}
// Runs after each test: ends the worker (a no-op apart from its assert when
// the worker was never reset).
virtual void TearDown() {
vp9_worker_end(&worker_);
}
// The worker under test.
VP9Worker worker_;
};
// Worker hook used by the tests below.
//   data:         points to an int that is overwritten with 5 to prove the
//                 hook ran.
//   return_value: points to an int whose value becomes the hook's result
//                 (nonzero for success, zero for failure).
int ThreadHook(void* data, void* return_value) {
  *static_cast<int*>(data) = 5;
  const int* const result = static_cast<const int*>(return_value);
  return *result;
}
// Verifies the success path: a hook that returns nonzero leaves had_error
// clear, and the launch/sync cycle can be repeated on the same worker.
TEST_F(VP9WorkerThreadTest, HookSuccess) {
  EXPECT_TRUE(vp9_worker_sync(&worker_));  // should be a no-op.

  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_TRUE(vp9_worker_reset(&worker_));

    int hook_data = 0;
    int hook_status = 1;  // nonzero: the hook reports success
    worker_.data1 = &hook_data;
    worker_.data2 = &hook_status;
    worker_.hook = ThreadHook;

    vp9_worker_launch(&worker_);
    EXPECT_TRUE(vp9_worker_sync(&worker_));
    EXPECT_FALSE(worker_.had_error);
    EXPECT_EQ(5, hook_data);  // the hook ran and wrote its marker value

    EXPECT_TRUE(vp9_worker_sync(&worker_));  // should be a no-op.
  }
}
// Verifies the failure path: a hook that returns zero makes sync report
// failure and sets had_error, and a subsequent reset clears the error so the
// worker can be launched again.
TEST_F(VP9WorkerThreadTest, HookFailure) {
  int hook_data = 0;
  int hook_status = 0;  // zero: the hook reports failure

  EXPECT_TRUE(vp9_worker_reset(&worker_));
  worker_.data1 = &hook_data;
  worker_.data2 = &hook_status;
  worker_.hook = ThreadHook;

  vp9_worker_launch(&worker_);
  EXPECT_FALSE(vp9_worker_sync(&worker_));
  EXPECT_TRUE(worker_.had_error);

  // Ensure _reset() clears the error and _launch() can be called again.
  hook_status = 1;
  EXPECT_TRUE(vp9_worker_reset(&worker_));
  EXPECT_FALSE(worker_.had_error);

  vp9_worker_launch(&worker_);
  EXPECT_TRUE(vp9_worker_sync(&worker_));
  EXPECT_FALSE(worker_.had_error);
}
// Decodes a reference clip with the decoder configured for two threads and
// checks the MD5 of all output frames against a known-good digest.
TEST(VP9DecodeMTTest, MTDecode) {
  libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm");
  video.Init();

  vpx_codec_dec_cfg_t cfg = {0};
  cfg.threads = 2;
  libvpx_test::VP9Decoder decoder(cfg, 0);

  libvpx_test::MD5 md5;
  for (video.Begin(); video.cxdata(); video.Next()) {
    const vpx_codec_err_t res =
        decoder.DecodeFrame(video.cxdata(), video.frame_size());
    ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();

    // Fold every decompressed frame produced by this packet into the digest.
    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
    for (const vpx_image_t *img = dec_iter.Next(); img != NULL;
         img = dec_iter.Next()) {
      md5.Add(img);
    }
  }
  EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());
}
} // namespace
......@@ -383,3 +383,11 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
0, cm->mi_rows, y_only);
}
int vp9_loop_filter_worker(void *arg1, void *arg2) {
LFWorkerData *const lf_data = (LFWorkerData*)arg1;
(void)arg2;
vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
lf_data->start, lf_data->stop, lf_data->y_only);
return 1;
}
......@@ -57,4 +57,18 @@ void vp9_loop_filter_frame(struct VP9Common *cm,
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
struct VP9Common *cm, struct macroblockd *xd,
int start, int stop, int y_only);
// Arguments for one vp9_loop_filter_worker() invocation. Each field is
// forwarded to the matching parameter of vp9_loop_filter_rows().
typedef struct LoopFilterWorkerData {
// Frame buffer to filter.
const YV12_BUFFER_CONFIG *frame_buffer;
// Common codec state passed through to the row filter.
struct VP9Common *cm;
// Held by value (not by pointer): the worker gets its own copy.
struct macroblockd xd; // TODO(jzern): most of this is unnecessary to the
// loopfilter. the planes are necessary as their state
// is changed during decode.
// Row range handed to vp9_loop_filter_rows() as its 'start' and 'stop'.
int start;
int stop;
// Forwarded as the 'y_only' argument of vp9_loop_filter_rows().
int y_only;
} LFWorkerData;
// Operates on the rows described by LFWorkerData passed as 'arg1'.
int vp9_loop_filter_worker(void *arg1, void *arg2);
#endif // VP9_COMMON_VP9_LOOPFILTER_H_
......@@ -34,6 +34,7 @@
#include "vp9/decoder/vp9_idct_blk.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
#include "vp9/decoder/vp9_thread.h"
#include "vp9/decoder/vp9_treereader.h"
static int read_be32(const uint8_t *p) {
......@@ -583,10 +584,18 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi,
}
static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
const int num_threads = pbi->oxcf.max_threads;
VP9_COMMON *const pc = &pbi->common;
int mi_row, mi_col;
if (pbi->do_loopfilter_inline) {
if (num_threads > 1) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
lf_data->frame_buffer = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
lf_data->cm = pc;
lf_data->xd = pbi->mb;
lf_data->y_only = 0;
}
vp9_loop_filter_frame_init(pc, &pbi->mb, pbi->mb.lf.filter_level);
}
......@@ -601,17 +610,33 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
}
if (pbi->do_loopfilter_inline) {
YV12_BUFFER_CONFIG *const fb =
&pbi->common.yv12_fb[pbi->common.new_fb_idx];
// delay the loopfilter by 1 macroblock row.
const int lf_start = mi_row - MI_BLOCK_SIZE;
if (lf_start < 0) continue;
vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
if (num_threads > 1) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
vp9_worker_sync(&pbi->lf_worker);
lf_data->start = lf_start;
lf_data->stop = mi_row;
pbi->lf_worker.hook = vp9_loop_filter_worker;
vp9_worker_launch(&pbi->lf_worker);
} else {
YV12_BUFFER_CONFIG *const fb =
&pbi->common.yv12_fb[pbi->common.new_fb_idx];
vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
}
}
}
if (pbi->do_loopfilter_inline) {
YV12_BUFFER_CONFIG *const fb = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
if (num_threads > 1) {
// TODO(jzern): since the loop filter is delayed one mb row, this will be
// forced to wait for the last row scheduled in the for loop.
vp9_worker_sync(&pbi->lf_worker);
}
vp9_loop_filter_rows(fb, pc, &pbi->mb,
mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0);
}
......
......@@ -141,6 +141,16 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
pbi->common.error.setjmp = 0;
pbi->decoded_key_frame = 0;
if (pbi->oxcf.max_threads > 1) {
vp9_worker_init(&pbi->lf_worker);
pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData));
pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) {
vp9_remove_decompressor(pbi);
return NULL;
}
}
return pbi;
}
......@@ -154,6 +164,8 @@ void vp9_remove_decompressor(VP9D_PTR ptr) {
vpx_free(pbi->common.last_frame_seg_map);
vp9_remove_common(&pbi->common);
vp9_worker_end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
vpx_free(pbi);
}
......
......@@ -14,8 +14,8 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/decoder/vp9_onyxd.h"
#include "vp9/decoder/vp9_thread.h"
typedef struct VP9Decompressor {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
......@@ -38,6 +38,7 @@ typedef struct VP9Decompressor {
int initial_height;
int do_loopfilter_inline; // apply loopfilter to available rows immediately
VP9Worker lf_worker;
} VP9D_COMP;
#endif // VP9_DECODER_VP9_TREEREADER_H_
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Multi-threaded worker
//
// Original source:
// http://git.chromium.org/webm/libwebp.git
// 100644 blob eff8f2a8c20095aade3c292b0e9292dac6cb3587 src/utils/thread.c
#include <assert.h>
#include <string.h> // for memset()
#include "./vp9_thread.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if CONFIG_MULTITHREAD
#if defined(_WIN32)
//------------------------------------------------------------------------------
// simplistic pthread emulation layer
#include <process.h>
// _beginthreadex requires __stdcall
#define THREADFN unsigned int __stdcall
#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
// Minimal pthread_create() replacement built on _beginthreadex().
// 'attr' is ignored. On success the new thread's handle is stored in
// '*thread', its priority is raised above normal, and 0 is returned.
// Returns 1 if the thread could not be created.
static int pthread_create(pthread_t* const thread, const void* attr,
                          unsigned int (__stdcall *start)(void*), void* arg) {
  const pthread_t handle =
      (pthread_t)_beginthreadex(NULL,   /* void *security */
                                0,      /* unsigned stack_size */
                                start,
                                arg,
                                0,      /* unsigned initflag */
                                NULL);  /* unsigned *thrdaddr */
  (void)attr;
  *thread = handle;
  if (handle == NULL) return 1;
  SetThreadPriority(handle, THREAD_PRIORITY_ABOVE_NORMAL);
  return 0;
}
// Minimal pthread_join() replacement: waits for 'thread' to finish and then
// closes its handle. 'value_ptr' is ignored. Returns 0 on success, 1 if the
// wait or the close failed. The handle is not closed when the wait fails.
static int pthread_join(pthread_t thread, void** value_ptr) {
  (void)value_ptr;
  if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0) return 1;
  return (CloseHandle(thread) == 0);
}
// Mutex
// The four wrappers below map the pthread mutex calls used in this file onto
// a CRITICAL_SECTION. Each of them always returns 0.

// Initializes 'mutex'. 'mutexattr' is ignored.
static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
(void)mutexattr;
InitializeCriticalSection(mutex);
return 0;
}
// Acquires 'mutex' via EnterCriticalSection().
static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
EnterCriticalSection(mutex);
return 0;
}
// Releases 'mutex' via LeaveCriticalSection().
static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
LeaveCriticalSection(mutex);
return 0;
}
// Releases the resources held by 'mutex' via DeleteCriticalSection().
static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
DeleteCriticalSection(mutex);
return 0;
}
// Condition
// Closes the three handles that make up the emulated condition variable.
// All three closes are always attempted. Returns 0 if every close succeeded,
// 1 otherwise.
static int pthread_cond_destroy(pthread_cond_t* const condition) {
  const int waiting_closed = (CloseHandle(condition->waiting_sem_) != 0);
  const int received_closed = (CloseHandle(condition->received_sem_) != 0);
  const int event_closed = (CloseHandle(condition->signal_event_) != 0);
  return !(waiting_closed && received_closed && event_closed);
}
// Creates the two semaphores and the auto-reset event that emulate a
// condition variable. 'cond_attr' is ignored. Returns 0 on success. If any
// object could not be created the condition is torn down again and 1 is
// returned.
static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
  (void)cond_attr;
  condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
  condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
  condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);

  if (condition->waiting_sem_ != NULL &&
      condition->received_sem_ != NULL &&
      condition->signal_event_ != NULL) {
    return 0;
  }
  pthread_cond_destroy(condition);
  return 1;
}
// Wakes the thread blocked in pthread_cond_wait() on 'condition', if any.
// When no thread has announced itself through waiting_sem_ the call does
// nothing. Returns 0 on success and 1 if signalling the event or waiting for
// the acknowledgement failed.
static int pthread_cond_signal(pthread_cond_t* const condition) {
  int ok = 1;
  if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
    // a thread is waiting in pthread_cond_wait: allow it to be notified.
    // SetEvent() is only documented to return nonzero on success, so
    // normalize it to 0/1 before combining it bitwise below.
    ok = (SetEvent(condition->signal_event_) != 0);
    // wait until the event is consumed so the signaler cannot consume
    // the event via its own pthread_cond_wait. The wait succeeds when it
    // returns WAIT_OBJECT_0, matching the check in pthread_cond_wait().
    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) ==
           WAIT_OBJECT_0);
  }
  return !ok;
}
// Blocks until 'condition' is signalled. 'mutex' must be held by the caller;
// it is released while waiting and re-acquired before a normal return.
// Returns 0 on success and 1 on failure. If the waiter cannot be announced,
// the function returns 1 immediately with 'mutex' still held.
static int pthread_cond_wait(pthread_cond_t* const condition,
                             pthread_mutex_t* const mutex) {
  int ok;
  // note that there is a consumer available so the signal isn't dropped in
  // pthread_cond_signal
  if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
    return 1;
  // now unlock the mutex so pthread_cond_signal may be issued
  pthread_mutex_unlock(mutex);
  ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
        WAIT_OBJECT_0);
  // Acknowledge receipt to the signaler. ReleaseSemaphore() is only
  // documented to return nonzero on success, so normalize it to 0/1 before
  // the bitwise AND.
  ok &= (ReleaseSemaphore(condition->received_sem_, 1, NULL) != 0);
  pthread_mutex_lock(mutex);
  return !ok;
}
#else // _WIN32
# define THREADFN void*
# define THREAD_RETURN(val) val
#endif
//------------------------------------------------------------------------------
// Entry point of the worker thread. 'ptr' is the VP9Worker this thread
// serves. The loop sleeps on the worker's condition while status_ is OK,
// runs the hook once each time status_ becomes WORK, and exits once status_
// becomes NOT_OK. The worker's mutex is held for the whole of each pass,
// including the hook call, and is only released inside pthread_cond_wait()
// and at the end of the pass.
static THREADFN thread_loop(void *ptr) { // thread loop
VP9Worker* const worker = (VP9Worker*)ptr;
int done = 0;
while (!done) {
pthread_mutex_lock(&worker->mutex_);
while (worker->status_ == OK) { // wait in idling mode
pthread_cond_wait(&worker->condition_, &worker->mutex_);
}
if (worker->status_ == WORK) {
if (worker->hook) {
// A false (zero) hook result sets had_error; earlier errors are kept.
worker->had_error |= !worker->hook(worker->data1, worker->data2);
}
// Back to idle so change_state() can see that the job has finished.
worker->status_ = OK;
} else if (worker->status_ == NOT_OK) { // finish the worker
done = 1;
}
// signal to the main thread that we're done (for Sync())
pthread_cond_signal(&worker->condition_);
pthread_mutex_unlock(&worker->mutex_);
}
return THREAD_RETURN(NULL); // Thread is finished
}
// main thread state control
// Called from the controlling thread to move 'worker' to 'new_status'.
// It first blocks until the worker is idle (status_ == OK), so passing OK
// simply waits for the current job to finish. Any other status is stored
// and the worker thread is signalled so it can act on it.
static void change_state(VP9Worker* const worker,
VP9WorkerStatus new_status) {
// no-op when attempting to change state on a thread that didn't come up
// (this check reads status_ before the mutex is taken)
if (worker->status_ < OK) return;
pthread_mutex_lock(&worker->mutex_);
// wait for the worker to finish
while (worker->status_ != OK) {
pthread_cond_wait(&worker->condition_, &worker->mutex_);
}
// assign new status and release the working thread if needed
if (new_status != OK) {
worker->status_ = new_status;
pthread_cond_signal(&worker->condition_);
}
pthread_mutex_unlock(&worker->mutex_);
}
#endif
//------------------------------------------------------------------------------
// Puts 'worker' into its initial state: every field is zeroed and the status
// is marked NOT_OK, i.e. no thread has been started yet.
void vp9_worker_init(VP9Worker* const worker) {
  memset(worker, 0, sizeof(VP9Worker));
  worker->status_ = NOT_OK;
}
// Waits for any job in flight on 'worker' to complete (multithreaded builds
// only; otherwise jobs run synchronously in vp9_worker_launch()).
// Returns 1 if no hook error has been recorded, 0 otherwise.
int vp9_worker_sync(VP9Worker* const worker) {
#if CONFIG_MULTITHREAD
  change_state(worker, OK);
#endif
  assert(worker->status_ <= OK);
  return worker->had_error ? 0 : 1;
}
// Prepares 'worker' for a new job and clears any recorded hook error.
//  - If the worker has not been started (status_ < OK), the synchronization
//    objects are created and, in multithreaded builds, the thread is spawned.
//  - If a job is still running (status_ > OK), waits for it to finish.
// Returns 1 on success and 0 on failure. On failure every synchronization
// object created by this call is destroyed again. The worker then stays
// NOT_OK, so vp9_worker_end() would not release them.
int vp9_worker_reset(VP9Worker* const worker) {
  int ok = 1;
  worker->had_error = 0;
  if (worker->status_ < OK) {
#if CONFIG_MULTITHREAD
    if (pthread_mutex_init(&worker->mutex_, NULL)) {
      return 0;
    }
    if (pthread_cond_init(&worker->condition_, NULL)) {
      // Don't leak the mutex that was just created.
      pthread_mutex_destroy(&worker->mutex_);
      return 0;
    }
    // Hold the mutex while the thread starts so that it cannot inspect
    // status_ before it has been set to OK.
    pthread_mutex_lock(&worker->mutex_);
    ok = !pthread_create(&worker->thread_, NULL, thread_loop, worker);
    if (ok) worker->status_ = OK;
    pthread_mutex_unlock(&worker->mutex_);
    if (!ok) {
      // No thread was created, so nothing else references these objects.
      pthread_mutex_destroy(&worker->mutex_);
      pthread_cond_destroy(&worker->condition_);
      return 0;
    }
#else
    worker->status_ = OK;
#endif
  } else if (worker->status_ > OK) {
    ok = vp9_worker_sync(worker);
  }
  assert(!ok || (worker->status_ == OK));
  return ok;
}
// Starts the job described by worker->hook/data1/data2.
// In multithreaded builds the worker thread is asked to run it; otherwise
// the hook is called directly here and a false result is recorded in
// worker->had_error.
void vp9_worker_launch(VP9Worker* const worker) {
#if CONFIG_MULTITHREAD
  change_state(worker, WORK);
#else
  if (worker->hook != NULL) {
    const int hook_ok = worker->hook(worker->data1, worker->data2);
    if (!hook_ok) worker->had_error |= 1;
  }
#endif
}
// Shuts 'worker' down. If it was started, the thread is told to exit and is
// joined, and the mutex and condition are destroyed (multithreaded builds);
// otherwise the status is simply set back to NOT_OK. Does nothing for a
// worker that was never started.
void vp9_worker_end(VP9Worker* const worker) {
  const int was_started = (worker->status_ >= OK);
  if (was_started) {
#if CONFIG_MULTITHREAD
    change_state(worker, NOT_OK);
    pthread_join(worker->thread_, NULL);
    pthread_mutex_destroy(&worker->mutex_);
    pthread_cond_destroy(&worker->condition_);
#else
    worker->status_ = NOT_OK;
#endif
  }
  assert(worker->status_ == NOT_OK);
}
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Multi-threaded worker
//
// Original source:
// http://git.chromium.org/webm/libwebp.git
// 100644 blob 13a61a4c84194c3374080cbf03d881d3cd6af40d src/utils/thread.h
#ifndef VP9_DECODER_VP9_THREAD_H_
#define VP9_DECODER_VP9_THREAD_H_
#include "vpx_config.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if CONFIG_MULTITHREAD
#if defined(_WIN32)
#include <windows.h>
typedef HANDLE pthread_t;
typedef CRITICAL_SECTION pthread_mutex_t;
typedef struct {
HANDLE waiting_sem_;
HANDLE received_sem_;
HANDLE signal_event_;
} pthread_cond_t;
#else
#include <pthread.h>
#endif /* _WIN32 */
#endif /* CONFIG_MULTITHREAD */
// State of the worker thread object
// The numeric order NOT_OK < OK < WORK is significant: the implementation
// compares status values with '<', '<=', '>' and '>='.
typedef enum {
NOT_OK = 0, // object is unusable
OK, // ready to work
WORK // busy finishing the current task
} VP9WorkerStatus;
// Function to be called by the worker thread. Takes two opaque pointers as
// arguments (data1 and data2), and should return false in case of error.
typedef int (*VP9WorkerHook)(void*, void*);
// Synchronize object used to launch job in the worker thread
// Fields with a trailing underscore are managed by the vp9_worker_*()
// functions; hook, data1 and data2 are filled in by the caller before
// vp9_worker_launch().
typedef struct {
#if CONFIG_MULTITHREAD
pthread_mutex_t mutex_;
pthread_cond_t condition_;
pthread_t thread_;
#endif
VP9WorkerStatus status_;
VP9WorkerHook hook; // hook to call
void* data1; // first argument passed to 'hook'
void* data2; // second argument passed to 'hook'
// Nonzero once any call to 'hook' has returned false. The flag is sticky:
// it stays set across later successful calls until vp9_worker_reset()
// clears it.
int had_error;
} VP9Worker;
// Must be called first, before any other method.
void vp9_worker_init(VP9Worker* const worker);
// Must be called to initialize the object and spawn the thread. Re-entrant.
// Will potentially launch the thread. Returns false in case of error.
int vp9_worker_reset(VP9Worker* const worker);
// Makes sure the previous work is finished. Returns true if worker->had_error
// was not set and no error condition was triggered by the working thread.
int vp9_worker_sync(VP9Worker* const worker);
// Triggers the thread to call hook() with the data1 and data2 arguments. The
// hook/data1/data2 fields can be changed at any time before calling this
// function, but must not be changed afterward until the next call to
// vp9_worker_sync().
void vp9_worker_launch(VP9Worker* const worker);
// Kill the thread and terminate the object. To use the object again, one
// must call vp9_worker_reset() again.
void vp9_worker_end(VP9Worker* const worker);
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
#endif /* VP9_DECODER_VP9_THREAD_H_ */
......@@ -28,6 +28,8 @@ VP9_DX_SRCS-yes += decoder/vp9_decodemv.h
VP9_DX_SRCS-yes += decoder/vp9_detokenize.h
VP9_DX_SRCS-yes += decoder/vp9_onyxd.h
VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h
VP9_DX_SRCS-yes += decoder/vp9_thread.c
VP9_DX_SRCS-yes += decoder/vp9_thread.h
VP9_DX_SRCS-yes += decoder/vp9_treereader.h
VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c
VP9_DX_SRCS-yes += decoder/vp9_idct_blk.c
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment