Commit ba2e107d authored by Yunqing Wang

First modification of multi-thread decoder

This is the first modification of the VP8 multi-thread decoder, which uses
the same threads to decode macroblocks and then do loop filtering for each
frame.

Inspired by Rob Clark, synchronization is done every 8 macroblocks
instead of every macroblock to reduce lock contention.

Compared with the original code, this implementation gave about a
15%-20% performance gain while decoding my test clips on a Core2 Quad
platform (Linux).

The work is not done yet.

Tests on other platforms are needed.

Change-Id: Ice9ddb0b511af1359b9f71e65066143c04fef3b5
parent 618c7d27
......@@ -18,6 +18,7 @@
extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
MACROBLOCKD *xd);
extern void vp8_mt_loop_filter_frame(VP8D_COMP *pbi);
extern void vp8_stop_lfthread(VP8D_COMP *pbi);
extern void vp8_start_lfthread(VP8D_COMP *pbi);
extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
......
......@@ -367,6 +367,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
return -1;
}
/*
if (!pbi->b_multithreaded_lf)
{
struct vpx_usec_timer lpftimer;
......@@ -378,12 +379,42 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
vpx_usec_timer_mark(&lpftimer);
pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
}else{
struct vpx_usec_timer lpftimer;
vpx_usec_timer_start(&lpftimer);
// Apply the loop filter if appropriate.
if (cm->filter_level > 0)
vp8_mt_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
vpx_usec_timer_mark(&lpftimer);
pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
}
if (cm->filter_level > 0) {
cm->last_frame_type = cm->frame_type;
cm->last_filter_type = cm->filter_type;
cm->last_sharpness_level = cm->sharpness_level;
}
*/
if(pbi->common.filter_level)
{
struct vpx_usec_timer lpftimer;
vpx_usec_timer_start(&lpftimer);
// Apply the loop filter if appropriate.
if (pbi->b_multithreaded_lf && cm->multi_token_partition != ONE_PARTITION)
vp8_mt_loop_filter_frame(pbi); //cm, &pbi->mb, cm->filter_level);
else
vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
vpx_usec_timer_mark(&lpftimer);
pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
cm->last_frame_type = cm->frame_type;
cm->last_filter_type = cm->filter_type;
cm->last_sharpness_level = cm->sharpness_level;
}
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
......
......@@ -95,20 +95,22 @@ typedef struct VP8Decompressor
int current_mb_col_main;
int decoding_thread_count;
int allocated_decoding_thread_count;
int *current_mb_col; //Each row remembers its already decoded column.
int mt_baseline_filter_level[MAX_MB_SEGMENTS];
// variable for threading
DECLARE_ALIGNED(16, MACROBLOCKD, lpfmb);
#if CONFIG_MULTITHREAD
pthread_t h_thread_lpf; // thread for postprocessing
sem_t h_event_lpf; // Event for post_proc completed
sem_t h_event_start_lpf;
//pthread_t h_thread_lpf; // thread for postprocessing
sem_t h_event_end_lpf; // Event for post_proc completed
sem_t *h_event_start_lpf;
#endif
MB_ROW_DEC *mb_row_di;
DECODETHREAD_DATA *de_thread_data;
#if CONFIG_MULTITHREAD
pthread_t *h_decoding_thread;
sem_t *h_event_mbrdecoding;
sem_t h_event_main;
sem_t *h_event_start_decoding;
sem_t h_event_end_decoding;
// end of threading data
#endif
vp8_reader *mbc;
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment