From 87aa846b47a61ba5480ffbae058222720754be5c Mon Sep 17 00:00:00 2001
From: Deb Mukherjee <debargha@google.com>
Date: Tue, 20 Dec 2011 14:50:31 -0800
Subject: [PATCH] Multiframe quality enhancement postprocessing

Adds a multiframe postprocessing module to enhance the quality of
certain frames that are coded at lower quality than preceding frames.
The module can be invoked from the command line with the --mfqe
option, and is most beneficial for enhancing the quality of
frames decoded using scalable patterns.

Uses the vp8_variance_var16x16 and vp8_variance_sad16x16 function pointers
(and their 8x8 and 4x4 counterparts) to compute SAD and variance of blocks.

Change-Id: Id73d2a6e3572d07f9f8e36bbce00a4fc5ffd8961
---
 examples/postproc.txt |   2 +-
 vp8/common/postproc.c | 227 ++++++++++++++++++++++++++++++++++++++++--
 vp8/common/postproc.h |   1 +
 vp8/common/ppflags.h  |   3 +-
 vp8/vp8_dx_iface.c    |   2 +-
 vpx/vp8.h             |   1 +
 vpxdec.c              |   9 +-
 7 files changed, 234 insertions(+), 11 deletions(-)

diff --git a/examples/postproc.txt b/examples/postproc.txt
index 0940ea24c3..51b251a045 100644
--- a/examples/postproc.txt
+++ b/examples/postproc.txt
@@ -58,7 +58,7 @@ if(frame_cnt%30 == 1) {
     if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
         die_codec(&codec, "Failed to turn off postproc");
 } else if(frame_cnt%30 == 16) {
-    vp8_postproc_cfg_t  pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK, 4, 0};
+    vp8_postproc_cfg_t  pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4, 0};
 
     if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
         die_codec(&codec, "Failed to turn on postproc");
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index ace4c113ce..0ef300998d 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -12,9 +12,12 @@
 #include "vpx_config.h"
 #include "vpx_scale/yv12config.h"
 #include "postproc.h"
+#include "common.h"
+#include "recon.h"
 #include "vpx_scale/yv12extend.h"
 #include "vpx_scale/vpxscale.h"
 #include "systemdependent.h"
+#include "../encoder/variance.h"
 
 #include <math.h>
 #include <stdlib.h>
@@ -121,7 +124,6 @@ const short vp8_rv[] =
     0, 9, 5, 5, 11, 10, 13, 9, 10, 13,
 };
 
-
 extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch);
 extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch);
 /***********************************************************************************************************
@@ -323,11 +325,11 @@ static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG         *source,
 }
 
 void vp8_deblock(YV12_BUFFER_CONFIG         *source,
-                        YV12_BUFFER_CONFIG         *post,
-                        int                         q,
-                        int                         low_var_thresh,
-                        int                         flag,
-                        vp8_postproc_rtcd_vtable_t *rtcd)
+                 YV12_BUFFER_CONFIG         *post,
+                 int                         q,
+                 int                         low_var_thresh,
+                 int                         flag,
+                 vp8_postproc_rtcd_vtable_t *rtcd)
 {
     double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
     int ppl = (int)(level + .5);
@@ -671,6 +673,210 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
     }
 }
 
+/* Merges one blksize x blksize luma block and its two (blksize/2)-square
+ * chroma blocks from the source planes (y, u, v) into the destination planes
+ * (yd, ud, vd).  When the luma SAD between source and destination is below
+ * an activity-based threshold the destination becomes a weighted blend that
+ * favours its existing contents; otherwise the source block is copied. */
+static void multiframe_quality_enhance_block
+(
+    int blksize, /* Currently only values supported are 16, 8, 4 */
+    int qcurr,   /* not used yet; see the threshold note below */
+    int qprev,   /* not used yet; see the threshold note below */
+    unsigned char *y,
+    unsigned char *u,
+    unsigned char *v,
+    int y_stride,
+    int uv_stride,
+    unsigned char *yd,
+    unsigned char *ud,
+    unsigned char *vd,
+    int yd_stride,
+    int uvd_stride
+)
+{
+    static const unsigned char VP8_ZEROS[16] = { 0 };
+    int blksizeby2 = blksize >> 1;
+    int blksizesq = blksize * blksize;
+
+    int i, j;
+    unsigned char *yp, *ydp;
+    unsigned char *up, *udp;
+    unsigned char *vp, *vdp;
+
+    unsigned int act, sse, sad, thr;
+    /* act: variance call against an all-zero row; sad: source vs. destination */
+    if (blksize == 16)
+    {
+        act = vp8_variance_var16x16(y, y_stride, VP8_ZEROS, 0, &sse);
+        sad = vp8_variance_sad16x16(y, y_stride, yd, yd_stride, 0);
+    }
+    else if (blksize == 8)
+    {
+        act = vp8_variance_var8x8(y, y_stride, VP8_ZEROS, 0, &sse);
+        sad = vp8_variance_sad8x8(y, y_stride, yd, yd_stride, 0);
+    }
+    else
+    {
+        act = vp8_variance_var4x4(y, y_stride, VP8_ZEROS, 0, &sse);
+        sad = vp8_variance_sad4x4(y, y_stride, yd, yd_stride, 0);
+    }
+
+    thr = 6 * blksizesq + (act >> 3);
+    if (thr > 12 * blksizesq) thr = 12 * blksizesq;
+    /* These thresholds should be adapted later based on qcurr and qprev */
+    if (sad < thr)
+    {
+        const int precision = 4; /* fractional bits of the blend weights */
+        const int roundoff = (1 << (precision - 1)); /* automatic: not a C constant expression */
+        int ifactor = (sad << precision) / thr;
+        /* TODO: SIMD optimize this section */
+        if (ifactor) /* a zero weight leaves the destination untouched */
+        {
+            int icfactor = (1 << precision) - ifactor;
+            for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride)
+            {
+                for (j = 0; j < blksize; ++j)
+                    ydp[j] = (unsigned char)((yp[j] * ifactor + ydp[j] * icfactor + roundoff) >> precision);
+            }
+            for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride)
+            {
+                for (j = 0; j < blksizeby2; ++j)
+                    udp[j] = (unsigned char)((up[j] * ifactor + udp[j] * icfactor + roundoff) >> precision);
+            }
+            for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride)
+            {
+                for (j = 0; j < blksizeby2; ++j)
+                    vdp[j] = (unsigned char)((vp[j] * ifactor + vdp[j] * icfactor + roundoff) >> precision);
+            }
+        }
+    }
+    else
+    {
+        if (blksize == 16)
+        {
+            vp8_recon_copy16x16(y, y_stride, yd, yd_stride);
+            vp8_recon_copy8x8(u, uv_stride, ud, uvd_stride);
+            vp8_recon_copy8x8(v, uv_stride, vd, uvd_stride);
+        }
+        else if (blksize == 8)
+        {
+            vp8_recon_copy8x8(y, y_stride, yd, yd_stride);
+            for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride)
+                vpx_memcpy(udp, up, blksizeby2);
+            for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride)
+                vpx_memcpy(vdp, vp, blksizeby2);
+        }
+        else
+        {
+            for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride)
+                vpx_memcpy(ydp, yp, blksize);
+            for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride)
+                vpx_memcpy(udp, up, blksizeby2);
+            for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride)
+                vpx_memcpy(vdp, vp, blksizeby2);
+        }
+    }
+}
+
+void vp8_multiframe_quality_enhance
+(
+    VP8_COMMON *cm /* frame_to_show is merged, macroblock by macroblock, into post_proc_buffer */
+)
+{
+    YV12_BUFFER_CONFIG *show = cm->frame_to_show;
+    YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
+
+    FRAME_TYPE frame_type = cm->frame_type;
+    /* Base of the per-macroblock MODE_INFO list (prediction mode, motion vector) */
+    const MODE_INFO *mode_info_context = cm->mi;
+    int qcurr = cm->base_qindex;
+    int qprev = cm->postproc_state.last_base_qindex;
+
+    int mb_row;
+    int mb_col;
+
+    unsigned char *y_ptr, *u_ptr, *v_ptr;
+    unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
+
+    /* Start every source and destination plane pointer at its buffer origin */
+    y_ptr = show->y_buffer;
+    u_ptr = show->u_buffer;
+    v_ptr = show->v_buffer;
+    yd_ptr = dest->y_buffer;
+    ud_ptr = dest->u_buffer;
+    vd_ptr = dest->v_buffer;
+
+    /* Visit each macroblock in raster order: 16x16 luma, 8x8 per chroma plane */
+    for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+    {
+        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+        {
+            /* if motion is high there will likely be no benefit: MV limit is 10 per component */
+            if (((frame_type == INTER_FRAME &&
+                  abs(mode_info_context->mbmi.mv.as_mv.row) <= 10 &&
+                  abs(mode_info_context->mbmi.mv.as_mv.col) <= 10) ||
+                 (frame_type == KEY_FRAME)) &&
+                mode_info_context->mbmi.mode != B_PRED)
+            {
+                multiframe_quality_enhance_block(16,
+                                                 qcurr,
+                                                 qprev,
+                                                 y_ptr,
+                                                 u_ptr,
+                                                 v_ptr,
+                                                 show->y_stride,
+                                                 show->uv_stride,
+                                                 yd_ptr,
+                                                 ud_ptr,
+                                                 vd_ptr,
+                                                 dest->y_stride,
+                                                 dest->uv_stride);
+            }
+            else if (mode_info_context->mbmi.mode == B_PRED) /* four 8x8 luma quadrants, 4x4 chroma each */
+            {
+                int i, j;
+                for (i=0; i<2; ++i)
+                    for (j=0; j<2; ++j)
+                        multiframe_quality_enhance_block(8,
+                                                         qcurr,
+                                                         qprev,
+                                                         y_ptr + 8*(i*show->y_stride+j),
+                                                         u_ptr + 4*(i*show->uv_stride+j),
+                                                         v_ptr + 4*(i*show->uv_stride+j),
+                                                         show->y_stride,
+                                                         show->uv_stride,
+                                                         yd_ptr + 8*(i*dest->y_stride+j),
+                                                         ud_ptr + 4*(i*dest->uv_stride+j),
+                                                         vd_ptr + 4*(i*dest->uv_stride+j),
+                                                         dest->y_stride,
+                                                         dest->uv_stride);
+            }
+            else /* remaining MBs (e.g. high-motion inter MBs): plain copy of the decoded MB */
+            {
+                vp8_recon_copy16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
+                vp8_recon_copy8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
+                vp8_recon_copy8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
+            }
+            y_ptr += 16;
+            u_ptr += 8;
+            v_ptr += 8;
+            yd_ptr += 16;
+            ud_ptr += 8;
+            vd_ptr += 8;
+            mode_info_context++;     /* step to next MB */
+        }
+        /* Rewind each plane pointer to column 0 and step down one macroblock row */
+        y_ptr += show->y_stride  * 16 - 16 * cm->mb_cols;
+        u_ptr += show->uv_stride *  8 - 8 * cm->mb_cols;
+        v_ptr += show->uv_stride *  8 - 8 * cm->mb_cols;
+        yd_ptr += dest->y_stride  * 16 - 16 * cm->mb_cols;
+        ud_ptr += dest->uv_stride *  8 - 8 * cm->mb_cols;
+        vd_ptr += dest->uv_stride *  8 - 8 * cm->mb_cols;
+
+        mode_info_context++;         /* Skip border mb: one extra MODE_INFO entry per row */
+    }
+}
 
 #if CONFIG_RUNTIME_CPU_DETECT
 #define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc)
@@ -699,8 +905,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
         dest->y_width = oci->Width;
         dest->y_height = oci->Height;
         dest->uv_height = dest->y_height / 2;
+        oci->postproc_state.last_base_qindex = oci->base_qindex;
         return 0;
-
     }
 
 #if ARCH_X86||ARCH_X86_64
@@ -717,6 +923,12 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
         vp8_deblock(oci->frame_to_show, &oci->post_proc_buffer,
                     q, 1, 0, RTCD_VTABLE(oci));
     }
+    else if ((flags & VP8D_MFQE) &&
+             oci->current_video_frame >= 2 &&
+             oci->base_qindex - oci->postproc_state.last_base_qindex >= 10)
+    {
+        vp8_multiframe_quality_enhance(oci);
+    }
     else
     {
         vp8_yv12_copy_frame_ptr(oci->frame_to_show, &oci->post_proc_buffer);
@@ -1105,5 +1317,6 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
     dest->y_width = oci->Width;
     dest->y_height = oci->Height;
     dest->uv_height = dest->y_height / 2;
+    oci->postproc_state.last_base_qindex = oci->base_qindex;
     return 0;
 }
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index c641b9ca59..d5aaf62166 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -104,6 +104,7 @@ struct postproc_state
     int           last_q;
     int           last_noise;
     char          noise[3072];
+    int           last_base_qindex;
     DECLARE_ALIGNED(16, char, blackclamp[16]);
     DECLARE_ALIGNED(16, char, whiteclamp[16]);
     DECLARE_ALIGNED(16, char, bothclamp[16]);
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index 65b0cab6a4..665e21fd96 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -23,7 +23,8 @@ enum
     VP8D_DEBUG_TXT_RATE_INFO    = 1<<6,
     VP8D_DEBUG_DRAW_MV          = 1<<7,
     VP8D_DEBUG_CLR_BLK_MODES    = 1<<8,
-    VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
+    VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9,
+    VP8D_MFQE                   = 1<<10
 };
 
 typedef struct
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 54bdb85684..43ea9a12ea 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -412,7 +412,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t  *ctx,
                 && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
             {
                 ctx->postproc_cfg.post_proc_flag =
-                    VP8_DEBLOCK | VP8_DEMACROBLOCK;
+                    VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE;
                 ctx->postproc_cfg.deblocking_level = 4;
                 ctx->postproc_cfg.noise_level = 0;
             }
diff --git a/vpx/vp8.h b/vpx/vp8.h
index 983cc4ad41..eec979763b 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -63,6 +63,7 @@ enum vp8_postproc_level
     VP8_DEBUG_TXT_MBLK_MODES    = 1<<4, /**< print macro block modes over each macro block */
     VP8_DEBUG_TXT_DC_DIFF       = 1<<5, /**< print dc diff for each macro block */
     VP8_DEBUG_TXT_RATE_INFO     = 1<<6, /**< print video rate info (encoder only) */
+    VP8_MFQE                    = 1<<10,
 };
 
 /*!\brief post process flags
diff --git a/vpxdec.c b/vpxdec.c
index 7401101f8b..4482f3dc71 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -124,11 +124,13 @@ static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
                                        "Display only selected block modes");
 static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
                                        "Draw only selected motion vectors");
+static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0,
+                                       "Enable multiframe quality enhancement");
 
 static const arg_def_t *vp8_pp_args[] =
 {
     &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
-    &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs,
+    &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, &mfqe,
     NULL
 };
 #endif
@@ -803,6 +805,11 @@ int main(int argc, const char **argv_)
             postproc = 1;
             vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK;
         }
+        else if (arg_match(&arg, &mfqe, argi))
+        {
+            postproc = 1;
+            vp8_pp_cfg.post_proc_flag |= VP8_MFQE;
+        }
         else if (arg_match(&arg, &pp_debug_info, argi))
         {
             unsigned int level = arg_parse_uint(&arg);
-- 
GitLab