From 95a89994cfab7df8cdab40a606007766f09aa99e Mon Sep 17 00:00:00 2001
From: Steinar Midtskogen <stemidts@cisco.com>
Date: Tue, 16 Feb 2016 08:19:24 +0100
Subject: [PATCH] Enable CLPF

This commit adds the constrained low-pass filter (CLPF). For details,
please see the document here: https://goo.gl/VUzEED

Change-Id: Idc21d62052071e6c939c91c0cc8246eed768e6b0
---
 configure                  |  1 +
 vp10/common/onyxc_int.h    |  4 +++
 vp10/decoder/decodeframe.c | 16 ++++++++++
 vp10/encoder/bitstream.c   | 13 ++++++++
 vp10/encoder/encoder.c     | 62 ++++++++++++++++++++++++++++++++++++++
 vp10/vp10_common.mk        |  2 ++
 6 files changed, 98 insertions(+)

diff --git a/configure b/configure
index 8c05729635..5f8e2e32ae 100755
--- a/configure
+++ b/configure
@@ -250,6 +250,7 @@ EXPERIMENT_LIST="
     fp_mb_stats
     emulate_hardware
     misc_fixes
+    clpf
 "
 CONFIG_LIST="
     dependency_tracking
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index 4570a58af5..7c126e114a 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -142,6 +142,10 @@ typedef struct VP10Common {
   int use_highbitdepth;  // Marks if we need to use 16bit frame buffers.
 #endif
 
+#if CONFIG_CLPF
+  int clpf;
+#endif
+
   YV12_BUFFER_CONFIG *frame_to_show;
   RefCntBuffer *prev_frame;
 
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 719c061419..59e0c9be0b 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -36,6 +36,9 @@
 #include "vp10/common/reconinter.h"
 #include "vp10/common/seg_common.h"
 #include "vp10/common/tile_common.h"
+#if CONFIG_CLPF
+#include "vp10/common/clpf.h"
+#endif
 
 #include "vp10/decoder/decodeframe.h"
 #include "vp10/decoder/detokenize.h"
@@ -1094,6 +1097,12 @@ static void setup_loopfilter(struct loopfilter *lf,
   }
 }
 
+#if CONFIG_CLPF
+static void setup_clpf(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+  cm->clpf = vpx_rb_read_literal(rb, 1);
+}
+#endif
+
 static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) {
   return vpx_rb_read_bit(rb)
              ? vpx_rb_read_inv_signed_literal(rb, CONFIG_MISC_FIXES ? 6 : 4)
@@ -1543,6 +1552,10 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, const uint8_t *data,
     lf_data->stop = cm->mi_rows;
     winterface->execute(&pbi->lf_worker);
   }
+#if CONFIG_CLPF
+  if (cm->clpf && !cm->skip_loop_filter)
+    vp10_clpf_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
+#endif
 
   // Get last tile data.
   tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
@@ -2026,6 +2039,9 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
     vp10_setup_past_independence(cm);
 
   setup_loopfilter(&cm->lf, rb);
+#if CONFIG_CLPF
+  setup_clpf(cm, rb);
+#endif
   setup_quantization(cm, rb);
 #if CONFIG_VPX_HIGHBITDEPTH
   xd->bd = (int)cm->bit_depth;
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index e6ee10a5a4..9f7b6b3997 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -26,6 +26,9 @@
 #include "vp10/common/pred_common.h"
 #include "vp10/common/seg_common.h"
 #include "vp10/common/tile_common.h"
+#if CONFIG_CLPF
+#include "vp10/common/clpf.h"
+#endif
 
 #include "vp10/encoder/cost.h"
 #include "vp10/encoder/bitstream.h"
@@ -838,6 +841,13 @@ static void encode_loopfilter(struct loopfilter *lf,
   }
 }
 
+#if CONFIG_CLPF
+static void encode_clpf(const VP10_COMMON *cm,
+                        struct vpx_write_bit_buffer *wb) {
+  vpx_wb_write_literal(wb, cm->clpf, 1);
+}
+#endif
+
 static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
   if (delta_q != 0) {
     vpx_wb_write_bit(wb, 1);
@@ -1297,6 +1307,9 @@ static void write_uncompressed_header(VP10_COMP *cpi,
   vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
 
   encode_loopfilter(&cm->lf, wb);
+#if CONFIG_CLPF
+  encode_clpf(cm, wb);
+#endif
   encode_quantization(cm, wb);
   encode_segmentation(cm, xd, wb);
 #if CONFIG_MISC_FIXES
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index af68409b9d..4231d4bcf6 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -15,6 +15,9 @@
 #include "./vpx_config.h"
 
 #include "vp10/common/alloccommon.h"
+#if CONFIG_CLPF
+#include "vp10/common/clpf.h"
+#endif
 #include "vp10/common/filter.h"
 #include "vp10/common/idct.h"
 #include "vp10/common/reconinter.h"
@@ -2426,6 +2429,65 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) {
       vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
   }
 
+#if CONFIG_CLPF
+  cm->clpf = 0;
+  if (!is_lossless_requested(&cpi->oxcf)) {
+    // Test CLPF
+    int i, hq = 1;
+    uint64_t before, after;
+    // TODO(yaowu): investigate per-segment CLPF decision and
+    // an optimal threshold, use 80 for now.
+    for (i = 0; i < MAX_SEGMENTS; i++)
+      hq &= vp10_get_qindex(&cm->seg, i, cm->base_qindex) < 80;
+
+    if (!hq) {  // Don't try filter if the entire image is nearly losslessly
+                // encoded
+#if CLPF_FILTER_ALL_PLANES
+      vpx_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
+      before =
+          get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
+                  cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
+                  cpi->Source->y_crop_width, cpi->Source->y_crop_height) +
+          get_sse(cpi->Source->u_buffer, cpi->Source->uv_stride,
+                  cm->frame_to_show->u_buffer, cm->frame_to_show->uv_stride,
+                  cpi->Source->uv_crop_width, cpi->Source->uv_crop_height) +
+          get_sse(cpi->Source->v_buffer, cpi->Source->uv_stride,
+                  cm->frame_to_show->v_buffer, cm->frame_to_show->uv_stride,
+                  cpi->Source->uv_crop_width, cpi->Source->uv_crop_height);
+      vp10_clpf_frame(cm->frame_to_show, cm, xd);
+      after = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
+                      cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
+                      cpi->Source->y_crop_width, cpi->Source->y_crop_height) +
+              get_sse(cpi->Source->u_buffer, cpi->Source->uv_stride,
+                      cm->frame_to_show->u_buffer, cm->frame_to_show->uv_stride,
+                      cpi->Source->uv_crop_width, cpi->Source->uv_crop_height) +
+              get_sse(cpi->Source->v_buffer, cpi->Source->uv_stride,
+                      cm->frame_to_show->v_buffer, cm->frame_to_show->uv_stride,
+                      cpi->Source->uv_crop_width, cpi->Source->uv_crop_height);
+#else
+      vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+      before = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
+                       cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
+                       cpi->Source->y_crop_width, cpi->Source->y_crop_height);
+      vp10_clpf_frame(cm->frame_to_show, cm, xd);
+      after = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
+                      cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
+                      cpi->Source->y_crop_width, cpi->Source->y_crop_height);
+#endif
+      if (before < after) {
+// No improvement, restore original
+#if CLPF_FILTER_ALL_PLANES
+        vpx_yv12_copy_frame(&cpi->last_frame_uf, cm->frame_to_show);
+#else
+        vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+#endif
+      } else {
+        cm->clpf = 1;
+      }
+    }
+  }
+#endif
+
   vpx_extend_frame_inner_borders(cm->frame_to_show);
 }
 
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk
index 5d8f0c41bc..dcb2dd9151 100644
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -60,6 +60,8 @@ VP10_COMMON_SRCS-yes += common/scan.c
 VP10_COMMON_SRCS-yes += common/scan.h
 VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h
 VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c
+VP10_COMMON_SRCS-yes += common/clpf.c
+VP10_COMMON_SRCS-yes += common/clpf.h
 
 ifneq ($(CONFIG_VPX_HIGHBITDEPTH),yes)
 VP10_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/itrans4_dspr2.c
-- 
GitLab