From f76f52df61dfbec0113727196c4a0e030dd15205 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Thu, 22 Aug 2013 17:23:02 +0100
Subject: [PATCH] Limit key frame intra mode checks.

Most of the focus so far has been on inter frames.

At high speed settings, key frame coding now takes a high
percentage of the cycles.

This patch adds some masking to reduce the number
of intra modes searched during key frame coding (as already
happens for inter frames) at higher speed settings.

TODO: Develop this further with either adaptive rd thresholds
when choosing which intra modes to consider or some other
heuristic.

Impact:
At high speed settings on some clips the key frame was starting
to dominate. When coding the first 50 frames of AKIYO at speed 2,
limiting the key frame intra modes to DC_PRED or TM_PRED resulted
in a ~30% overall speedup. For Bus the number was lower at ~4-5%.

Change-Id: I7bde68aee04995f9d9beb13a1902143112e341e2
---
 vp9/encoder/vp9_onyx_if.c  | 11 +++++++++--
 vp9/encoder/vp9_onyx_int.h |  8 +++++++-
 vp9/encoder/vp9_rdopt.c    | 17 ++++++++++++++---
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 111325adca..27ba8fe8d8 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -735,7 +735,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->mode_search_skip_flags = 0;
   sf->disable_split_var_thresh = 0;
   sf->disable_filter_search_var_thresh = 0;
-  sf->last_chroma_intra_mode = TM_PRED;
+  sf->intra_y_mode_mask = ALL_INTRA_MODES;
+  sf->intra_uv_mode_mask = ALL_INTRA_MODES;
   sf->use_rd_breakout = 0;
   sf->skip_encode_sb = 0;
   sf->use_uv_intra_rd_estimate = 0;
@@ -798,6 +799,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         // the main framework of partition search type.
         sf->disable_split_var_thresh = 0;
         sf->disable_filter_search_var_thresh = 16;
+
+        sf->intra_y_mode_mask = INTRA_DC_TM_H_V;
+        sf->intra_uv_mode_mask = INTRA_DC_TM_H_V;
       }
       if (speed == 2) {
         sf->adjust_thresholds_by_speed = 1;
@@ -819,7 +823,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
                                      FLAG_SKIP_COMP_REFMISMATCH |
                                      FLAG_SKIP_INTRA_LOWVAR |
                                      FLAG_EARLY_TERMINATE;
-        sf->last_chroma_intra_mode = DC_PRED;
+        sf->intra_y_mode_mask = INTRA_DC_TM;
+        sf->intra_uv_mode_mask = INTRA_DC_TM;
         sf->use_uv_intra_rd_estimate = 1;
         sf->use_rd_breakout = 1;
         sf->skip_encode_sb = 1;
@@ -859,6 +864,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->subpel_iters_per_step = 1;
         sf->disable_split_var_thresh = 64;
         sf->disable_filter_search_var_thresh = 64;
+        sf->intra_y_mode_mask = INTRA_DC_ONLY;
+        sf->intra_uv_mode_mask = INTRA_DC_ONLY;
       }
       if (speed == 4) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 1b0e5cea00..dcbf3887bd 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -238,6 +238,11 @@ typedef enum {
   // Other methods to come
 } SUBPEL_SEARCH_METHODS;
 
+#define ALL_INTRA_MODES 0x3FF
+#define INTRA_DC_ONLY 0x01
+#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED))
+#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED))
+
 typedef struct {
   int RD;
   SEARCH_METHODS search_method;
@@ -288,7 +293,8 @@ typedef struct {
   // A source variance threshold below which filter search is disabled
   // Choose a very large value (UINT_MAX) to use 8-tap always
   unsigned int disable_filter_search_var_thresh;
-  MB_PREDICTION_MODE last_chroma_intra_mode;
+  int intra_y_mode_mask;
+  int intra_uv_mode_mask;
   int use_rd_breakout;
   int use_uv_intra_rd_estimate;
   int use_fast_lpf_pick;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index d3d0afe734..1bf3b59107 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1043,6 +1043,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
     int64_t this_rd;
     int ratey = 0;
+
+    if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
+      continue;
+
     // Only do the oblique modes if the best so far is
     // one of the neighboring directional modes
     if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
@@ -1228,6 +1232,9 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
     int64_t local_tx_cache[TX_MODES];
     const int mis = xd->mode_info_stride;
 
+    if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
+      continue;
+
     if (cpi->common.frame_type == KEY_FRAME) {
       const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
       const MB_PREDICTION_MODE L = xd->left_available ?
@@ -1325,10 +1332,14 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int this_rate_tokenonly, this_rate, s;
   int64_t this_distortion, this_sse;
 
-  MB_PREDICTION_MODE last_mode = bsize <= BLOCK_8X8 ?
-              TM_PRED : cpi->sf.last_chroma_intra_mode;
+  // int mode_mask = (bsize <= BLOCK_8X8)
+  //                ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;
+
+  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+    // if (!(mode_mask & (1 << mode)))
+    if (!(cpi->sf.intra_uv_mode_mask & (1 << mode)))
+      continue;
 
-  for (mode = DC_PRED; mode <= last_mode; mode++) {
     x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
     super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
                      &this_distortion, &s, &this_sse, bsize, best_rd);
-- 
GitLab