rdo.rs 46.7 KB
Newer Older
Guillaume Martres's avatar
Guillaume Martres committed
1
// Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
2
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
Guillaume Martres's avatar
Guillaume Martres committed
3
4
5
6
7
8
9
10
11
12
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#![allow(non_camel_case_types)]

13
use crate::api::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::cdef::*;
15
use crate::lrf::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
16
17
use crate::context::*;
use crate::ec::{OD_BITRES, Writer, WriterCounter};
18
use crate::header::ReferenceMode;
19
use crate::{encode_block_pre_cdef, encode_block_post_cdef};
Raphaël Zumer's avatar
Raphaël Zumer committed
20
use crate::encode_block_with_modes;
Luca Barbato's avatar
Luca Barbato committed
21
22
use crate::encoder::FrameInvariants;
use crate::frame::Frame;
Raphaël Zumer's avatar
Raphaël Zumer committed
23
use crate::luma_ac;
Raphaël Zumer's avatar
Raphaël Zumer committed
24
use crate::mc::MotionVector;
Raphaël Zumer's avatar
Raphaël Zumer committed
25
26
27
use crate::me::*;
use crate::motion_compensate;
use crate::partition::*;
Thomas Daede's avatar
Thomas Daede committed
28
use crate::partition::RefType::*;
29
use crate::frame::*;
30
use crate::predict::{RAV1E_INTRA_MODES, RAV1E_INTER_MODES_MINIMAL, RAV1E_INTER_COMPOUND_MODES, PredictionMode};
Raphaël Zumer's avatar
Raphaël Zumer committed
31
32
33
use crate::Tune;
use crate::write_tx_blocks;
use crate::write_tx_tree;
Romain Vimont's avatar
Romain Vimont committed
34
use crate::tiling::*;
35
use crate::transform::{RAV1E_TX_TYPES, TxSet, TxSize, TxType};
36
use crate::util::{AlignedArray, CastFromPrimitive, Pixel, UninitializedAlignedArray};
37
use crate::rdo_tables::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
38
39

use std;
40
use std::cmp;
Raphaël Zumer's avatar
Raphaël Zumer committed
41
use std::vec::Vec;
Raphaël Zumer's avatar
Raphaël Zumer committed
42
use crate::partition::PartitionType::*;
43
use arrayvec::*;
Luca Barbato's avatar
Luca Barbato committed
44
use serde_derive::{Serialize, Deserialize};
Guillaume Martres's avatar
Guillaume Martres committed
45

46
/// Selects how distortion and rate are measured during an RDO search.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RDOType {
  /// Pixel-domain distortion and exact entropy-coder rate.
  PixelDistRealRate,
  /// Transform-domain distortion and exact entropy-coder rate.
  TxDistRealRate,
  /// Transform-domain distortion and a rate estimated from that distortion.
  TxDistEstRate,
  /// Training mode: requests both the transform-domain distortion and the
  /// exact coefficient rate.
  Train,
}

impl RDOType {
  /// Returns `true` when this mode requires the transform-domain distortion
  /// to be computed.
  pub fn needs_tx_dist(self) -> bool {
    match self {
      RDOType::PixelDistRealRate => false,
      RDOType::TxDistRealRate | RDOType::TxDistEstRate | RDOType::Train => true,
    }
  }

  /// Returns `true` when this mode requires the exact coefficient rate,
  /// i.e. for every mode except the one that estimates the rate.
  pub fn needs_coeff_rate(self) -> bool {
    match self {
      RDOType::TxDistEstRate => false,
      RDOType::PixelDistRealRate | RDOType::TxDistRealRate | RDOType::Train => true,
    }
  }
}

76
#[derive(Clone)]
77
pub struct RDOOutput {
Michael Bebenita's avatar
Michael Bebenita committed
78
79
80
  pub rd_cost: f64,
  pub part_type: PartitionType,
  pub part_modes: Vec<RDOPartitionOutput>
81
82
83
84
}

#[derive(Clone)]
pub struct RDOPartitionOutput {
Michael Bebenita's avatar
Michael Bebenita committed
85
86
  pub rd_cost: f64,
  pub bo: BlockOffset,
87
  pub bsize: BlockSize,
Michael Bebenita's avatar
Michael Bebenita committed
88
89
  pub pred_mode_luma: PredictionMode,
  pub pred_mode_chroma: PredictionMode,
90
  pub pred_cfl_params: CFLParams,
Thomas Daede's avatar
Thomas Daede committed
91
  pub ref_frames: [RefType; 2],
fbossen's avatar
fbossen committed
92
  pub mvs: [MotionVector; 2],
93
94
95
  pub skip: bool,
  pub tx_size: TxSize,
  pub tx_type: TxType,
96
97
}

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/// Accumulates observed rates, binned by quantizer index, transform size and
/// fast-distortion value, so that an averaged rate table can be printed.
///
/// Note: the derived `Default` produces *empty* tables. Use [`RDOTracker::new`]
/// to obtain correctly sized, zero-filled tables; merging or adding rates
/// relies on the tables having their full dimensions.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct RDOTracker {
  /// Sum of recorded rates, indexed `[quantizer bin][tx size][distortion bin]`.
  rate_bins: Vec<Vec<Vec<u64>>>,
  /// Number of recorded samples, indexed like `rate_bins`.
  rate_counts: Vec<Vec<Vec<u64>>>,
}

impl RDOTracker {
  /// Creates a tracker whose tables are fully sized and zero-initialized.
  pub fn new() -> RDOTracker {
    RDOTracker {
      rate_bins: vec![vec![vec![0; RDO_NUM_BINS]; TxSize::TX_SIZES_ALL]; RDO_QUANT_BINS],
      rate_counts: vec![vec![vec![0; RDO_NUM_BINS]; TxSize::TX_SIZES_ALL]; RDO_QUANT_BINS],
    }
  }

  /// Adds each element of `old` to the element of `new` at the same index.
  fn merge_array(new: &mut [u64], old: &[u64]) {
    for (n, o) in new.iter_mut().zip(old.iter()) {
      *n += o;
    }
  }

  /// Element-wise accumulation of a 2-D table.
  fn merge_2d_array(new: &mut [Vec<u64>], old: &[Vec<u64>]) {
    for (n, o) in new.iter_mut().zip(old.iter()) {
      RDOTracker::merge_array(n, o);
    }
  }

  /// Element-wise accumulation of a 3-D table.
  fn merge_3d_array(new: &mut [Vec<Vec<u64>>], old: &[Vec<Vec<u64>>]) {
    for (n, o) in new.iter_mut().zip(old.iter()) {
      RDOTracker::merge_2d_array(n, o);
    }
  }

  /// Accumulates the sums and counts of `input` into `self`.
  ///
  /// Entries are paired by index; entries without a counterpart in the other
  /// tracker are left untouched.
  pub fn merge_in(&mut self, input: &RDOTracker) {
    RDOTracker::merge_3d_array(&mut self.rate_bins, &input.rate_bins);
    RDOTracker::merge_3d_array(&mut self.rate_counts, &input.rate_counts);
  }

  /// Records one `rate` sample for the given quantizer index, transform size
  /// and fast distortion. Samples with zero distortion are ignored.
  pub fn add_rate(&mut self, qindex: u8, ts: TxSize, fast_distortion: u64, rate: u64) {
    if fast_distortion == 0 {
      return;
    }
    let bs_index = ts as usize;
    let q_bin_idx = (qindex as usize) / RDO_QUANT_DIV;
    // Offset by half a bin before dividing. The subtraction saturates so that
    // distortions smaller than half a bin land in bin 0; a wrapping
    // subtraction would turn them into huge values that get clamped into the
    // last bin and pollute the high-distortion statistics.
    let shifted = fast_distortion.saturating_sub(RATE_EST_BIN_SIZE / 2);
    let bin_idx =
      (shifted / RATE_EST_BIN_SIZE).min((RDO_NUM_BINS - 1) as u64) as usize;
    self.rate_counts[q_bin_idx][bs_index][bin_idx] += 1;
    self.rate_bins[q_bin_idx][bs_index][bin_idx] += rate;
  }

  /// Prints the averaged rates to stdout as Rust source for a
  /// `RDO_RATE_TABLE` static. Bins with 100 samples or fewer are emitted as
  /// the placeholder value `99999`.
  pub fn print_code(&self) {
    println!("pub static RDO_RATE_TABLE: [[[u64; RDO_NUM_BINS]; TxSize::TX_SIZES_ALL]; RDO_QUANT_BINS] = [");
    for q_bin in 0..RDO_QUANT_BINS {
      print!("[");
      for bs_index in 0..TxSize::TX_SIZES_ALL {
        print!("[");
        let totals = &self.rate_bins[q_bin][bs_index];
        let counts = &self.rate_counts[q_bin][bs_index];
        for (rate_total, rate_count) in totals.iter().zip(counts.iter()) {
          if *rate_count > 100 {
            print!("{},", rate_total / rate_count);
          } else {
            print!("99999,");
          }
        }
        println!("],");
      }
      println!("],");
    }
    println!("];");
  }
}

/// Estimates a rate from `fast_distortion` by linear interpolation between
/// two neighbouring entries of `RDO_RATE_TABLE`, selected by quantizer index
/// and transform size. The slope is carried with 8 fractional bits and the
/// result is clamped to be non-negative.
pub fn estimate_rate(qindex: u8, ts: TxSize, fast_distortion: u64) -> u64 {
  let rates = &RDO_RATE_TABLE[(qindex as usize) / RDO_QUANT_DIV][ts as usize];

  // Pick the bin at or below the distortion and its upper neighbour, keeping
  // both inside the table.
  let lower_bin =
    (fast_distortion / RATE_EST_BIN_SIZE).min((RDO_NUM_BINS - 2) as u64);
  let upper_bin = (lower_bin + 1).min((RDO_NUM_BINS - 1) as u64);

  let x_lo = (lower_bin * RATE_EST_BIN_SIZE) as i64;
  let x_hi = (upper_bin * RATE_EST_BIN_SIZE) as i64;
  let rate_lo = rates[lower_bin as usize] as i64;
  let rate_hi = rates[upper_bin as usize] as i64;

  let slope_q8 = ((rate_hi - rate_lo) << 8) / (x_hi - x_lo);
  let offset = fast_distortion as i64 - x_lo;
  let interpolated = rate_lo + ((offset * slope_q8) >> 8);
  interpolated.max(0) as u64
}

178
#[allow(unused)]
179
fn cdef_dist_wxh_8x8<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
180
  src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, bit_depth: usize
181
) -> u64 {
182
  let coeff_shift = bit_depth - 8;
183

Jean-Marc Valin's avatar
Jean-Marc Valin committed
184
185
186
187
188
189
190
  let mut sum_s: i32 = 0;
  let mut sum_d: i32 = 0;
  let mut sum_s2: i64 = 0;
  let mut sum_d2: i64 = 0;
  let mut sum_sd: i64 = 0;
  for j in 0..8 {
    for i in 0..8 {
Romain Vimont's avatar
Romain Vimont committed
191
192
      let s: i32 = src1[j][i].as_();
      let d: i32 = src2[j][i].as_();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
193
194
      sum_s += s;
      sum_d += d;
195
196
197
      sum_s2 += (s * s) as i64;
      sum_d2 += (d * d) as i64;
      sum_sd += (s * d) as i64;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
198
199
200
201
202
203
    }
  }
  let svar = (sum_s2 - ((sum_s as i64 * sum_s as i64 + 32) >> 6)) as f64;
  let dvar = (sum_d2 - ((sum_d as i64 * sum_d as i64 + 32) >> 6)) as f64;
  let sse = (sum_d2 + sum_s2 - 2 * sum_sd) as f64;
  //The two constants were tuned for CDEF, but can probably be better tuned for use in general RDO
204
205
206
207
  let ssim_boost =
    (4033_f64 / 16_384_f64) *
    (svar + dvar + (16_384 << (2 * coeff_shift)) as f64) /
    f64::sqrt((16_265_089u64 << (4 * coeff_shift)) as f64 + svar * dvar);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
208
209
210
  (sse * ssim_boost + 0.5_f64) as u64
}

211
#[allow(unused)]
212
fn cdef_dist_wxh<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
213
  src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
214
  bit_depth: usize
215
) -> u64 {
216
217
218
  assert!(w & 0x7 == 0);
  assert!(h & 0x7 == 0);

Jean-Marc Valin's avatar
Jean-Marc Valin committed
219
  let mut sum: u64 = 0;
Romain Vimont's avatar
Romain Vimont committed
220
221
  for j in 0isize..h as isize / 8 {
    for i in 0isize..w as isize / 8 {
222
      sum += cdef_dist_wxh_8x8(
Romain Vimont's avatar
Romain Vimont committed
223
224
        &src1.subregion(Area::StartingAt { x: i * 8, y: j * 8 }),
        &src2.subregion(Area::StartingAt { x: i * 8, y: j * 8 }),
225
        bit_depth
226
      )
Jean-Marc Valin's avatar
Jean-Marc Valin committed
227
228
229
230
231
    }
  }
  sum
}

232
// Sum of Squared Error for a wxh block
233
pub fn sse_wxh<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
234
  src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize
235
) -> u64 {
236
237
238
  assert!(w & (MI_SIZE - 1) == 0);
  assert!(h & (MI_SIZE - 1) == 0);

Michael Bebenita's avatar
Michael Bebenita committed
239
240
  let mut sse: u64 = 0;
  for j in 0..h {
241
242
    let s1 = &src1[j][..w];
    let s2 = &src2[j][..w];
243

244
245
246
247
    let row_sse = s1
      .iter()
      .zip(s2)
      .map(|(&a, &b)| {
248
        let c = (i16::cast_from(a) - i16::cast_from(b)) as i32;
249
250
        (c * c) as u32
      }).sum::<u32>();
251
    sse += row_sse as u64;
Michael Bebenita's avatar
Michael Bebenita committed
252
253
  }
  sse
Guillaume Martres's avatar
Guillaume Martres committed
254
}
255

256
// Compute the pixel-domain distortion for an encode
257
fn compute_distortion<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
258
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w_y: usize, h_y: usize,
259
  is_chroma_block: bool, tile_bo: BlockOffset,
260
  luma_only: bool
261
) -> u64 {
262
263
  let input_region = ts.input_tile.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo });
  let rec_region = ts.rec.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo });
264
  let mut distortion = match fi.config.tune {
265
266
    Tune::Psychovisual if w_y >= 8 && h_y >= 8 => {
      cdef_dist_wxh(
Romain Vimont's avatar
Romain Vimont committed
267
268
        &input_region,
        &rec_region,
269
270
271
272
273
274
        w_y,
        h_y,
        fi.sequence.bit_depth
      )
    }
    Tune::Psnr | Tune::Psychovisual => {
275
      sse_wxh(
Romain Vimont's avatar
Romain Vimont committed
276
277
        &input_region,
        &rec_region,
278
279
280
281
        w_y,
        h_y
      )
    }
Michael Bebenita's avatar
Michael Bebenita committed
282
  };
Michael Bebenita's avatar
Michael Bebenita committed
283

284
  if !luma_only {
Romain Vimont's avatar
Romain Vimont committed
285
    let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
286

287
288
289
    let mask = !(MI_SIZE - 1);
    let mut w_uv = (w_y >> xdec) & mask;
    let mut h_uv = (h_y >> ydec) & mask;
290

291
292
293
294
    if (w_uv == 0 || h_uv == 0) && is_chroma_block {
      w_uv = MI_SIZE;
      h_uv = MI_SIZE;
    }
295

296
297
298
299
    // Add chroma distortion only when it is available
    if w_uv > 0 && h_uv > 0 {
      for p in 1..3 {
        distortion += sse_wxh(
300
301
          &ts.input_tile.planes[p].subregion(Area::BlockStartingAt { bo: tile_bo }),
          &ts.rec.planes[p].subregion(Area::BlockStartingAt { bo: tile_bo }),
302
303
304
305
306
          w_uv,
          h_uv
        );
      }
    };
307
  }
308
  distortion
309
310
}

311
// Compute the transform-domain distortion for an encode
312
fn compute_tx_distortion<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
313
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w_y: usize, h_y: usize,
314
  is_chroma_block: bool, tile_bo: BlockOffset, tx_dist: i64,
315
  skip: bool, luma_only: bool
316
) -> u64 {
317
318
319
  assert!(fi.config.tune == Tune::Psnr);
  let mut distortion = if skip {
    sse_wxh(
320
321
      &ts.input_tile.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo }),
      &ts.rec.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo }),
322
323
324
325
326
327
328
329
330
      w_y,
      h_y
    )
  } else {
    assert!(tx_dist >= 0);
    tx_dist as u64
  };

  if !luma_only && skip {
Romain Vimont's avatar
Romain Vimont committed
331
    let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
332
333
334
335
336
337
338
339
340
341
342
343
344
345

    let mask = !(MI_SIZE - 1);
    let mut w_uv = (w_y >> xdec) & mask;
    let mut h_uv = (h_y >> ydec) & mask;

    if (w_uv == 0 || h_uv == 0) && is_chroma_block {
      w_uv = MI_SIZE;
      h_uv = MI_SIZE;
    }

    // Add chroma distortion only when it is available
    if w_uv > 0 && h_uv > 0 {
      for p in 1..3 {
        distortion += sse_wxh(
346
347
          &ts.input_tile.planes[p].subregion(Area::BlockStartingAt { bo: tile_bo }),
          &ts.rec.planes[p].subregion(Area::BlockStartingAt { bo: tile_bo }),
348
349
350
351
352
353
          w_uv,
          h_uv
        );
      }
    }
  }
354
355
  distortion
}
356

357
/// Combines rate and distortion into a single cost:
/// `distortion + lambda * rate_in_bits`.
///
/// `rate` is expressed in fractions of a bit, with `1 << OD_BITRES` units
/// per bit.
fn compute_rd_cost<T: Pixel>(fi: &FrameInvariants<T>, rate: u32, distortion: u64) -> f64 {
  let units_per_bit = (1 << OD_BITRES) as f64;
  let bits = (rate as f64) / units_per_bit;
  let weighted_rate = fi.lambda * bits;
  (distortion as f64) + weighted_rate
}

362
pub fn rdo_tx_size_type<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
363
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
364
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: BlockOffset,
Thomas Daede's avatar
Thomas Daede committed
365
  luma_mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2], skip: bool
366
) -> (TxSize, TxType) {
367
368
369
370
  let mut tx_size = max_txsize_rect_lookup[bsize as usize];
  let mut best_tx_type = TxType::DCT_DCT;
  let mut best_tx_size = tx_size;
  let mut best_rd = std::f64::MAX;
371
372
  let is_inter = !luma_mode.is_intra();

373
374
  let do_rdo_tx_size = fi.tx_mode_select && fi.config.speed_settings.rdo_tx_decision &&
                luma_mode.is_intra();
375
  let rdo_tx_depth = if do_rdo_tx_size { 2 } else { 0 };
376
377
378
379
380
381
382
  let cw_checkpoint = cw.checkpoint();

  for _ in 0..=rdo_tx_depth {
    let tx_set = get_tx_set(tx_size, is_inter, fi.use_reduced_tx_set);

    let do_rdo_tx_type = tx_set > TxSet::TX_SET_DCTONLY &&
        fi.config.speed_settings.rdo_tx_decision && !skip;
383

384
385
386
387
388
389
390
391
    if !do_rdo_tx_size && !do_rdo_tx_type { return (best_tx_size, best_tx_type) };

    let tx_types = if do_rdo_tx_type { RAV1E_TX_TYPES } else { &[TxType::DCT_DCT] };

    // Luma plane transform type decision
    let (tx_type, rd_cost) =
        rdo_tx_type_decision(
          fi,
Romain Vimont's avatar
Romain Vimont committed
392
          ts,
393
394
395
396
397
          cw,
          luma_mode,
          ref_frames,
          mvs,
          bsize,
398
          tile_bo,
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
          tx_size,
          tx_set,
          tx_types
        );

    if rd_cost < best_rd {
      best_tx_size = tx_size;
      best_tx_type = tx_type;
      best_rd = rd_cost;
    }

    debug_assert!(tx_size.width_log2() <= bsize.width_log2());
    debug_assert!(tx_size.height_log2() <= bsize.height_log2());
    debug_assert!(tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT);

    tx_size = sub_tx_size_map[best_tx_size as usize];
    if tx_size == best_tx_size { break; };

    cw.rollback(&cw_checkpoint);
  }
419

420
  (best_tx_size, best_tx_type)
421
422
}

423
424
425
426
427
428
struct EncodingSettings {
  mode_luma: PredictionMode,
  mode_chroma: PredictionMode,
  cfl_params: CFLParams,
  skip: bool,
  rd: f64,
Thomas Daede's avatar
Thomas Daede committed
429
  ref_frames: [RefType; 2],
fbossen's avatar
fbossen committed
430
  mvs: [MotionVector; 2],
431
432
433
434
435
436
  tx_size: TxSize,
  tx_type: TxType
}

impl Default for EncodingSettings {
  fn default() -> Self {
437
    Self {
438
439
      mode_luma: PredictionMode::DC_PRED,
      mode_chroma: PredictionMode::DC_PRED,
440
      cfl_params: CFLParams::default(),
441
442
      skip: false,
      rd: std::f64::MAX,
fbossen's avatar
fbossen committed
443
      ref_frames: [INTRA_FRAME, NONE_FRAME],
Vladimir Kazakov's avatar
Vladimir Kazakov committed
444
      mvs: [MotionVector::default(); 2],
445
446
447
448
449
      tx_size: TxSize::TX_4X4,
      tx_type: TxType::DCT_DCT
    }
  }
}
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564

/// Evaluates `luma_mode` combined with every chroma mode in
/// `mode_set_chroma` and records the cheapest combination in `best`.
///
/// The transform size and type are chosen once via `rdo_tx_size_type`. Each
/// chroma mode is then trial-encoded into a counting writer, first without
/// skip and, for non-intra luma modes only, a second time with skip (using
/// `DCT_DCT`). After every trial the context writer is rolled back to
/// `cw_checkpoint`.
#[inline]
fn luma_chroma_mode_rdo<T: Pixel>(
  luma_mode: PredictionMode,
  fi: &FrameInvariants<T>,
  bsize: BlockSize,
  tile_bo: BlockOffset,
  ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter,
  rdo_type: RDOType,
  cw_checkpoint: &ContextWriterCheckpoint,
  best: &mut EncodingSettings,
  mvs: [MotionVector; 2],
  ref_frames: [RefType; 2],
  mode_set_chroma: &[PredictionMode],
  luma_mode_is_intra: bool,
  mode_context: usize,
  mv_stack: &ArrayVec<[CandidateMV; 9]>
) {
  let (tx_size, searched_tx_type) = rdo_tx_size_type(
    fi, ts, cw, bsize, tile_bo, luma_mode, ref_frames, mvs, false,
  );

  // Get block luma and chroma dimensions
  let w = bsize.width();
  let h = bsize.height();

  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
  let is_chroma_block = has_chroma(tile_bo, bsize, xdec, ydec);

  // Values that do not change between trials.
  let writes_partition = bsize >= BlockSize::BLOCK_8X8 && bsize.is_sqr();
  // TODO(yushin): luma and chroma would have different decision based on chroma format
  let need_recon_pixel = luma_mode_is_intra && tx_size.block_size() != bsize;
  let use_tx_distortion = fi.use_tx_domain_distortion && !need_recon_pixel;

  // Don't skip when using intra modes
  let skip_passes: &[bool] =
    if luma_mode_is_intra { &[false] } else { &[false, true] };

  for &skip in skip_passes {
    // Skipped blocks always use the default transform type.
    let tx_type = if skip { TxType::DCT_DCT } else { searched_tx_type };

    // Find the best chroma prediction mode for the current luma prediction mode
    for &chroma_mode in mode_set_chroma {
      let wr = &mut WriterCounter::new();
      let tell = wr.tell_frac();

      if writes_partition {
        cw.write_partition(wr, tile_bo, PartitionType::PARTITION_NONE, bsize);
      }

      encode_block_pre_cdef(&fi.sequence, ts, cw, wr, bsize, tile_bo, skip);
      let tx_dist = encode_block_post_cdef(
        fi,
        ts,
        cw,
        wr,
        luma_mode,
        chroma_mode,
        ref_frames,
        mvs,
        bsize,
        tile_bo,
        skip,
        CFLParams::default(),
        tx_size,
        tx_type,
        mode_context,
        mv_stack,
        rdo_type,
        need_recon_pixel
      );

      let rate = wr.tell_frac() - tell;
      let distortion = if use_tx_distortion {
        compute_tx_distortion(
          fi,
          ts,
          w,
          h,
          is_chroma_block,
          tile_bo,
          tx_dist,
          skip,
          false
        )
      } else {
        compute_distortion(fi, ts, w, h, is_chroma_block, tile_bo, false)
      };

      let rd = compute_rd_cost(fi, rate, distortion);
      if rd < best.rd {
        best.rd = rd;
        best.mode_luma = luma_mode;
        best.mode_chroma = chroma_mode;
        best.ref_frames = ref_frames;
        best.mvs = mvs;
        best.skip = skip;
        best.tx_size = tx_size;
        best.tx_type = tx_type;
      }

      cw.rollback(cw_checkpoint);
    }
  }
}

565
// RDO-based mode decision
566
pub fn rdo_mode_decision<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
567
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
568
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: BlockOffset,
569
  pmvs: &mut [Option<MotionVector>]
570
) -> RDOPartitionOutput {
571
  let mut best = EncodingSettings::default();
Michael Bebenita's avatar
Michael Bebenita committed
572
573
574
575
576

  // Get block luma and chroma dimensions
  let w = bsize.width();
  let h = bsize.height();

Romain Vimont's avatar
Romain Vimont committed
577
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
578
  let is_chroma_block = has_chroma(tile_bo, bsize, xdec, ydec);
Michael Bebenita's avatar
Michael Bebenita committed
579

580
  let cw_checkpoint = cw.checkpoint();
581

582
583
584
585
586
587
  // we can never have more than 7 reference frame sets
  let mut ref_frames_set = ArrayVec::<[_; 7]>::new();
  // again, max of 7 ref slots
  let mut ref_slot_set = ArrayVec::<[_; 7]>::new();
  // our implementation never returns more than 3 at the moment
  let mut mvs_from_me = ArrayVec::<[_; 3]>::new();
fbossen's avatar
fbossen committed
588
589
  let mut fwdref = None;
  let mut bwdref = None;
590

591
592
593
594
595
  let rdo_type = if fi.config.train_rdo {
    RDOType::Train
  } else if fi.use_tx_domain_rate {
    RDOType::TxDistEstRate
  } else if fi.use_tx_domain_distortion {
596
597
598
599
600
    RDOType::TxDistRealRate
  } else {
    RDOType::PixelDistRealRate
  };

601
  if fi.frame_type == FrameType::INTER {
Thomas Daede's avatar
Thomas Daede committed
602
    for i in ALL_INTER_REFS.iter() {
fbossen's avatar
fbossen committed
603
      // Don't search LAST3 since it's used only for probs
Thomas Daede's avatar
Thomas Daede committed
604
605
606
      if *i == LAST3_FRAME { continue; }
      if !ref_slot_set.contains(&fi.ref_frames[i.to_index()]) {
        if fwdref == None && i.is_fwd_ref() {
fbossen's avatar
fbossen committed
607
608
          fwdref = Some(ref_frames_set.len());
        }
Thomas Daede's avatar
Thomas Daede committed
609
        if bwdref == None && i.is_bwd_ref() {
fbossen's avatar
fbossen committed
610
611
          bwdref = Some(ref_frames_set.len());
        }
Thomas Daede's avatar
Thomas Daede committed
612
613
        ref_frames_set.push([*i, NONE_FRAME]);
        let slot_idx = fi.ref_frames[i.to_index()];
614
        ref_slot_set.push(slot_idx);
615
      }
616
    }
617
    assert!(!ref_frames_set.is_empty());
618
619
  }

620
  let mut inter_mode_set = ArrayVec::<[(PredictionMode, usize); 20]>::new();
621
622
  let mut mv_stacks = ArrayVec::<[_; 20]>::new();
  let mut mode_contexts = ArrayVec::<[_; 7]>::new();
623

624
625
626
627
628
629
  let motion_estimation = if fi.config.speed_settings.diamond_me {
    crate::me::DiamondSearch::motion_estimation
  } else {
    crate::me::FullSearch::motion_estimation
  };

fbossen's avatar
fbossen committed
630
  for (i, &ref_frames) in ref_frames_set.iter().enumerate() {
631
    let mut mv_stack = ArrayVec::<[CandidateMV; 9]>::new();
632
    mode_contexts.push(cw.find_mvrefs(tile_bo, ref_frames, &mut mv_stack, bsize, fi, false));
633
634

    if fi.frame_type == FrameType::INTER {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
635
      let mut pmv = [MotionVector::default(); 2];
636
      if !mv_stack.is_empty() { pmv[0] = mv_stack[0].this_mv; }
Frank Bossen's avatar
Frank Bossen committed
637
      if mv_stack.len() > 1 { pmv[1] = mv_stack[1].this_mv; }
638
      let ref_slot = ref_slot_set[i] as usize;
639
      let cmv = pmvs[ref_slot].unwrap_or_else(Default::default);
640

641
      let b_me = motion_estimation(fi, ts, bsize, tile_bo, ref_frames[0], cmv, pmv);
642

643
644
645
646
647
      if !fi.config.speed_settings.encode_bottomup &&
        (bsize == BlockSize::BLOCK_32X32 || bsize == BlockSize::BLOCK_64X64) {
          pmvs[ref_slot] = Some(b_me);
      };

Frank Bossen's avatar
Frank Bossen committed
648
      mvs_from_me.push([
649
        b_me,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
650
        MotionVector::default()
Frank Bossen's avatar
Frank Bossen committed
651
652
      ]);

653
      for &x in RAV1E_INTER_MODES_MINIMAL {
654
        inter_mode_set.push((x, i));
655
      }
656
      if !mv_stack.is_empty() {
657
        inter_mode_set.push((PredictionMode::NEAR0MV, i));
658
659
      }
      if mv_stack.len() >= 2 {
660
        inter_mode_set.push((PredictionMode::GLOBALMV, i));
661
      }
662
      let include_near_mvs = fi.config.speed_settings.include_near_mvs;
663
      if include_near_mvs {
fbossen's avatar
fbossen committed
664
        if mv_stack.len() >= 3 {
665
          inter_mode_set.push((PredictionMode::NEAR1MV, i));
666
        }
fbossen's avatar
fbossen committed
667
        if mv_stack.len() >= 4 {
668
          inter_mode_set.push((PredictionMode::NEAR2MV, i));
669
        }
670
      }
671
672
673
      if !mv_stack.iter().take(if include_near_mvs {4} else {2})
        .any(|ref x| x.this_mv.row == mvs_from_me[i][0].row && x.this_mv.col == mvs_from_me[i][0].col)
        && (mvs_from_me[i][0].row != 0 || mvs_from_me[i][0].col != 0) {
674
          inter_mode_set.push((PredictionMode::NEWMV, i));
675
        }
676
    }
fbossen's avatar
fbossen committed
677
678
679
680
681
682
683
684
685
686
687
688
689
690
    mv_stacks.push(mv_stack);
  }

  let sz = bsize.width_mi().min(bsize.height_mi());

  if fi.frame_type == FrameType::INTER && fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 {
    // Adding compound candidate
    if let Some(r0) = fwdref {
      if let Some(r1) = bwdref {
        let ref_frames = [ref_frames_set[r0][0], ref_frames_set[r1][0]];
        ref_frames_set.push(ref_frames);
        let mv0 = mvs_from_me[r0][0];
        let mv1 = mvs_from_me[r1][0];
        mvs_from_me.push([mv0, mv1]);
691
        let mut mv_stack = ArrayVec::<[CandidateMV; 9]>::new();
692
        mode_contexts.push(cw.find_mvrefs(tile_bo, ref_frames, &mut mv_stack, bsize, fi, true));
fbossen's avatar
fbossen committed
693
        for &x in RAV1E_INTER_COMPOUND_MODES {
694
          inter_mode_set.push((x, ref_frames_set.len() - 1));
fbossen's avatar
fbossen committed
695
696
697
698
        }
        mv_stacks.push(mv_stack);
      }
    }
699
700
  }

701
  if fi.frame_type != FrameType::INTER {
702
    assert!(inter_mode_set.is_empty());
703
704
  }

705
  inter_mode_set.iter().for_each(|&(luma_mode, i)| {
fbossen's avatar
fbossen committed
706
707
    let mvs = match luma_mode {
      PredictionMode::NEWMV | PredictionMode::NEW_NEWMV => mvs_from_me[i],
708
      PredictionMode::NEARESTMV | PredictionMode::NEAREST_NEARESTMV => if !mv_stacks[i].is_empty() {
fbossen's avatar
fbossen committed
709
        [mv_stacks[i][0].this_mv, mv_stacks[i][0].comp_mv]
710
      } else {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
711
        [MotionVector::default(); 2]
712
      },
713
      PredictionMode::NEAR0MV => if mv_stacks[i].len() > 1 {
fbossen's avatar
fbossen committed
714
        [mv_stacks[i][1].this_mv, mv_stacks[i][1].comp_mv]
715
      } else {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
716
        [MotionVector::default(); 2]
717
718
      },
      PredictionMode::NEAR1MV | PredictionMode::NEAR2MV =>
719
720
        [mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].this_mv,
         mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].comp_mv],
fbossen's avatar
fbossen committed
721
722
      PredictionMode::NEAREST_NEWMV => [mv_stacks[i][0].this_mv, mvs_from_me[i][1]],
      PredictionMode::NEW_NEARESTMV => [mvs_from_me[i][0], mv_stacks[i][0].comp_mv],
Vladimir Kazakov's avatar
Vladimir Kazakov committed
723
      _ => [MotionVector::default(); 2]
724
    };
725
    let mode_set_chroma = ArrayVec::from([luma_mode]);
726

727
    luma_chroma_mode_rdo(luma_mode, fi, bsize, tile_bo, ts, cw, rdo_type, &cw_checkpoint, &mut best, mvs, ref_frames_set[i], &mode_set_chroma, false,
728
             mode_contexts[i], &mv_stacks[i]);
729
  });
730

731
  if !best.skip {
732
733
734
735
    let tx_size = bsize.tx_size();

    // Reduce number of prediction modes at higher speed levels
    let num_modes_rdo = if (fi.frame_type == FrameType::KEY
736
737
                            && fi.config.speed_settings.prediction_modes
                            >= PredictionModesSetting::ComplexKeyframes)
738
      || (fi.frame_type == FrameType::INTER
739
          && fi.config.speed_settings.prediction_modes
740
          >= PredictionModesSetting::ComplexAll)
741
742
743
744
745
746
747
    {
      7
    } else {
      3
    };

    let intra_mode_set = RAV1E_INTRA_MODES;
748
749
    let mut sads = {
      let edge_buf = {
Romain Vimont's avatar
Romain Vimont committed
750
        let rec = &ts.rec.planes[0].as_const();
751
        let po = tile_bo.plane_offset(&rec.plane_cfg);
752
        get_intra_edges(rec, po, tx_size, fi.sequence.bit_depth, None)
753
      };
754
755
756
      intra_mode_set
        .iter()
        .map(|&luma_mode| {
757
          let tile_rect = ts.tile_rect();
Romain Vimont's avatar
Romain Vimont committed
758
          let rec = &mut ts.rec.planes[0];
759
          let mut rec_region = rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo });
760
          luma_mode.predict_intra(
761
762
            tile_rect,
            &mut rec_region,
763
764
765
766
767
768
            tx_size,
            fi.sequence.bit_depth,
            &[0i16; 2],
            0,
            &edge_buf
          );
769

770
          let plane_org = ts.input_tile.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo });
771
          let plane_ref = rec_region.as_const();
772
773
774
775
776
777
778

          (
            luma_mode,
            get_sad(
              &plane_org,
              &plane_ref,
              tx_size.width(),
Romain Vimont's avatar
Romain Vimont committed
779
              tx_size.height(),
780
781
782
783
784
              fi.sequence.bit_depth
            )
          )
        })
        .collect::<Vec<_>>()
785
    };
786
787
788
789
790
791
792
793

    sads.sort_by_key(|a| a.1);

    // Find mode with lowest rate cost
    let mut z = 32768;
    let probs_all = if fi.frame_type == FrameType::INTER {
      cw.get_cdf_intra_mode(bsize)
    } else {
794
      cw.get_cdf_intra_mode_kf(tile_bo)
795
796
797
798
799
800
    }.iter().take(INTRA_MODES).map(|&a| { let d = z - a; z = a; d }).collect::<Vec<_>>();


    let mut probs = intra_mode_set.iter().map(|&a| (a, probs_all[a as usize])).collect::<Vec<_>>();
    probs.sort_by_key(|a| !a.1);

801
    let mut modes = ArrayVec::<[_;INTRA_MODES]>::new();
802
803
804
805
806
807
808
809
810
    probs
      .iter()
      .take(num_modes_rdo / 2)
      .for_each(|&(luma_mode, _prob)| modes.push(luma_mode));
    sads.iter().take(num_modes_rdo).for_each(|&(luma_mode, _sad)| {
      if !modes.contains(&luma_mode) {
        modes.push(luma_mode)
      }
    });
811
812

    modes.iter().take(num_modes_rdo).for_each(|&luma_mode| {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
813
      let mvs = [MotionVector::default(); 2];
814
      let ref_frames = [INTRA_FRAME, NONE_FRAME];
815
816
817
818
      let mut mode_set_chroma = vec![luma_mode];
      if is_chroma_block && luma_mode != PredictionMode::DC_PRED {
        mode_set_chroma.push(PredictionMode::DC_PRED);
      }
819
      luma_chroma_mode_rdo(luma_mode, fi, bsize, tile_bo, ts, cw, rdo_type, &cw_checkpoint, &mut best, mvs, ref_frames, &mode_set_chroma, true,
820
                           0, &ArrayVec::<[CandidateMV; 9]>::new());
821
    });
822
  }
Michael Bebenita's avatar
Michael Bebenita committed
823

824
  if best.mode_luma.is_intra() && is_chroma_block && bsize.cfl_allowed() {
825
826
    let chroma_mode = PredictionMode::UV_CFL_PRED;
    let cw_checkpoint = cw.checkpoint();
827
    let wr: &mut dyn Writer = &mut WriterCounter::new();
828
829
    write_tx_blocks(
      fi,
Romain Vimont's avatar
Romain Vimont committed
830
      ts,
831
832
      cw,
      wr,
833
834
      best.mode_luma,
      best.mode_luma,
835
      tile_bo,
836
      bsize,
837
838
      best.tx_size,
      best.tx_type,
839
      false,
840
      CFLParams::default(),
841
      true,
842
      rdo_type,
843
      true
844
845
    );
    cw.rollback(&cw_checkpoint);
846
    if let Some(cfl) = rdo_cfl_alpha(ts, tile_bo, bsize, fi.sequence.bit_depth) {
Raphaël Zumer's avatar
Raphaël Zumer committed
847
      let wr: &mut dyn Writer = &mut WriterCounter::new();
848
849
      let tell = wr.tell_frac();

850
851
      encode_block_pre_cdef(&fi.sequence, ts, cw, wr, bsize, tile_bo, best.skip);
      let _ = encode_block_post_cdef(
852
        fi,
Romain Vimont's avatar
Romain Vimont committed
853
        ts,
854
855
        cw,
        wr,
856
        best.mode_luma,
857
        chroma_mode,
858
        best.ref_frames,
Josh Holmer's avatar
Josh Holmer committed
859
        best.mvs,
860
        bsize,
861
        tile_bo,
862
        best.skip,
863
        cfl,
864
865
        best.tx_size,
        best.tx_type,
866
        0,
867
        &Vec::new(),
868
        rdo_type,
869
        true // For CFL, luma should be always reconstructed.
870
871
      );

872
      let rate = wr.tell_frac() - tell;
873
874

      // For CFL, tx-domain distortion is not an option.
875
876
      let distortion =
        compute_distortion(
877
          fi,
Romain Vimont's avatar
Romain Vimont committed
878
          ts,
879
880
881
          w,
          h,
          is_chroma_block,
882
          tile_bo,
883
884
          false
        );
885
      let rd = compute_rd_cost(fi, rate, distortion);
886
887
888
889
      if rd < best.rd {
        best.rd = rd;
        best.mode_chroma = chroma_mode;
        best.cfl_params = cfl;
890
891
892
893
894
895
      }

      cw.rollback(&cw_checkpoint);
    }
  }

896
897
898
  cw.bc.blocks.set_mode(tile_bo, bsize, best.mode_luma);
  cw.bc.blocks.set_ref_frames(tile_bo, bsize, best.ref_frames);
  cw.bc.blocks.set_motion_vectors(tile_bo, bsize, best.mvs);
899

900
  assert!(best.rd >= 0_f64);
Michael Bebenita's avatar
Michael Bebenita committed
901

902
  RDOPartitionOutput {
903
    bo: tile_bo,
904
    bsize,
905
906
907
908
909
    pred_mode_luma: best.mode_luma,
    pred_mode_chroma: best.mode_chroma,
    pred_cfl_params: best.cfl_params,
    ref_frames: best.ref_frames,
    mvs: best.mvs,
910
    rd_cost: best.rd,
911
912
913
    skip: best.skip,
    tx_size: best.tx_size,
    tx_type: best.tx_type,
Michael Bebenita's avatar
Michael Bebenita committed
914
  }
Michael Bebenita's avatar
Michael Bebenita committed
915
916
}

917
pub fn rdo_cfl_alpha<T: Pixel>(
918
  ts: &mut TileStateMut<'_, T>, tile_bo: BlockOffset, bsize: BlockSize, bit_depth: usize
919
) -> Option<CFLParams> {
Romain Vimont's avatar
Romain Vimont committed
920
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
921
  let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec);
922

923
  let mut ac: AlignedArray<[i16; 32 * 32]> = UninitializedAlignedArray();
924
  luma_ac(&mut ac.array, ts, tile_bo, bsize);
925
926
  let best_alpha: Vec<i16> = (1..3)
    .map(|p| {
927
928
      let &PlaneConfig { xdec, ydec, .. } = ts.rec.planes[p].plane_cfg;
      let tile_rect = ts.tile_rect().decimated(xdec, ydec);
Romain Vimont's avatar
Romain Vimont committed
929
930
      let rec = &mut ts.rec.planes[p];
      let input = &ts.input_tile.planes[p];
931
      let po = tile_bo.plane_offset(rec.plane_cfg);
932
933
      (-16i16..17i16)
        .min_by_key(|&alpha| {
934
          let edge_buf = get_intra_edges(
Romain Vimont's avatar
Romain Vimont committed
935
            &rec.as_const(),
936
            po,
937
938
939
940
            uv_tx_size,
            bit_depth,
            Some(PredictionMode::UV_CFL_PRED)
          );
Romain Vimont's avatar
Romain Vimont committed
941

942
          let mut rec_region = rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo });
943
          PredictionMode::UV_CFL_PRED.predict_intra(
944
945
            tile_rect,
            &mut rec_region,
946
947
            uv_tx_size,
            bit_depth,
948
            &ac.array,
Frank Bossen's avatar
Frank Bossen committed
949
            alpha,
950
            &edge_buf
951
952
          );
          sse_wxh(
953
            &input.subregion(Area::BlockStartingAt { bo: tile_bo }),
954
            &rec_region.as_const(),
955
956
957
958
959
960
961
962
963
964
            uv_tx_size.width(),
            uv_tx_size.height()
          )
        }).unwrap()
    }).collect();

  if best_alpha[0] == 0 && best_alpha[1] == 0 {
    None
  } else {
    Some(CFLParams::from_alpha(best_alpha[0], best_alpha[1]))
965
966
967
  }
}

968
// RDO-based transform type decision
969
pub fn rdo_tx_type_decision<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
970
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter,
Thomas Daede's avatar
Thomas Daede committed
971
  mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
972
  bsize: BlockSize, tile_bo: BlockOffset, tx_size: TxSize, tx_set: TxSet,
973
974
  tx_types: &[TxType]
) -> (TxType, f64) {
Michael Bebenita's avatar
Michael Bebenita committed
975
976
977
978
979
980
981
  let mut best_type = TxType::DCT_DCT;
  let mut best_rd = std::f64::MAX;

  // Get block luma and chroma dimensions
  let w = bsize.width();
  let h = bsize.height();

Romain Vimont's avatar
Romain Vimont committed
982
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
983
  let is_chroma_block = has_chroma(tile_bo, bsize, xdec, ydec);
Michael Bebenita's avatar
Michael Bebenita committed
984

985
  let is_inter = !mode.is_intra();
Michael Bebenita's avatar
Michael Bebenita committed
986

987
  let cw_checkpoint = cw.checkpoint();
Michael Bebenita's avatar
Michael Bebenita committed
988

989
990
991
992
993
  let rdo_type = if fi.use_tx_domain_distortion {
    RDOType::TxDistRealRate
  } else {
    RDOType::PixelDistRealRate
  };
994
  let need_recon_pixel = tx_size.block_size() != bsize;
995

996
  for &tx_type in tx_types {
Michael Bebenita's avatar
Michael Bebenita committed
997
    // Skip unsupported transform types
Yushin Cho's avatar
Yushin Cho committed
998
    if av1_tx_used[tx_set as usize][tx_type as usize] == 0 {
Michael Bebenita's avatar
Michael Bebenita committed
999
1000
1001
      continue;
    }

1002
    if is_inter {
1003
      motion_compensate(fi, ts, cw, mode, ref_frames, mvs, bsize, tile_bo, true);
1004
    }
1005

Raphaël Zumer's avatar
Raphaël Zumer committed
1006
    let wr: &mut dyn Writer = &mut WriterCounter::new();