// Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#![allow(non_camel_case_types)]
Michael Bebenita's avatar
Michael Bebenita committed
12
#![cfg_attr(feature = "cargo-clippy", allow(cast_lossless))]
Guillaume Martres's avatar
Guillaume Martres committed
13

Raphaël Zumer's avatar
Raphaël Zumer committed
14 15
use api::PredictionModesSetting;
use cdef::*;
16
use context::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
17 18
use ec::{OD_BITRES, Writer, WriterCounter};
use encoder::{ChromaSampling, ReferenceMode};
19 20
use encode_block_a;
use encode_block_b;
21
use encode_block_with_modes;
Raphaël Zumer's avatar
Raphaël Zumer committed
22 23 24 25 26 27
use Frame;
use FrameInvariants;
use FrameState;
use FrameType;
use luma_ac;
use me::*;
28
use motion_compensate;
29
use partition::*;
Guillaume Martres's avatar
Guillaume Martres committed
30
use plane::*;
31
use predict::{RAV1E_INTRA_MODES, RAV1E_INTER_MODES_MINIMAL, RAV1E_INTER_COMPOUND_MODES};
32
use quantize::dc_q;
Raphaël Zumer's avatar
Raphaël Zumer committed
33 34
use Sequence;
use Tune;
Michael Bebenita's avatar
Michael Bebenita committed
35
use write_tx_blocks;
36
use write_tx_tree;
Raphaël Zumer's avatar
Raphaël Zumer committed
37 38 39

use std;
use std::vec::Vec;
40
use partition::PartitionType::*;
Guillaume Martres's avatar
Guillaume Martres committed
41

42
#[derive(Clone)]
43
pub struct RDOOutput {
Michael Bebenita's avatar
Michael Bebenita committed
44 45 46
  pub rd_cost: f64,
  pub part_type: PartitionType,
  pub part_modes: Vec<RDOPartitionOutput>
47 48 49 50
}

#[derive(Clone)]
pub struct RDOPartitionOutput {
Michael Bebenita's avatar
Michael Bebenita committed
51 52
  pub rd_cost: f64,
  pub bo: BlockOffset,
53
  pub bsize: BlockSize,
Michael Bebenita's avatar
Michael Bebenita committed
54 55
  pub pred_mode_luma: PredictionMode,
  pub pred_mode_chroma: PredictionMode,
56
  pub pred_cfl_params: CFLParams,
fbossen's avatar
fbossen committed
57 58
  pub ref_frames: [usize; 2],
  pub mvs: [MotionVector; 2],
59 60 61
  pub skip: bool,
  pub tx_size: TxSize,
  pub tx_type: TxType,
62 63
}

64
#[allow(unused)]
65 66 67
fn cdef_dist_wxh_8x8(
  src1: &PlaneSlice<'_>, src2: &PlaneSlice<'_>, bit_depth: usize
) -> u64 {
68
  let coeff_shift = bit_depth - 8;
69

Jean-Marc Valin's avatar
Jean-Marc Valin committed
70 71 72 73 74 75 76 77 78 79 80
  let mut sum_s: i32 = 0;
  let mut sum_d: i32 = 0;
  let mut sum_s2: i64 = 0;
  let mut sum_d2: i64 = 0;
  let mut sum_sd: i64 = 0;
  for j in 0..8 {
    for i in 0..8 {
      let s = src1.p(i, j) as i32;
      let d = src2.p(i, j) as i32;
      sum_s += s;
      sum_d += d;
81 82 83
      sum_s2 += (s * s) as i64;
      sum_d2 += (d * d) as i64;
      sum_sd += (s * d) as i64;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
84 85 86 87 88 89
    }
  }
  let svar = (sum_s2 - ((sum_s as i64 * sum_s as i64 + 32) >> 6)) as f64;
  let dvar = (sum_d2 - ((sum_d as i64 * sum_d as i64 + 32) >> 6)) as f64;
  let sse = (sum_d2 + sum_s2 - 2 * sum_sd) as f64;
  //The two constants were tuned for CDEF, but can probably be better tuned for use in general RDO
90 91
  let ssim_boost = 0.5_f64 * (svar + dvar + (400 << 2 * coeff_shift) as f64)
    / f64::sqrt((20000 << 4 * coeff_shift) as f64 + svar * dvar);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
92 93 94
  (sse * ssim_boost + 0.5_f64) as u64
}

95 96
#[allow(unused)]
fn cdef_dist_wxh(
97 98
  src1: &PlaneSlice<'_>, src2: &PlaneSlice<'_>, w: usize, h: usize,
  bit_depth: usize
99
) -> u64 {
100 101 102
  assert!(w & 0x7 == 0);
  assert!(h & 0x7 == 0);

Jean-Marc Valin's avatar
Jean-Marc Valin committed
103
  let mut sum: u64 = 0;
104 105 106 107
  for j in 0..h / 8 {
    for i in 0..w / 8 {
      sum += cdef_dist_wxh_8x8(
        &src1.subslice(i * 8, j * 8),
108 109
        &src2.subslice(i * 8, j * 8),
        bit_depth
110
      )
Jean-Marc Valin's avatar
Jean-Marc Valin committed
111 112 113 114 115
    }
  }
  sum
}

116
// Sum of Squared Error for a wxh block
117
pub fn sse_wxh(
118 119
  src1: &PlaneSlice<'_>, src2: &PlaneSlice<'_>, w: usize, h: usize
) -> u64 {
120 121 122
  assert!(w & (MI_SIZE - 1) == 0);
  assert!(h & (MI_SIZE - 1) == 0);

Michael Bebenita's avatar
Michael Bebenita committed
123 124
  let mut sse: u64 = 0;
  for j in 0..h {
125 126 127 128 129
    let src1j = src1.subslice(0, j);
    let src2j = src2.subslice(0, j);
    let s1 = src1j.as_slice_w_width(w);
    let s2 = src2j.as_slice_w_width(w);

130 131 132 133 134 135 136
    let row_sse = s1
      .iter()
      .zip(s2)
      .map(|(&a, &b)| {
        let c = (a as i16 - b as i16) as i32;
        (c * c) as u32
      }).sum::<u32>();
137
    sse += row_sse as u64;
Michael Bebenita's avatar
Michael Bebenita committed
138 139
  }
  sse
Guillaume Martres's avatar
Guillaume Martres committed
140
}
141

142
/// Lagrange multiplier used to weigh rate against distortion.
///
/// Computed as `q0 * q0 * ln(2) / 6`, where `q0` is the DC quantizer for
/// `fi.base_q_idx` / `fi.dc_delta_q[0]` at `bit_depth`, divided by 8.
pub fn get_lambda(fi: &FrameInvariants, bit_depth: usize) -> f64 {
  let q = dc_q(fi.base_q_idx, fi.dc_delta_q[0], bit_depth) as f64;

  // Convert q into Q0 precision, given that libaom quantizers are Q3
  let q0 = q / 8.0_f64;

  // Lambda formula from doc/theoretical_results.lyx in the daala repo
  // Use Q0 quantizer since lambda will be applied to Q0 pixel domain
  q0 * q0 * std::f64::consts::LN_2 / 6.0
}

Frank Bossen's avatar
Frank Bossen committed
153
/// Square root of the Lagrange multiplier returned by `get_lambda`.
///
/// Computed directly as `q0 * sqrt(ln(2) / 6)`, with `q0` derived from the
/// DC quantizer in the same way as in `get_lambda`.
pub fn get_lambda_sqrt(fi: &FrameInvariants, bit_depth: usize) -> f64 {
  let q = dc_q(fi.base_q_idx, fi.dc_delta_q[0], bit_depth) as f64;

  // Convert q into Q0 precision, given that libaom quantizers are Q3
  let q0 = q / 8.0_f64;

  // Lambda formula from doc/theoretical_results.lyx in the daala repo
  // Use Q0 quantizer since lambda will be applied to Q0 pixel domain
  q0 * (std::f64::consts::LN_2 / 6.0).sqrt()
}

164 165 166
// Compute the rate-distortion cost for an encode
fn compute_rd_cost(
  fi: &FrameInvariants, fs: &FrameState, w_y: usize, h_y: usize,
167 168
  is_chroma_block: bool, bo: &BlockOffset, bit_cost: u32, bit_depth: usize,
  luma_only: bool
169 170
) -> f64 {
  let lambda = get_lambda(fi, bit_depth);
Michael Bebenita's avatar
Michael Bebenita committed
171 172 173

  // Compute distortion
  let po = bo.plane_offset(&fs.input.planes[0].cfg);
Michael Bebenita's avatar
Michael Bebenita committed
174 175 176 177 178 179 180 181 182 183 184 185
  let mut distortion = if fi.config.tune == Tune::Psnr {
    sse_wxh(
      &fs.input.planes[0].slice(&po),
      &fs.rec.planes[0].slice(&po),
      w_y,
      h_y
    )
  } else if fi.config.tune == Tune::Psychovisual {
    cdef_dist_wxh(
      &fs.input.planes[0].slice(&po),
      &fs.rec.planes[0].slice(&po),
      w_y,
186 187
      h_y,
      bit_depth
Michael Bebenita's avatar
Michael Bebenita committed
188 189 190 191
    )
  } else {
    unimplemented!();
  };
Michael Bebenita's avatar
Michael Bebenita committed
192

193
  if !luma_only {
194 195 196 197 198 199 200 201 202 203 204
  let PlaneConfig { xdec, ydec, .. } = fs.input.planes[1].cfg;

  let mask = !(MI_SIZE - 1);
  let mut w_uv = (w_y >> xdec) & mask;
  let mut h_uv = (h_y >> ydec) & mask;

  if (w_uv == 0 || h_uv == 0) && is_chroma_block {
    w_uv = MI_SIZE;
    h_uv = MI_SIZE;
  }

Michael Bebenita's avatar
Michael Bebenita committed
205 206 207
  // Add chroma distortion only when it is available
  if w_uv > 0 && h_uv > 0 {
    for p in 1..3 {
Frank Bossen's avatar
Frank Bossen committed
208
      let po = bo.plane_offset(&fs.input.planes[p].cfg);
Michael Bebenita's avatar
Michael Bebenita committed
209 210 211 212 213 214 215 216 217

      distortion += sse_wxh(
        &fs.input.planes[p].slice(&po),
        &fs.rec.planes[p].slice(&po),
        w_uv,
        h_uv
      );
    }
  };
218
  }
Michael Bebenita's avatar
Michael Bebenita committed
219 220
  // Compute rate
  let rate = (bit_cost as f64) / ((1 << OD_BITRES) as f64);
221

Michael Bebenita's avatar
Michael Bebenita committed
222
  (distortion as f64) + lambda * rate
223 224
}

225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
// Rate-distortion cost of an encode whose distortion was already measured
// by the caller and passed in as `tx_dist`.
//
// For skipped blocks `tx_dist` is ignored and the distortion is measured in
// the pixel domain instead: luma always, chroma unless `luma_only` is set.
// Only valid for `Tune::Psnr`; panics otherwise, and panics on a negative
// `tx_dist` for non-skipped blocks.
fn compute_tx_rd_cost(
  fi: &FrameInvariants, fs: &FrameState, w_y: usize, h_y: usize,
  is_chroma_block: bool, bo: &BlockOffset, bit_cost: u32, tx_dist: i64,
  bit_depth: usize,
  skip: bool, luma_only: bool
) -> f64 {
  assert!(fi.config.tune == Tune::Psnr);

  let lambda = get_lambda(fi, bit_depth);

  // Luma (or caller-supplied) distortion
  let mut distortion = if skip {
    let luma_po = bo.plane_offset(&fs.input.planes[0].cfg);
    let luma_src = fs.input.planes[0].slice(&luma_po);
    let luma_rec = fs.rec.planes[0].slice(&luma_po);

    sse_wxh(&luma_src, &luma_rec, w_y, h_y)
  } else {
    assert!(tx_dist >= 0);
    tx_dist as u64
  };

  if skip && !luma_only {
    let PlaneConfig { xdec, ydec, .. } = fs.input.planes[1].cfg;

    // Subsampled size with the low MI_SIZE bits cleared; a chroma-carrying
    // block whose size would vanish is measured over one MI_SIZE square.
    let mi_mask = !(MI_SIZE - 1);
    let (w_uv, h_uv) = {
      let w = (w_y >> xdec) & mi_mask;
      let h = (h_y >> ydec) & mi_mask;
      if is_chroma_block && (w == 0 || h == 0) {
        (MI_SIZE, MI_SIZE)
      } else {
        (w, h)
      }
    };

    // Chroma contributes only when there is something to measure
    if w_uv > 0 && h_uv > 0 {
      distortion += (1..3)
        .map(|p| {
          let chroma_po = bo.plane_offset(&fs.input.planes[p].cfg);

          sse_wxh(
            &fs.input.planes[p].slice(&chroma_po),
            &fs.rec.planes[p].slice(&chroma_po),
            w_uv,
            h_uv
          )
        }).sum::<u64>();
    }
  }

  // Rate in bits
  let rate = (bit_cost as f64) / ((1 << OD_BITRES) as f64);

  lambda * rate + (distortion as f64)
}

283 284 285
pub fn rdo_tx_size_type(
  seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
  cw: &mut ContextWriter, bsize: BlockSize, bo: &BlockOffset,
286
  luma_mode: PredictionMode, ref_frames: [usize; 2], mvs: [MotionVector; 2], skip: bool
287
) -> (TxSize, TxType) {
288 289
  // these rules follow TX_MODE_LARGEST
  let tx_size = match bsize {
290 291 292
    BlockSize::BLOCK_4X4 => TxSize::TX_4X4,
    BlockSize::BLOCK_8X8 => TxSize::TX_8X8,
    BlockSize::BLOCK_16X16 => TxSize::TX_16X16,
293 294 295 296 297 298
    BlockSize::BLOCK_4X8 => TxSize::TX_4X8,
    BlockSize::BLOCK_8X4 => TxSize::TX_8X4,
    BlockSize::BLOCK_8X16 => TxSize::TX_8X16,
    BlockSize::BLOCK_16X8 => TxSize::TX_16X8,
    BlockSize::BLOCK_16X32 => TxSize::TX_16X32,
    BlockSize::BLOCK_32X16 => TxSize::TX_32X16,
299
    BlockSize::BLOCK_32X32 => TxSize::TX_32X32,
300 301 302 303
    BlockSize::BLOCK_32X64 => TxSize::TX_32X64,
    BlockSize::BLOCK_64X32 => TxSize::TX_64X32,
    BlockSize::BLOCK_64X64 => TxSize::TX_64X64,
    _ => unimplemented!()
304 305 306 307 308 309 310 311
  };
  cw.bc.set_tx_size(bo, tx_size);
  // Were we not hardcoded to TX_MODE_LARGEST, block tx size would be written here

  // Luma plane transform type decision
  let is_inter = !luma_mode.is_intra();
  let tx_set = get_tx_set(tx_size, is_inter, fi.use_reduced_tx_set);

312
  let tx_type =
313
    if tx_set > TxSet::TX_SET_DCTONLY && fi.config.speed_settings.rdo_tx_decision && !skip {
314 315 316 317 318
      rdo_tx_type_decision(
        fi,
        fs,
        cw,
        luma_mode,
fbossen's avatar
fbossen committed
319 320
        ref_frames,
        mvs,
321 322 323 324
        bsize,
        bo,
        tx_size,
        tx_set,
325 326
        seq.bit_depth,
        seq.chroma_sampling
327 328
      )
    } else {
329
      TxType::DCT_DCT
330
    };
331

332 333
  assert!(tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT);

334 335 336
  (tx_size, tx_type)
}

337 338 339 340 341 342
struct EncodingSettings {
  mode_luma: PredictionMode,
  mode_chroma: PredictionMode,
  cfl_params: CFLParams,
  skip: bool,
  rd: f64,
fbossen's avatar
fbossen committed
343 344
  ref_frames: [usize; 2],
  mvs: [MotionVector; 2],
345 346 347 348 349 350 351 352 353 354 355 356
  tx_size: TxSize,
  tx_type: TxType
}

impl Default for EncodingSettings {
  fn default() -> Self {
    EncodingSettings {
      mode_luma: PredictionMode::DC_PRED,
      mode_chroma: PredictionMode::DC_PRED,
      cfl_params: CFLParams::new(),
      skip: false,
      rd: std::f64::MAX,
fbossen's avatar
fbossen committed
357 358
      ref_frames: [INTRA_FRAME, NONE_FRAME],
      mvs: [MotionVector { row: 0, col: 0 }; 2],
359 360 361 362 363
      tx_size: TxSize::TX_4X4,
      tx_type: TxType::DCT_DCT
    }
  }
}
364
// RDO-based mode decision
Michael Bebenita's avatar
Michael Bebenita committed
365
pub fn rdo_mode_decision(
366
  seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
367
  cw: &mut ContextWriter, bsize: BlockSize, bo: &BlockOffset,
368
  pmvs: &[Option<MotionVector>], needs_rec: bool
369
) -> RDOOutput {
370
  let mut best = EncodingSettings::default();
Michael Bebenita's avatar
Michael Bebenita committed
371 372 373 374 375

  // Get block luma and chroma dimensions
  let w = bsize.width();
  let h = bsize.height();

Michael Bebenita's avatar
Michael Bebenita committed
376
  let PlaneConfig { xdec, ydec, .. } = fs.input.planes[1].cfg;
Michael Bebenita's avatar
Michael Bebenita committed
377 378
  let is_chroma_block = has_chroma(bo, bsize, xdec, ydec);

379
  let cw_checkpoint = cw.checkpoint();
380

fbossen's avatar
fbossen committed
381
  let mut ref_frames_set = Vec::new();
382
  let mut ref_slot_set = Vec::new();
fbossen's avatar
fbossen committed
383 384 385
  let mut mvs_from_me = Vec::new();
  let mut fwdref = None;
  let mut bwdref = None;
386 387

  if fi.frame_type == FrameType::INTER {
388
    for i in LAST_FRAME..NONE_FRAME {
fbossen's avatar
fbossen committed
389 390
      // Don't search LAST3 since it's used only for probs
      if i == LAST3_FRAME { continue; }
391
      if !ref_slot_set.contains(&fi.ref_frames[i - LAST_FRAME]) {
fbossen's avatar
fbossen committed
392 393 394 395 396 397 398
        if fwdref == None && i < BWDREF_FRAME {
          fwdref = Some(ref_frames_set.len());
        }
        if bwdref == None && i >= BWDREF_FRAME {
          bwdref = Some(ref_frames_set.len());
        }
        ref_frames_set.push([i, NONE_FRAME]);
399 400
        let slot_idx = fi.ref_frames[i - LAST_FRAME];
        ref_slot_set.push(slot_idx);
401
      }
402
    }
fbossen's avatar
fbossen committed
403
    assert!(ref_frames_set.len() != 0);
404 405 406 407 408 409
  }

  let mut mode_set: Vec<(PredictionMode, usize)> = Vec::new();
  let mut mv_stacks = Vec::new();
  let mut mode_contexts = Vec::new();

fbossen's avatar
fbossen committed
410 411
  for (i, &ref_frames) in ref_frames_set.iter().enumerate() {
    let mut mv_stack: Vec<CandidateMV> = Vec::new();
Frank Bossen's avatar
Frank Bossen committed
412
    mode_contexts.push(cw.find_mvrefs(bo, ref_frames, &mut mv_stack, bsize, fi, false));
413 414

    if fi.frame_type == FrameType::INTER {
Frank Bossen's avatar
Frank Bossen committed
415 416 417 418 419 420 421 422 423
      let mut pmv = [MotionVector{ row: 0, col: 0 }; 2];
      if mv_stack.len() > 0 { pmv[0] = mv_stack[0].this_mv; }
      if mv_stack.len() > 1 { pmv[1] = mv_stack[1].this_mv; }
      let cmv = pmvs[ref_slot_set[i] as usize].unwrap();
      mvs_from_me.push([
        motion_estimation(fi, fs, bsize, bo, ref_frames[0], cmv, seq.bit_depth, &pmv),
        MotionVector { row: 0, col: 0 }
      ]);

424 425 426
      for &x in RAV1E_INTER_MODES_MINIMAL {
        mode_set.push((x, i));
      }
427 428 429 430 431 432
      if mv_stack.len() >= 1 {
        mode_set.push((PredictionMode::NEAR0MV, i));
      }
      if mv_stack.len() >= 2 {
        mode_set.push((PredictionMode::GLOBALMV, i));
      }
433
      let include_near_mvs = fi.config.speed_settings.include_near_mvs;
434
      if include_near_mvs {
fbossen's avatar
fbossen committed
435
        if mv_stack.len() >= 3 {
436 437
          mode_set.push((PredictionMode::NEAR1MV, i));
        }
fbossen's avatar
fbossen committed
438
        if mv_stack.len() >= 4 {
439 440
          mode_set.push((PredictionMode::NEAR2MV, i));
        }
441
      }
442 443 444 445 446
      if !mv_stack.iter().take(if include_near_mvs {4} else {2})
        .any(|ref x| x.this_mv.row == mvs_from_me[i][0].row && x.this_mv.col == mvs_from_me[i][0].col)
        && (mvs_from_me[i][0].row != 0 || mvs_from_me[i][0].col != 0) {
        mode_set.push((PredictionMode::NEWMV, i));
      }
447
    }
fbossen's avatar
fbossen committed
448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
    mv_stacks.push(mv_stack);
  }

  let sz = bsize.width_mi().min(bsize.height_mi());

  if fi.frame_type == FrameType::INTER && fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 {
    // Adding compound candidate
    if let Some(r0) = fwdref {
      if let Some(r1) = bwdref {
        let ref_frames = [ref_frames_set[r0][0], ref_frames_set[r1][0]];
        ref_frames_set.push(ref_frames);
        let mv0 = mvs_from_me[r0][0];
        let mv1 = mvs_from_me[r1][0];
        mvs_from_me.push([mv0, mv1]);
        let mut mv_stack: Vec<CandidateMV> = Vec::new();
Frank Bossen's avatar
Frank Bossen committed
463
        mode_contexts.push(cw.find_mvrefs(bo, ref_frames, &mut mv_stack, bsize, fi, true));
fbossen's avatar
fbossen committed
464 465 466 467 468 469
        for &x in RAV1E_INTER_COMPOUND_MODES {
          mode_set.push((x, ref_frames_set.len() - 1));
        }
        mv_stacks.push(mv_stack);
      }
    }
470 471
  }

472
  let luma_rdo = |luma_mode: PredictionMode, fs: &mut FrameState, cw: &mut ContextWriter, best: &mut EncodingSettings,
473
    mvs: [MotionVector; 2], ref_frames: [usize; 2], mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool,
474
    mode_context: usize, mv_stack: &Vec<CandidateMV>| {
475
    let (tx_size, mut tx_type) = rdo_tx_size_type(
fbossen's avatar
fbossen committed
476
      seq, fi, fs, cw, bsize, bo, luma_mode, ref_frames, mvs, false,
477
    );
478

479
    // Find the best chroma prediction mode for the current luma prediction mode
480 481
    let mut chroma_rdo = |skip: bool| {
      mode_set_chroma.iter().for_each(|&chroma_mode| {
482
        let wr: &mut dyn Writer = &mut WriterCounter::new();
483 484
        let tell = wr.tell_frac();

485 486
        if skip { tx_type = TxType::DCT_DCT; };

487 488 489 490
        if bsize >= BlockSize::BLOCK_8X8 && bsize.is_sqr() {
          cw.write_partition(wr, bo, PartitionType::PARTITION_NONE, bsize);
        }

491
        encode_block_a(seq, fs, cw, wr, bsize, bo, skip);
492
        let tx_dist =
493 494 495 496 497 498 499 500
        encode_block_b(
          seq,
          fi,
          fs,
          cw,
          wr,
          luma_mode,
          chroma_mode,
fbossen's avatar
fbossen committed
501 502
          ref_frames,
          mvs,
503 504 505 506
          bsize,
          bo,
          skip,
          seq.bit_depth,
507
          CFLParams::new(),
508 509 510
          tx_size,
          tx_type,
          mode_context,
511
          mv_stack,
512
          !needs_rec
513
        );
514 515

        let cost = wr.tell_frac() - tell;
516
        let rd = if fi.use_tx_domain_distortion && !needs_rec {
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542
          compute_tx_rd_cost(
            fi,
            fs,
            w,
            h,
            is_chroma_block,
            bo,
            cost,
            tx_dist,
            seq.bit_depth,
            skip,
            false
          )
        } else {
          compute_rd_cost(
            fi,
            fs,
            w,
            h,
            is_chroma_block,
            bo,
            cost,
            seq.bit_depth,
            false
          )
        };
543
        if rd < best.rd {
fbossen's avatar
fbossen committed
544
        //if rd < best.rd || luma_mode == PredictionMode::NEW_NEWMV {
545 546 547
          best.rd = rd;
          best.mode_luma = luma_mode;
          best.mode_chroma = chroma_mode;
548
          best.ref_frames = ref_frames;
Josh Holmer's avatar
Josh Holmer committed
549
          best.mvs = mvs;
550 551 552
          best.skip = skip;
          best.tx_size = tx_size;
          best.tx_type = tx_type;
553
        }
554 555

        cw.rollback(&cw_checkpoint);
556
      });
557 558 559 560 561 562 563
    };

    chroma_rdo(false);
    // Don't skip when using intra modes
    if !luma_mode_is_intra {
        chroma_rdo(true);
    };
564 565
  };

566 567 568 569 570
  if fi.frame_type != FrameType::INTER {
    assert!(mode_set.len() == 0);
  }

  mode_set.iter().for_each(|&(luma_mode, i)| {
fbossen's avatar
fbossen committed
571 572 573 574
    let mvs = match luma_mode {
      PredictionMode::NEWMV | PredictionMode::NEW_NEWMV => mvs_from_me[i],
      PredictionMode::NEARESTMV | PredictionMode::NEAREST_NEARESTMV => if mv_stacks[i].len() > 0 {
        [mv_stacks[i][0].this_mv, mv_stacks[i][0].comp_mv]
575
      } else {
fbossen's avatar
fbossen committed
576
        [MotionVector { row: 0, col: 0 }; 2]
577
      },
578
      PredictionMode::NEAR0MV => if mv_stacks[i].len() > 1 {
fbossen's avatar
fbossen committed
579
        [mv_stacks[i][1].this_mv, mv_stacks[i][1].comp_mv]
580
      } else {
fbossen's avatar
fbossen committed
581
        [MotionVector { row: 0, col: 0 }; 2]
582 583
      },
      PredictionMode::NEAR1MV | PredictionMode::NEAR2MV =>
fbossen's avatar
fbossen committed
584 585 586 587 588
          [mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].this_mv,
          mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].comp_mv],
      PredictionMode::NEAREST_NEWMV => [mv_stacks[i][0].this_mv, mvs_from_me[i][1]],
      PredictionMode::NEW_NEARESTMV => [mvs_from_me[i][0], mv_stacks[i][0].comp_mv],
      _ => [MotionVector { row: 0, col: 0 }; 2]
589 590 591
    };
    let mode_set_chroma = vec![luma_mode];

592
    luma_rdo(luma_mode, fs, cw, &mut best, mvs, ref_frames_set[i], &mode_set_chroma, false,
593
             mode_contexts[i], &mv_stacks[i]);
594
  });
595

596
  if !best.skip {
597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
    let tx_size = bsize.tx_size();

    // Reduce number of prediction modes at higher speed levels
    let num_modes_rdo = if (fi.frame_type == FrameType::KEY
      && fi.config.speed_settings.prediction_modes >= PredictionModesSetting::ComplexKeyframes)
      || (fi.frame_type == FrameType::INTER && fi.config.speed_settings.prediction_modes >= PredictionModesSetting::ComplexAll)
    {
      7
    } else {
      3
    };

    let intra_mode_set = RAV1E_INTRA_MODES;
    let mut sads = intra_mode_set.iter().map(|&luma_mode| {
      let rec = &mut fs.rec.planes[0];
      let po = bo.plane_offset(&rec.cfg);
      luma_mode.predict_intra(&mut rec.mut_slice(&po), tx_size, seq.bit_depth, &[0i16; 2], 0, 0, fi.w_in_b, fi.h_in_b);

      let plane_org = fs.input.planes[0].slice(&po);
      let plane_ref = rec.slice(&po);

      (luma_mode, get_sad(&plane_org, &plane_ref, tx_size.height(), tx_size.width(), seq.bit_depth))
    }).collect::<Vec<_>>();

    sads.sort_by_key(|a| a.1);

    // Find mode with lowest rate cost
    let mut z = 32768;
    let probs_all = if fi.frame_type == FrameType::INTER {
      cw.get_cdf_intra_mode(bsize)
    } else {
      cw.get_cdf_intra_mode_kf(bo)
    }.iter().take(INTRA_MODES).map(|&a| { let d = z - a; z = a; d }).collect::<Vec<_>>();


    let mut probs = intra_mode_set.iter().map(|&a| (a, probs_all[a as usize])).collect::<Vec<_>>();
    probs.sort_by_key(|a| !a.1);

    let mut modes = Vec::new();
    probs.iter().take(num_modes_rdo / 2).for_each(|&(luma_mode, _prob)| modes.push(luma_mode));
    sads.iter().take(num_modes_rdo).for_each(|&(luma_mode, _sad)| if !modes.contains(&luma_mode) { modes.push(luma_mode) } );

    modes.iter().take(num_modes_rdo).for_each(|&luma_mode| {
Josh Holmer's avatar
Josh Holmer committed
640
      let mvs = [MotionVector { row: 0, col: 0 }; 2];
641
      let ref_frames = [INTRA_FRAME, NONE_FRAME];
642 643 644 645
      let mut mode_set_chroma = vec![luma_mode];
      if is_chroma_block && luma_mode != PredictionMode::DC_PRED {
        mode_set_chroma.push(PredictionMode::DC_PRED);
      }
fbossen's avatar
fbossen committed
646
      luma_rdo(luma_mode, fs, cw, &mut best, mvs, ref_frames, &mode_set_chroma, true,
647
               0, &Vec::new());
648
    });
649
  }
Michael Bebenita's avatar
Michael Bebenita committed
650

651
  if best.mode_luma.is_intra() && is_chroma_block && bsize.cfl_allowed() {
652 653
    let chroma_mode = PredictionMode::UV_CFL_PRED;
    let cw_checkpoint = cw.checkpoint();
654
    let wr: &mut dyn Writer = &mut WriterCounter::new();
655 656 657 658 659
    write_tx_blocks(
      fi,
      fs,
      cw,
      wr,
660 661
      best.mode_luma,
      best.mode_luma,
662 663
      bo,
      bsize,
664 665
      best.tx_size,
      best.tx_type,
666 667
      false,
      seq.bit_depth,
668
      seq.chroma_sampling,
669
      CFLParams::new(),
670 671
      true,
      false
672 673
    );
    cw.rollback(&cw_checkpoint);
674
    if let Some(cfl) = rdo_cfl_alpha(fs, bo, bsize, seq.bit_depth, seq.chroma_sampling) {
675 676 677
      let mut wr: &mut dyn Writer = &mut WriterCounter::new();
      let tell = wr.tell_frac();

678
      encode_block_a(seq, fs, cw, wr, bsize, bo, best.skip);
679 680 681 682 683 684
      encode_block_b(
        seq,
        fi,
        fs,
        cw,
        wr,
685
        best.mode_luma,
686
        chroma_mode,
687
        best.ref_frames,
Josh Holmer's avatar
Josh Holmer committed
688
        best.mvs,
689 690
        bsize,
        bo,
691
        best.skip,
692 693
        seq.bit_depth,
        cfl,
694 695
        best.tx_size,
        best.tx_type,
696
        0,
697 698
        &Vec::new(),
        false // For CFL, luma should be always reconstructed.
699 700 701
      );

      let cost = wr.tell_frac() - tell;
702 703

      // For CFL, tx-domain distortion is not an option.
704
      let rd =
705 706 707 708 709 710 711 712 713 714 715
        compute_rd_cost(
          fi,
          fs,
          w,
          h,
          is_chroma_block,
          bo,
          cost,
          seq.bit_depth,
          false
        );
716

717 718 719 720
      if rd < best.rd {
        best.rd = rd;
        best.mode_chroma = chroma_mode;
        best.cfl_params = cfl;
721 722 723 724 725 726
      }

      cw.rollback(&cw_checkpoint);
    }
  }

727
  cw.bc.set_mode(bo, bsize, best.mode_luma);
728
  cw.bc.set_ref_frames(bo, bsize, best.ref_frames);
Josh Holmer's avatar
Josh Holmer committed
729
  cw.bc.set_motion_vectors(bo, bsize, best.mvs);
730

731
  assert!(best.rd >= 0_f64);
Michael Bebenita's avatar
Michael Bebenita committed
732

Michael Bebenita's avatar
Michael Bebenita committed
733
  RDOOutput {
734
    rd_cost: best.rd,
Michael Bebenita's avatar
Michael Bebenita committed
735 736 737
    part_type: PartitionType::PARTITION_NONE,
    part_modes: vec![RDOPartitionOutput {
      bo: bo.clone(),
738
      bsize: bsize,
739 740 741
      pred_mode_luma: best.mode_luma,
      pred_mode_chroma: best.mode_chroma,
      pred_cfl_params: best.cfl_params,
fbossen's avatar
fbossen committed
742 743
      ref_frames: best.ref_frames,
      mvs: best.mvs,
744
      rd_cost: best.rd,
745 746 747
      skip: best.skip,
      tx_size: best.tx_size,
      tx_type: best.tx_type,
Michael Bebenita's avatar
Michael Bebenita committed
748
    }]
Michael Bebenita's avatar
Michael Bebenita committed
749
  }
Michael Bebenita's avatar
Michael Bebenita committed
750 751
}

752
pub fn rdo_cfl_alpha(
753
  fs: &mut FrameState, bo: &BlockOffset, bsize: BlockSize, bit_depth: usize,
754 755
  chroma_sampling: ChromaSampling) -> Option<CFLParams> {
  let uv_tx_size = bsize.largest_uv_tx_size(chroma_sampling);
756 757 758

  let mut ac = [0i16; 32 * 32];
  luma_ac(&mut ac, fs, bo, bsize);
759 760 761 762 763 764 765 766 767 768 769 770
  let best_alpha: Vec<i16> = (1..3)
    .map(|p| {
      let rec = &mut fs.rec.planes[p];
      let input = &fs.input.planes[p];
      let po = bo.plane_offset(&fs.input.planes[p].cfg);
      (-16i16..17i16)
        .min_by_key(|&alpha| {
          PredictionMode::UV_CFL_PRED.predict_intra(
            &mut rec.mut_slice(&po),
            uv_tx_size,
            bit_depth,
            &ac,
Frank Bossen's avatar
Frank Bossen committed
771 772 773 774
            alpha,
            p,
            0, // don't care about frame width for CFL prediction
            0  // don't care about frame height for CFL prediction
775 776 777 778 779 780 781 782 783 784 785 786 787 788
          );
          sse_wxh(
            &input.slice(&po),
            &rec.slice(&po),
            uv_tx_size.width(),
            uv_tx_size.height()
          )
        }).unwrap()
    }).collect();

  if best_alpha[0] == 0 && best_alpha[1] == 0 {
    None
  } else {
    Some(CFLParams::from_alpha(best_alpha[0], best_alpha[1]))
789 790 791
  }
}

792
// RDO-based transform type decision
Michael Bebenita's avatar
Michael Bebenita committed
793
pub fn rdo_tx_type_decision(
Monty Montgomery's avatar
Monty Montgomery committed
794
  fi: &FrameInvariants, fs: &mut FrameState, cw: &mut ContextWriter,
795
  mode: PredictionMode, ref_frames: [usize; 2], mvs: [MotionVector; 2], bsize: BlockSize, bo: &BlockOffset, tx_size: TxSize,
796
  tx_set: TxSet, bit_depth: usize, chroma_sampling: ChromaSampling
Michael Bebenita's avatar
Michael Bebenita committed
797 798 799 800 801 802 803 804
) -> TxType {
  let mut best_type = TxType::DCT_DCT;
  let mut best_rd = std::f64::MAX;

  // Get block luma and chroma dimensions
  let w = bsize.width();
  let h = bsize.height();

Michael Bebenita's avatar
Michael Bebenita committed
805
  let PlaneConfig { xdec, ydec, .. } = fs.input.planes[1].cfg;
806
  let is_chroma_block = has_chroma(bo, bsize, xdec, ydec);
Michael Bebenita's avatar
Michael Bebenita committed
807

808
  let is_inter = !mode.is_intra();
Michael Bebenita's avatar
Michael Bebenita committed
809

810
  let cw_checkpoint = cw.checkpoint();
Michael Bebenita's avatar
Michael Bebenita committed
811 812 813

  for &tx_type in RAV1E_TX_TYPES {
    // Skip unsupported transform types
Yushin Cho's avatar
Yushin Cho committed
814
    if av1_tx_used[tx_set as usize][tx_type as usize] == 0 {
Michael Bebenita's avatar
Michael Bebenita committed
815 816 817
      continue;
    }

818 819 820
    if is_inter {
      motion_compensate(fi, fs, cw, mode, ref_frames, mvs, bsize, bo, bit_depth, true);
    }
821

gibix's avatar
gibix committed
822
    let mut wr: &mut dyn Writer = &mut WriterCounter::new();
Monty Montgomery's avatar
Monty Montgomery committed
823
    let tell = wr.tell_frac();
824
    let tx_dist = if is_inter {
825
      write_tx_tree(
826
        fi, fs, cw, wr, mode, bo, bsize, tx_size, tx_type, false, bit_depth, chroma_sampling, true, true
827
      )
828
    }  else {
829
      let cfl = CFLParams::new(); // Unused
830
      write_tx_blocks(
831
        fi, fs, cw, wr, mode, mode, bo, bsize, tx_size, tx_type, false, bit_depth, chroma_sampling, cfl, true, true
832 833
      )
    };
Michael Bebenita's avatar
Michael Bebenita committed
834

835
    let cost = wr.tell_frac() - tell;
836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862
      let rd = if fi.use_tx_domain_distortion {
        compute_tx_rd_cost(
          fi,
          fs,
          w,
          h,
          is_chroma_block,
          bo,
          cost,
          tx_dist,
          bit_depth,
          false,
          true
        )
      } else {
        compute_rd_cost(
          fi,
          fs,
          w,
          h,
          is_chroma_block,
          bo,
          cost,
          bit_depth,
          true
        )
    };
Michael Bebenita's avatar
Michael Bebenita committed
863 864 865
    if rd < best_rd {
      best_rd = rd;
      best_type = tx_type;
866 867
    }

868
    cw.rollback(&cw_checkpoint);
Michael Bebenita's avatar
Michael Bebenita committed
869 870 871
  }

  assert!(best_rd >= 0_f64);
872

Michael Bebenita's avatar
Michael Bebenita committed
873
  best_type
874 875
}

876 877 878 879
pub fn get_sub_partitions<'a>(four_partitions: &[&'a BlockOffset; 4],
   partition: PartitionType) -> Vec<&'a BlockOffset> {
  let mut partitions = vec![ four_partitions[0] ];

880 881 882
  if partition == PARTITION_NONE {
      return partitions;
  }
883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899
  if partition == PARTITION_VERT || partition == PARTITION_SPLIT {
     partitions.push(four_partitions[1]);
  };
  if partition == PARTITION_HORZ || partition == PARTITION_SPLIT {
     partitions.push(four_partitions[2]);
  };
  if partition == PARTITION_SPLIT {
     partitions.push(four_partitions[3]);
  };

  partitions
}

pub fn get_sub_partitions_with_border_check<'a>(four_partitions: &[&'a BlockOffset; 4],
   partition: PartitionType, fi: &FrameInvariants, subsize: BlockSize) -> Vec<&'a BlockOffset> {
  let mut partitions = vec![ four_partitions[0] ];

900 901 902
  if partition == PARTITION_NONE {
      return partitions;
  }
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924
  let hbsw = subsize.width_mi(); // Half the block size width in blocks
  let hbsh = subsize.height_mi(); // Half the block size height in blocks

  if partition == PARTITION_VERT || partition == PARTITION_SPLIT {
    if four_partitions[1].x + hbsw as usize <= fi.w_in_b &&
      four_partitions[1].y + hbsh as usize <= fi.h_in_b {
        partitions.push(four_partitions[1]); }
  };
  if partition == PARTITION_HORZ || partition == PARTITION_SPLIT {
    if four_partitions[2].x + hbsw as usize <= fi.w_in_b &&
      four_partitions[2].y + hbsh as usize <= fi.h_in_b {
        partitions.push(four_partitions[2]); }
  };
  if partition == PARTITION_SPLIT {
    if four_partitions[3].x + hbsw as usize <= fi.w_in_b &&
      four_partitions[3].y + hbsh as usize <= fi.h_in_b {
        partitions.push(four_partitions[3]); }
  };

  partitions
}

925
// RDO-based single level partitioning decision
Michael Bebenita's avatar
Michael Bebenita committed
926
pub fn rdo_partition_decision(
927
  seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
928 929
  cw: &mut ContextWriter, w_pre_cdef: &mut dyn Writer, w_post_cdef: &mut dyn Writer,
  bsize: BlockSize, bo: &BlockOffset,
930 931
  cached_block: &RDOOutput, pmvs: &[[Option<MotionVector>; REF_FRAMES]; 5],
  partition_types: &Vec<PartitionType>,
932
) -> RDOOutput {
Michael Bebenita's avatar
Michael Bebenita committed
933 934 935 936
  let mut best_partition = cached_block.part_type;
  let mut best_rd = cached_block.rd_cost;
  let mut best_pred_modes = cached_block.part_modes.clone();

937
  for &partition in partition_types {
Michael Bebenita's avatar
Michael Bebenita committed
938
    // Do not re-encode results we already have
939
    if partition == cached_block.part_type {
Michael Bebenita's avatar
Michael Bebenita committed
940 941
      continue;
    }
942

943
    let mut cost: f64 = 0.0;
Michael Bebenita's avatar
Michael Bebenita committed
944
    let mut child_modes = std::vec::Vec::new();
945

Michael Bebenita's avatar
Michael Bebenita committed
946 947
    match partition {
      PartitionType::PARTITION_NONE => {
948
        if bsize > BlockSize::BLOCK_64X64 {
Michael Bebenita's avatar
Michael Bebenita committed
949
          continue;
950 951
        }

952 953 954 955 956 957
        let pmv_idx = if bsize > BlockSize::BLOCK_32X32 {
          0
        } else {
          ((bo.x & 32) >> 5) + ((bo.y & 32) >> 4) + 1
        };

958 959
        let spmvs = &pmvs[pmv_idx];

960
        let mode_decision = rdo_mode_decision(seq, fi, fs, cw, bsize, bo, spmvs, false).part_modes[0].clone();
Michael Bebenita's avatar
Michael Bebenita committed
961 962
        child_modes.push(mode_decision);
      }
963 964 965
      PARTITION_SPLIT |
      PARTITION_HORZ |
      PARTITION_VERT => {
966
        let subsize = bsize.subsize(partition);
Michael Bebenita's avatar
Michael Bebenita committed
967 968 969

        if subsize == BlockSize::BLOCK_INVALID {
          continue;
970
        }
971

972
        //pmv = best_pred_modes[0].mvs[0];
973

974
        assert!(best_pred_modes.len() <= 4);
975 976 977 978

        let hbsw = subsize.width_mi(); // Half the block size width in blocks
        let hbsh = subsize.height_mi(); // Half the block size height in blocks
        let four_partitions = [
979
          bo,
980 981 982
          &BlockOffset{ x: bo.x + hbsw as usize, y: bo.y },
          &BlockOffset{ x: bo.x, y: bo.y + hbsh as usize },
          &BlockOffset{ x: bo.x + hbsw as usize, y: bo.y + hbsh as usize }
983
        ];
984
        let partitions = get_sub_partitions_with_border_check(&four_partitions, partition, fi, subsize);
985 986

        let pmv_idxs = partitions.iter().map(|&offset| {
987
          if subsize.greater_than(BlockSize::BLOCK_32X32) {
988 989 990 991 992 993
              0
          } else {
              ((offset.x & 32) >> 5) + ((offset.y & 32) >> 4) + 1
          }
        }).collect::<Vec<_>>();

994 995 996 997
        let cw_checkpoint = cw.checkpoint();
        let w_pre_checkpoint = w_pre_cdef.checkpoint();
        let w_post_checkpoint = w_post_cdef.checkpoint();

998 999 1000 1001 1002 1003 1004
        if bsize >= BlockSize::BLOCK_8X8 {
          let w: &mut dyn Writer = if cw.bc.cdef_coded {w_post_cdef} else {w_pre_cdef};
          let tell = w.tell_frac();
          cw.write_partition(w, bo, partition, bsize);
          cost = (w.tell_frac() - tell) as f64 * get_lambda(fi, seq.bit_depth)/ ((1 << OD_BITRES) as f64);
        }

1005 1006
        child_modes.extend(
          partitions
1007 1008
            .iter().zip(pmv_idxs)
            .map(|(&offset, pmv_idx)| {
1009
              let mode_decision =
1010
              rdo_mode_decision(seq, fi, fs, cw, subsize, &offset,
1011
                &pmvs[pmv_idx], true)
1012
                .part_modes[0]
1013 1014
                .clone();

1015 1016 1017
                if subsize >= BlockSize::BLOCK_8X8 && subsize.is_sqr() {
                  let w: &mut dyn Writer = if cw.bc.cdef_coded {w_post_cdef} else {w_pre_cdef};
                  cw.write_partition(w, offset, PartitionType::PARTITION_NONE, subsize);
1018
                }
1019

1020
                encode_block_with_modes(seq, fi, fs, cw, w_pre_cdef, w_post_cdef, subsize,
1021
                                    offset, &mode_decision);
1022
                mode_decision
1023 1024
            }).collect::<Vec<_>>()
        );
1025 1026 1027
        cw.rollback(&cw_checkpoint);
        w_pre_cdef.rollback(&w_pre_checkpoint);
        w_post_cdef.rollback(&w_post_checkpoint);
Michael Bebenita's avatar
Michael Bebenita committed
1028 1029 1030 1031 1032
      }
      _ => {
        assert!(false);
      }
    }
1033

1034
    let rd = cost + child_modes.iter().map(|m| m.rd_cost).sum::<f64>();
Michael Bebenita's avatar
Michael Bebenita committed
1035 1036 1037 1038 1039

    if rd < best_rd {
      best_rd = rd;
      best_partition = partition;
      best_pred_modes = child_modes.clone();
1040
    }
Michael Bebenita's avatar
Michael Bebenita committed
1041 1042 1043
  }

  assert!(best_rd >= 0_f64);
1044

Michael Bebenita's avatar
Michael Bebenita committed
1045
  RDOOutput {
Michael Bebenita's avatar
Michael Bebenita committed
1046 1047 1048
    rd_cost: best_rd,
    part_type: best_partition,
    part_modes: best_pred_modes
Michael Bebenita's avatar
Michael Bebenita committed
1049
  }
1050
}
1051

Monty's avatar
Monty committed
1052 1053 1054 1055 1056 1057 1058 1059 1060
pub fn rdo_cdef_decision(sbo: &SuperBlockOffset, fi: &FrameInvariants,
                         fs: &FrameState, cw: &mut ContextWriter, bit_depth: usize) -> u8 {
    // FIXME: 128x128 SB support will break this, we need FilterBlockOffset etc.
    // Construct a single-superblock-sized frame to test-filter into
    let sbo_0 = SuperBlockOffset { x: 0, y: 0 };
    let bc = &mut cw.bc;
    let mut cdef_output = Frame {
        planes: [
            Plane::new(64 >> fs.rec.planes[0].cfg.xdec, 64 >> fs.rec.planes[0].cfg.ydec,
fbossen's avatar
fbossen committed
1061
                       fs.rec.planes[0].cfg.xdec, fs.rec.planes[0].cfg.ydec, 0, 0),
Monty's avatar
Monty committed
1062
            Plane::new(64 >> fs.rec.planes[1].cfg.xdec, 64 >> fs.rec.planes[1].cfg.ydec,
fbossen's avatar
fbossen committed
1063
                       fs.rec.planes[1].cfg.xdec, fs.rec.planes[1].cfg.ydec, 0, 0),
Monty's avatar
Monty committed
1064
            Plane::new(64 >> fs.rec.planes[2].cfg.xdec, 64 >> fs.rec.planes[2].cfg.ydec,
fbossen's avatar
fbossen committed
1065
                       fs.rec.planes[2].cfg.xdec, fs.rec.planes[2].cfg.ydec, 0, 0),
Monty's avatar
Monty committed
1066 1067 1068 1069 1070 1071
        ]
    };
    // Construct a padded input
    let mut rec_input = Frame {
        planes: [
            Plane::new((64 >> fs.rec.planes[0].cfg.xdec)+4, (64 >> fs.rec.planes[0].cfg.ydec)+4,
fbossen's avatar
fbossen committed
1072
                       fs.rec.planes[0].cfg.xdec, fs.rec.planes[0].cfg.ydec, 0, 0),
Monty's avatar
Monty committed
1073
            Plane::new((64 >> fs.rec.planes[1].cfg.xdec)+4, (64 >> fs.rec.planes[1].cfg.ydec)+4,
fbossen's avatar
fbossen committed
1074
                       fs.rec.planes[1].cfg.xdec, fs.rec.planes[1].cfg.ydec, 0, 0),
Monty's avatar
Monty committed
1075
            Plane::new((64 >> fs.rec.planes[2].cfg.xdec)+4, (64 >> fs.rec.planes[2].cfg.ydec)+4,
fbossen's avatar
fbossen committed
1076
                       fs.rec.planes[2].cfg.xdec, fs.rec.planes[2].cfg.ydec, 0, 0),
Monty's avatar
Monty committed
1077 1078 1079 1080 1081 1082
        ]
    };
    // Copy reconstructed data into padded input
    for p in 0..3 {
        let xdec = fs.rec.planes[p].cfg.xdec;
        let ydec = fs.rec.planes[p].cfg.ydec;
fbossen's avatar
fbossen committed
1083 1084
        let h = fi.padded_h as isize >> ydec;
        let w = fi.padded_w as isize >> xdec;
Monty's avatar
Monty committed
1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099
        let offset = sbo.plane_offset(&fs.rec.planes[p].cfg);
        for y in 0..(64>>ydec)+4 {
            let mut rec_slice = rec_input.planes[p].mut_slice(&PlaneOffset {x:0, y:y});
            let mut rec_row = rec_slice.as_mut_slice();
            if offset.y+y < 2 || offset.y+y >= h+2 {
                // above or below the frame, fill with flag
                for x in 0..(64>>xdec)+4 { rec_row[x] = CDEF_VERY_LARGE; }
            } else {
                let mut in_slice = fs.rec.planes[p].slice(&PlaneOffset {x:0, y:offset.y+y-2});
                let mut in_row = in_slice.as_slice();
                // are we guaranteed to be all in frame this row?
                if offset.x < 2 || offset.x+(64>>xdec)+2 >= w {
                    // No; do it the hard way.  off left or right edge, fill with flag.
                    for x in 0..(64>>xdec)+4 {
                        if offset.x+x >= 2 && offset.x+x < w+2 {
fbossen's avatar
fbossen committed
1100
                            rec_row[x as usize] = in_row[(offset.x+x-2) as usize]
Monty's avatar
Monty committed
1101
                        } else {
fbossen's avatar
fbossen committed
1102
                            rec_row[x as usize] = CDEF_VERY_LARGE;
Monty's avatar
Monty committed
1103 1104 1105 1106
                        }
                    }
                }  else  {
                    // Yes, do it the easy way: just copy
fbossen's avatar
fbossen committed
1107
                    rec_row[0..(64>>xdec)+4].copy_from_slice(&in_row[(offset.x-2) as usize..(offset.x+(64>>xdec)+2) as usize]);
Monty's avatar
Monty committed
1108 1109 1110 1111 1112 1113 1114 1115
                }
            }
        }
    }

    // RDO comparisons
    let mut best_index: u8 = 0;
    let mut best_err: u64 = 0;
1116
    let cdef_dirs = cdef_analyze_superblock(&mut rec_input, bc, &sbo_0, &sbo, bit_depth);
Monty's avatar
Monty committed
1117 1118 1119 1120 1121
    for cdef_index in 0..(1<<fi.cdef_bits) {
        //for p in 0..3 {
        //    for i in 0..cdef_output.planes[p].data.len() { cdef_output.planes[p].data[i] = CDEF_VERY_LARGE; }
        //}
        // TODO: Don't repeat find_direction over and over; split filter_superblock to run it separately
1122 1123
        cdef_filter_superblock(fi, &mut rec_input, &mut cdef_output,
                               bc, &sbo_0, &sbo, bit_depth, cdef_index, &cdef_dirs);
Monty's avatar
Monty committed
1124 1125 1126 1127 1128 1129 1130 1131 1132

        // Rate is constant, compute just distortion
        // Computation is block by block, paying attention to skip flag

        // Each direction block is 8x8 in y, potentially smaller if subsampled in chroma
        // We're dealing only with in-frmae and unpadded planes now
        let mut err:u64 = 0;
        for by in 0..8 {
            for bx in 0..8 {
1133 1134
                let bo = sbo.block_offset(bx<<1, by<<1);
                if bo.x < bc.cols && bo.y < bc.rows {