vp9_spatial_scalable_encoder.c 9.59 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * This is an example demonstrating how to implement a multi-layer
 * VP9 encoding scheme based on spatial scalability for video applications
 * that benefit from a scalable bitstream.
 */
16

17
#include <stdarg.h>
18
#include <stdlib.h>
19
#include <string.h>
20
#include <time.h>
21

22
#include "./args.h"
23
#include "./tools_common.h"
24 25
#include "./video_writer.h"

26
#include "vpx/svc_context.h"
27
#include "vpx/vp8cx.h"
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
#include "vpx/vpx_encoder.h"

static const struct arg_enum_list encoding_mode_enum[] = {
  {"i", INTER_LAYER_PREDICTION_I},
  {"alt-ip", ALT_INTER_LAYER_PREDICTION_IP},
  {"ip", INTER_LAYER_PREDICTION_IP},
  {"gf", USE_GOLDEN_FRAME},
  {NULL, 0}
};

static const arg_def_t encoding_mode_arg = ARG_DEF_ENUM(
    "m", "encoding-mode", 1, "Encoding mode algorithm", encoding_mode_enum);
static const arg_def_t skip_frames_arg =
    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
static const arg_def_t frames_arg =
    ARG_DEF("f", "frames", 1, "number of frames to encode");
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
static const arg_def_t timebase_arg =
    ARG_DEF("t", "timebase", 1, "timebase (num/den)");
static const arg_def_t bitrate_arg = ARG_DEF(
    "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
static const arg_def_t layers_arg =
    ARG_DEF("l", "layers", 1, "number of SVC layers");
static const arg_def_t kf_dist_arg =
    ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
static const arg_def_t scale_factors_arg =
    ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
static const arg_def_t quantizers_arg =
    ARG_DEF("q", "quantizers", 1, "quantizers (lowest to highest layer)");

static const arg_def_t *svc_args[] = {
  &encoding_mode_arg, &frames_arg,        &width_arg,       &height_arg,
  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &layers_arg,
62
  &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,  NULL
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
};

static const SVC_ENCODING_MODE default_encoding_mode =
    INTER_LAYER_PREDICTION_IP;
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;
static const uint32_t default_bitrate = 1000;
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_kf_dist = 100;

typedef struct {
  char *output_filename;
  uint32_t frames_to_code;
  uint32_t frames_to_skip;
81
  struct VpxInputContext input_ctx;
82 83
} AppInput;

84 85 86
static const char *exec_name;

void usage_exit() {
87 88 89 90
  fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
          exec_name);
  fprintf(stderr, "Options:\n");
  arg_show_usage(stderr, svc_args);
91 92 93
  exit(EXIT_FAILURE);
}

94 95 96 97 98
static void parse_command_line(int argc, const char **argv_,
                               AppInput *app_input, SvcContext *svc_ctx,
                               vpx_codec_enc_cfg_t *enc_cfg) {
  struct arg arg;
  char **argv, **argi, **argj;
99 100
  vpx_codec_err_t res;

101 102 103 104
  // initialize SvcContext with parameters that will be passed to vpx_svc_init
  svc_ctx->log_level = SVC_LOG_DEBUG;
  svc_ctx->spatial_layers = default_spatial_layers;
  svc_ctx->encoding_mode = default_encoding_mode;
105

106 107
  // start with default encoder configuration
  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
108 109 110
  if (res) {
    die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
  }
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
  // update enc_cfg with app default values
  enc_cfg->g_w = default_width;
  enc_cfg->g_h = default_height;
  enc_cfg->g_timebase.num = default_timebase_num;
  enc_cfg->g_timebase.den = default_timebase_den;
  enc_cfg->rc_target_bitrate = default_bitrate;
  enc_cfg->kf_min_dist = default_kf_dist;
  enc_cfg->kf_max_dist = default_kf_dist;

  // initialize AppInput with default values
  app_input->frames_to_code = default_frames_to_code;
  app_input->frames_to_skip = default_frames_to_skip;

  // process command line options
  argv = argv_dup(argc - 1, argv_ + 1);
  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
    arg.argv_step = 1;

    if (arg_match(&arg, &encoding_mode_arg, argi)) {
      svc_ctx->encoding_mode = arg_parse_enum_or_int(&arg);
    } else if (arg_match(&arg, &frames_arg, argi)) {
      app_input->frames_to_code = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &width_arg, argi)) {
      enc_cfg->g_w = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &height_arg, argi)) {
      enc_cfg->g_h = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &timebase_arg, argi)) {
      enc_cfg->g_timebase = arg_parse_rational(&arg);
    } else if (arg_match(&arg, &bitrate_arg, argi)) {
      enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &skip_frames_arg, argi)) {
      app_input->frames_to_skip = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &layers_arg, argi)) {
      svc_ctx->spatial_layers = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &kf_dist_arg, argi)) {
      enc_cfg->kf_min_dist = arg_parse_uint(&arg);
      enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
      vpx_svc_set_scale_factors(svc_ctx, arg.val);
    } else if (arg_match(&arg, &quantizers_arg, argi)) {
      vpx_svc_set_quantizers(svc_ctx, arg.val);
    } else {
      ++argj;
    }
  }
James Zern's avatar
James Zern committed
156

157 158 159 160
  // Check for unrecognized options
  for (argi = argv; *argi; ++argi)
    if (argi[0][0] == '-' && strlen(argi[0]) > 1)
      die("Error: Unrecognized option %s\n", *argi);
James Zern's avatar
James Zern committed
161

162
  if (argv[0] == NULL || argv[1] == 0) {
163
    usage_exit();
164
  }
165
  app_input->input_ctx.filename = argv[0];
166 167
  app_input->output_filename = argv[1];
  free(argv);
James Zern's avatar
James Zern committed
168

169 170 171
  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
      enc_cfg->g_h % 2)
    die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
James Zern's avatar
James Zern committed
172

173 174 175 176 177
  printf(
      "Codec %s\nframes: %d, skip: %d\n"
      "mode: %d, layers: %d\n"
      "width %d, height: %d,\n"
      "num: %d, den: %d, bitrate: %d,\n"
178
      "gop size: %d\n",
179 180 181 182
      vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
      app_input->frames_to_skip, svc_ctx->encoding_mode,
      svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
      enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
183
      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
184 185
}

186 187
int main(int argc, const char **argv) {
  AppInput app_input = {0};
188 189
  VpxVideoWriter *writer = NULL;
  VpxVideoInfo info = {0};
190
  vpx_codec_ctx_t codec;
191 192 193 194
  vpx_codec_enc_cfg_t enc_cfg;
  SvcContext svc_ctx;
  uint32_t i;
  uint32_t frame_cnt = 0;
195
  vpx_image_t raw;
196
  vpx_codec_err_t res;
197 198 199
  int pts = 0;            /* PTS starts at 0 */
  int frame_duration = 1; /* 1 timebase tick per frame */

200 201
  memset(&svc_ctx, 0, sizeof(svc_ctx));
  svc_ctx.log_print = 1;
202
  exec_name = argv[0];
203
  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
204 205

  // Allocate image buffer
206 207 208
  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32))
    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);

209 210
  if (!(app_input.input_ctx.file = fopen(app_input.input_ctx.filename, "rb")))
    die("Failed to open %s for reading\n", app_input.input_ctx.filename);
211 212 213 214 215

  // Initialize codec
  if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
      VPX_CODEC_OK)
    die("Failed to initialize encoder\n");
Ivan Maltz's avatar
Ivan Maltz committed
216

217 218 219 220 221 222 223 224 225 226 227 228 229
  info.codec_fourcc = VP9_FOURCC;
  info.time_base.numerator = enc_cfg.g_timebase.num;
  info.time_base.denominator = enc_cfg.g_timebase.den;
  if (vpx_svc_get_layer_resolution(&svc_ctx, svc_ctx.spatial_layers - 1,
                                   (unsigned int *)&info.frame_width,
                                   (unsigned int *)&info.frame_height) !=
      VPX_CODEC_OK) {
    die("Failed to get output resolution");
  }
  writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF,
                                 &info);
  if (!writer)
    die("Failed to open %s for writing\n", app_input.output_filename);
230 231

  // skip initial frames
232
  for (i = 0; i < app_input.frames_to_skip; ++i) {
233
    read_yuv_frame(&app_input.input_ctx, &raw);
234 235
  }

236
  // Encode frames
237 238 239
  while (frame_cnt < app_input.frames_to_code) {
    if (read_yuv_frame(&app_input.input_ctx, &raw)) break;

240 241 242 243
    res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration,
                         VPX_DL_REALTIME);
    printf("%s", vpx_svc_get_message(&svc_ctx));
    if (res != VPX_CODEC_OK) {
244 245
      die_codec(&codec, "Failed to encode frame");
    }
246
    if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
247 248 249 250
      vpx_video_writer_write_frame(writer,
                                   vpx_svc_get_buffer(&svc_ctx),
                                   vpx_svc_get_frame_size(&svc_ctx),
                                   pts);
251 252
    }
    ++frame_cnt;
253
    pts += frame_duration;
James Zern's avatar
James Zern committed
254
  }
255

256
  printf("Processed %d frames\n", frame_cnt);
257

258
  fclose(app_input.input_ctx.file);
259 260
  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");

261 262
  vpx_video_writer_close(writer);

263 264 265 266 267 268
  vpx_img_free(&raw);

  // display average size, psnr
  printf("%s", vpx_svc_dump_statistics(&svc_ctx));

  vpx_svc_release(&svc_ctx);
269 270 271

  return EXIT_SUCCESS;
}