/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * This is an example demonstrating how to implement a multi-layer
 * VP9 encoding scheme based on spatial scalability for video applications
 * that benefit from a scalable bitstream.
 */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "./args.h"
#include "./ivfenc.h"
#include "./tools_common.h"
#include "vpx/svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

// Names accepted by the encoding-mode option, paired with the
// SVC_ENCODING_MODE value each one selects. The {NULL, 0} entry ends the list.
static const struct arg_enum_list encoding_mode_enum[] = {
  { "i",      INTER_LAYER_PREDICTION_I },
  { "alt-ip", ALT_INTER_LAYER_PREDICTION_IP },
  { "ip",     INTER_LAYER_PREDICTION_IP },
  { "gf",     USE_GOLDEN_FRAME },
  { NULL,     0 }
};

// Command-line option descriptors, one per option understood by
// parse_command_line(). The ARG_DEF arguments are presumably (short name,
// long name, takes-a-value flag, help text) -- confirm against args.h.

// Encoding mode; the value is looked up in encoding_mode_enum.
static const arg_def_t encoding_mode_arg =
    ARG_DEF_ENUM("m", "encoding-mode", 1, "Encoding mode algorithm",
                 encoding_mode_enum);

// Frame selection.
static const arg_def_t skip_frames_arg =
    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
static const arg_def_t frames_arg =
    ARG_DEF("f", "frames", 1, "number of frames to encode");

// Source geometry and timing.
static const arg_def_t width_arg =
    ARG_DEF("w", "width", 1, "source width");
static const arg_def_t height_arg =
    ARG_DEF("h", "height", 1, "source height");
static const arg_def_t timebase_arg =
    ARG_DEF("t", "timebase", 1, "timebase (num/den)");

// Rate control and keyframe spacing.
static const arg_def_t bitrate_arg =
    ARG_DEF("b", "target-bitrate", 1,
            "encoding bitrate, in kilobits per second");
static const arg_def_t kf_dist_arg =
    ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");

// Spatial-layer configuration.
static const arg_def_t layers_arg =
    ARG_DEF("l", "layers", 1, "number of SVC layers");
static const arg_def_t scale_factors_arg =
    ARG_DEF("r", "scale-factors", 1,
            "scale factors (lowest to highest layer)");
static const arg_def_t quantizers_arg =
    ARG_DEF("q", "quantizers", 1, "quantizers (lowest to highest layer)");

// NULL-terminated list of every option descriptor defined in this file.
// usage_exit() hands it to arg_show_usage() to print the option summary.
static const arg_def_t *svc_args[] = {
  &encoding_mode_arg, &frames_arg,        &width_arg,       &height_arg,
  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &layers_arg,
  &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,  NULL
};

// Values used by parse_command_line() for any setting that is not overridden
// on the command line.

// SVC settings.
static const SVC_ENCODING_MODE default_encoding_mode =
    INTER_LAYER_PREDICTION_IP;
static const uint32_t default_spatial_layers = 5;

// Frame selection.
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;

// Source geometry and timebase (num/den).
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;

// Rate control and keyframe distance (applied to both kf_min_dist and
// kf_max_dist).
static const uint32_t default_bitrate = 1000;
static const uint32_t default_kf_dist = 100;

typedef struct {
  char *output_filename;
  uint32_t frames_to_code;
  uint32_t frames_to_skip;
79
  struct VpxInputContext input_ctx;
80 81
} AppInput;

// Program name (argv[0]); set at the start of main() and printed by
// usage_exit().
static const char *exec_name;

void usage_exit() {
85 86 87 88
  fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
          exec_name);
  fprintf(stderr, "Options:\n");
  arg_show_usage(stderr, svc_args);
89 90 91
  exit(EXIT_FAILURE);
}

92 93 94 95 96
static void parse_command_line(int argc, const char **argv_,
                               AppInput *app_input, SvcContext *svc_ctx,
                               vpx_codec_enc_cfg_t *enc_cfg) {
  struct arg arg;
  char **argv, **argi, **argj;
97 98
  vpx_codec_err_t res;

99 100 101 102
  // initialize SvcContext with parameters that will be passed to vpx_svc_init
  svc_ctx->log_level = SVC_LOG_DEBUG;
  svc_ctx->spatial_layers = default_spatial_layers;
  svc_ctx->encoding_mode = default_encoding_mode;
103

104 105
  // start with default encoder configuration
  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
106 107 108
  if (res) {
    die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
  }
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
  // update enc_cfg with app default values
  enc_cfg->g_w = default_width;
  enc_cfg->g_h = default_height;
  enc_cfg->g_timebase.num = default_timebase_num;
  enc_cfg->g_timebase.den = default_timebase_den;
  enc_cfg->rc_target_bitrate = default_bitrate;
  enc_cfg->kf_min_dist = default_kf_dist;
  enc_cfg->kf_max_dist = default_kf_dist;

  // initialize AppInput with default values
  app_input->frames_to_code = default_frames_to_code;
  app_input->frames_to_skip = default_frames_to_skip;

  // process command line options
  argv = argv_dup(argc - 1, argv_ + 1);
  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
    arg.argv_step = 1;

    if (arg_match(&arg, &encoding_mode_arg, argi)) {
      svc_ctx->encoding_mode = arg_parse_enum_or_int(&arg);
    } else if (arg_match(&arg, &frames_arg, argi)) {
      app_input->frames_to_code = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &width_arg, argi)) {
      enc_cfg->g_w = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &height_arg, argi)) {
      enc_cfg->g_h = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &timebase_arg, argi)) {
      enc_cfg->g_timebase = arg_parse_rational(&arg);
    } else if (arg_match(&arg, &bitrate_arg, argi)) {
      enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &skip_frames_arg, argi)) {
      app_input->frames_to_skip = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &layers_arg, argi)) {
      svc_ctx->spatial_layers = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &kf_dist_arg, argi)) {
      enc_cfg->kf_min_dist = arg_parse_uint(&arg);
      enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
      vpx_svc_set_scale_factors(svc_ctx, arg.val);
    } else if (arg_match(&arg, &quantizers_arg, argi)) {
      vpx_svc_set_quantizers(svc_ctx, arg.val);
    } else {
      ++argj;
    }
  }
James Zern's avatar
James Zern committed
154

155 156 157 158
  // Check for unrecognized options
  for (argi = argv; *argi; ++argi)
    if (argi[0][0] == '-' && strlen(argi[0]) > 1)
      die("Error: Unrecognized option %s\n", *argi);
James Zern's avatar
James Zern committed
159

160
  if (argv[0] == NULL || argv[1] == 0) {
161
    usage_exit();
162
  }
163
  app_input->input_ctx.filename = argv[0];
164 165
  app_input->output_filename = argv[1];
  free(argv);
James Zern's avatar
James Zern committed
166

167 168 169
  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
      enc_cfg->g_h % 2)
    die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
James Zern's avatar
James Zern committed
170

171 172 173 174 175
  printf(
      "Codec %s\nframes: %d, skip: %d\n"
      "mode: %d, layers: %d\n"
      "width %d, height: %d,\n"
      "num: %d, den: %d, bitrate: %d,\n"
176
      "gop size: %d\n",
177 178 179 180
      vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
      app_input->frames_to_skip, svc_ctx->encoding_mode,
      svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
      enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
181
      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
182 183
}

184 185
int main(int argc, const char **argv) {
  AppInput app_input = {0};
186
  FILE *outfile;
187
  vpx_codec_ctx_t codec;
188 189 190 191
  vpx_codec_enc_cfg_t enc_cfg;
  SvcContext svc_ctx;
  uint32_t i;
  uint32_t frame_cnt = 0;
192
  vpx_image_t raw;
193
  vpx_codec_err_t res;
194 195 196
  int pts = 0;            /* PTS starts at 0 */
  int frame_duration = 1; /* 1 timebase tick per frame */

197 198
  memset(&svc_ctx, 0, sizeof(svc_ctx));
  svc_ctx.log_print = 1;
199
  exec_name = argv[0];
200
  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
201 202

  // Allocate image buffer
203 204 205
  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32))
    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);

206 207
  if (!(app_input.input_ctx.file = fopen(app_input.input_ctx.filename, "rb")))
    die("Failed to open %s for reading\n", app_input.input_ctx.filename);
208 209 210 211 212 213 214 215

  if (!(outfile = fopen(app_input.output_filename, "wb")))
    die("Failed to open %s for writing\n", app_input.output_filename);

  // Initialize codec
  if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
      VPX_CODEC_OK)
    die("Failed to initialize encoder\n");
Ivan Maltz's avatar
Ivan Maltz committed
216

217
  ivf_write_file_header(outfile, &enc_cfg, VP9_FOURCC, 0);
218 219

  // skip initial frames
220
  for (i = 0; i < app_input.frames_to_skip; ++i) {
221
    read_yuv_frame(&app_input.input_ctx, &raw);
222 223
  }

224
  // Encode frames
225 226 227
  while (frame_cnt < app_input.frames_to_code) {
    if (read_yuv_frame(&app_input.input_ctx, &raw)) break;

228 229 230 231
    res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration,
                         VPX_DL_REALTIME);
    printf("%s", vpx_svc_get_message(&svc_ctx));
    if (res != VPX_CODEC_OK) {
232 233
      die_codec(&codec, "Failed to encode frame");
    }
234
    if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
235
      ivf_write_frame_header(outfile, pts, vpx_svc_get_frame_size(&svc_ctx));
236 237 238 239
      (void)fwrite(vpx_svc_get_buffer(&svc_ctx), 1,
                   vpx_svc_get_frame_size(&svc_ctx), outfile);
    }
    ++frame_cnt;
240
    pts += frame_duration;
James Zern's avatar
James Zern committed
241
  }
242

243
  printf("Processed %d frames\n", frame_cnt);
244

245
  fclose(app_input.input_ctx.file);
246 247
  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");

248 249
  // rewrite the output file headers with the actual frame count, and
  // resolution of the highest layer
250
  if (!fseek(outfile, 0, SEEK_SET)) {
251 252 253 254 255 256 257
    // get resolution of highest layer
    if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(&svc_ctx,
                                                     svc_ctx.spatial_layers - 1,
                                                     &enc_cfg.g_w,
                                                     &enc_cfg.g_h)) {
      die("Failed to get output resolution");
    }
258
    ivf_write_file_header(outfile, &enc_cfg, VP9_FOURCC, frame_cnt);
259
  }
260 261 262 263 264 265 266
  fclose(outfile);
  vpx_img_free(&raw);

  // display average size, psnr
  printf("%s", vpx_svc_dump_statistics(&svc_ctx));

  vpx_svc_release(&svc_ctx);
267 268 269

  return EXIT_SUCCESS;
}