22 #include "av1/common/enums.h" 
   23 #include "common/tools_common.h" 
   24 #include "common/video_writer.h" 
   25 #include "aom_ports/aom_timer.h" 
   27 #define zero(Dest) memset(&(Dest), 0, sizeof(Dest)); 
   29 static const char *exec_name;
 
   31 void usage_exit(
void) { exit(EXIT_FAILURE); }
 
   33 static int mode_to_num_temporal_layers[10] = { 1, 2, 3, 3, 2, 1, 1, 3, 3, 3 };
 
   34 static int mode_to_num_spatial_layers[10] = { 1, 1, 1, 1, 1, 2, 3, 3, 3, 3 };
 
   35 static int mode_to_num_layers[10] = { 1, 2, 3, 3, 2, 2, 3, 9, 9, 9 };
 
   38 struct RateControlMetrics {
 
   55   double avg_st_encoding_bitrate;
 
   57   double variance_st_encoding_bitrate;
 
   65 static int read_frame(
struct AvxInputContext *input_ctx, 
aom_image_t *img) {
 
   66   FILE *f = input_ctx->file;
 
   67   y4m_input *y4m = &input_ctx->y4m;
 
   70   if (input_ctx->file_type == FILE_TYPE_Y4M) {
 
   71     if (y4m_input_fetch_frame(y4m, f, img) < 1) 
return 0;
 
   73     shortread = read_yuv_frame(input_ctx, img);
 
   79 static int file_is_y4m(
const char detect[4]) {
 
   80   if (memcmp(detect, 
"YUV4", 4) == 0) {
 
   86 static int fourcc_is_ivf(
const char detect[4]) {
 
   87   if (memcmp(detect, 
"DKIF", 4) == 0) {
 
   93 static void close_input_file(
struct AvxInputContext *input) {
 
   95   if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
 
   98 static void open_input_file(
struct AvxInputContext *input,
 
  101   input->file = strcmp(input->filename, 
"-") ? fopen(input->filename, 
"rb")
 
  102                                              : set_binary_mode(stdin);
 
  104   if (!input->file) fatal(
"Failed to open input file");
 
  106   if (!fseeko(input->file, 0, SEEK_END)) {
 
  110     input->length = ftello(input->file);
 
  115   input->pixel_aspect_ratio.numerator = 1;
 
  116   input->pixel_aspect_ratio.denominator = 1;
 
  121   input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
 
  122   input->detect.position = 0;
 
  124   if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
 
  125     if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
 
  126                        input->only_i420) >= 0) {
 
  127       input->file_type = FILE_TYPE_Y4M;
 
  128       input->width = input->y4m.pic_w;
 
  129       input->height = input->y4m.pic_h;
 
  130       input->pixel_aspect_ratio.numerator = input->y4m.par_n;
 
  131       input->pixel_aspect_ratio.denominator = input->y4m.par_d;
 
  132       input->framerate.numerator = input->y4m.fps_n;
 
  133       input->framerate.denominator = input->y4m.fps_d;
 
  134       input->fmt = input->y4m.aom_fmt;
 
  135       input->bit_depth = input->y4m.bit_depth;
 
  137       fatal(
"Unsupported Y4M stream.");
 
  139   } 
else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
 
  140     fatal(
"IVF is not supported as input.");
 
  142     input->file_type = FILE_TYPE_RAW;
 
  152 static void set_rate_control_metrics(
struct RateControlMetrics *rc,
 
  154                                      unsigned int ss_number_layers,
 
  155                                      unsigned int ts_number_layers) {
 
  157   ts_rate_decimator[0] = 1;
 
  158   if (ts_number_layers == 2) {
 
  159     ts_rate_decimator[0] = 2;
 
  160     ts_rate_decimator[1] = 1;
 
  162   if (ts_number_layers == 3) {
 
  163     ts_rate_decimator[0] = 4;
 
  164     ts_rate_decimator[1] = 2;
 
  165     ts_rate_decimator[2] = 1;
 
  169   for (
unsigned int sl = 0; sl < ss_number_layers; ++sl) {
 
  170     unsigned int i = sl * ts_number_layers;
 
  171     rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
 
  173         1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
 
  174     for (
unsigned int tl = 0; tl < ts_number_layers; ++tl) {
 
  175       i = sl * ts_number_layers + tl;
 
  177         rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
 
  180             (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
 
  181             (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
 
  183       rc->layer_input_frames[tl] = 0;
 
  184       rc->layer_enc_frames[tl] = 0;
 
  185       rc->layer_encoding_bitrate[i] = 0.0;
 
  186       rc->layer_avg_frame_size[i] = 0.0;
 
  187       rc->layer_avg_rate_mismatch[i] = 0.0;
 
  190   rc->window_count = 0;
 
  191   rc->window_size = 15;
 
  192   rc->avg_st_encoding_bitrate = 0.0;
 
  193   rc->variance_st_encoding_bitrate = 0.0;
 
  196 static void printout_rate_control_summary(
struct RateControlMetrics *rc,
 
  198                                           unsigned int ss_number_layers,
 
  199                                           unsigned int ts_number_layers) {
 
  200   int tot_num_frames = 0;
 
  201   double perc_fluctuation = 0.0;
 
  202   printf(
"Total number of processed frames: %d\n\n", frame_cnt - 1);
 
  203   printf(
"Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
 
  204   for (
unsigned int sl = 0; sl < ss_number_layers; ++sl) {
 
  206     for (
unsigned int tl = 0; tl < ts_number_layers; ++tl) {
 
  207       unsigned int i = sl * ts_number_layers + tl;
 
  208       const int num_dropped =
 
  209           tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
 
  210                  : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
 
  211       tot_num_frames += rc->layer_input_frames[tl];
 
  212       rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
 
  213                                       rc->layer_encoding_bitrate[i] /
 
  215       rc->layer_avg_frame_size[i] =
 
  216           rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
 
  217       rc->layer_avg_rate_mismatch[i] =
 
  218           100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
 
  219       printf(
"For layer#: %d %d \n", sl, tl);
 
  220       printf(
"Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
 
  221              rc->layer_encoding_bitrate[i]);
 
  222       printf(
"Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
 
  223              rc->layer_avg_frame_size[i]);
 
  224       printf(
"Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
 
  226           "Number of input frames, encoded (non-key) frames, " 
  227           "and perc dropped frames: %d %d %f\n",
 
  228           rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
 
  229           100.0 * num_dropped / rc->layer_input_frames[tl]);
 
  233   rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
 
  234   rc->variance_st_encoding_bitrate =
 
  235       rc->variance_st_encoding_bitrate / rc->window_count -
 
  236       (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
 
  237   perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
 
  238                      rc->avg_st_encoding_bitrate;
 
  239   printf(
"Short-time stats, for window of %d frames:\n", rc->window_size);
 
  240   printf(
"Average, rms-variance, and percent-fluct: %f %f %f\n",
 
  241          rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
 
  243   if (frame_cnt - 1 != tot_num_frames)
 
  244     die(
"Error: Number of input frames not equal to output!\n");
 
  248 static int set_layer_pattern(
int layering_mode, 
int superframe_cnt,
 
  251                              int *use_svc_control, 
int spatial_layer_id,
 
  252                              int is_key_frame, 
int ksvc_mode) {
 
  254   int shift = (layering_mode == 7) ? 2 : 0;
 
  255   *use_svc_control = 1;
 
  260   for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->
ref_idx[i] = i;
 
  261   for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->
reference[i] = 0;
 
  262   for (i = 0; i < REF_FRAMES; i++) ref_frame_config->
refresh[i] = 0;
 
  275   switch (layering_mode) {
 
  279       ref_frame_config->
refresh[0] = 1;
 
  285       if (superframe_cnt % 2 == 0) {
 
  288         ref_frame_config->
refresh[0] = 1;
 
  300       if (superframe_cnt % 4 == 0) {
 
  304         ref_frame_config->
refresh[0] = 1;
 
  305       } 
else if ((superframe_cnt - 1) % 4 == 0) {
 
  309       } 
else if ((superframe_cnt - 2) % 4 == 0) {
 
  312         ref_frame_config->
refresh[1] = 1;
 
  314       } 
else if ((superframe_cnt - 3) % 4 == 0) {
 
  319         ref_frame_config->
ref_idx[0] = 1;
 
  320         ref_frame_config->
ref_idx[1] = 0;
 
  330       if (superframe_cnt % 4 == 0) {
 
  334         ref_frame_config->
refresh[0] = 1;
 
  336       } 
else if ((superframe_cnt - 1) % 4 == 0) {
 
  340       } 
else if ((superframe_cnt - 2) % 4 == 0) {
 
  343         ref_frame_config->
refresh[3] = 1;
 
  345       } 
else if ((superframe_cnt - 3) % 4 == 0) {
 
  354       *use_svc_control = 0;
 
  357       if (superframe_cnt % 2 == 0) {
 
  373         ref_frame_config->
refresh[0] = 1;
 
  378         ref_frame_config->
ref_idx[0] = 1;
 
  379         ref_frame_config->
ref_idx[3] = 0;
 
  380         ref_frame_config->
refresh[1] = 1;
 
  392         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  393           ref_frame_config->
ref_idx[i] = 0;
 
  394         ref_frame_config->
refresh[0] = 1;
 
  400         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  401           ref_frame_config->
ref_idx[i] = 0;
 
  402         ref_frame_config->
ref_idx[0] = 1;
 
  403         ref_frame_config->
refresh[1] = 1;
 
  408         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  409           ref_frame_config->
ref_idx[i] = 1;
 
  410         ref_frame_config->
ref_idx[0] = 2;
 
  411         ref_frame_config->
refresh[2] = 1;
 
  429       if (superframe_cnt % 4 == 0) {
 
  435           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  436             ref_frame_config->
ref_idx[i] = 0;
 
  437           ref_frame_config->
refresh[0] = 1;
 
  442           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  443             ref_frame_config->
ref_idx[i] = 0;
 
  444           ref_frame_config->
ref_idx[0] = 1;
 
  445           ref_frame_config->
refresh[1] = 1;
 
  450           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  451             ref_frame_config->
ref_idx[i] = 1;
 
  452           ref_frame_config->
ref_idx[0] = 2;
 
  453           ref_frame_config->
refresh[2] = 1;
 
  455       } 
else if ((superframe_cnt - 1) % 4 == 0) {
 
  462           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  463             ref_frame_config->
ref_idx[i] = 0;
 
  464           ref_frame_config->
ref_idx[3] = 3;
 
  465           ref_frame_config->
refresh[3] = 1;
 
  470           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  471             ref_frame_config->
ref_idx[i] = 3;
 
  472           ref_frame_config->
ref_idx[0] = 1;
 
  473           ref_frame_config->
ref_idx[1] = 4;
 
  474           ref_frame_config->
refresh[4] = 1;
 
  479           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  480             ref_frame_config->
ref_idx[i] = 4;
 
  481           ref_frame_config->
ref_idx[0] = 2;
 
  483       } 
else if ((superframe_cnt - 2) % 4 == 0) {
 
  490           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  491             ref_frame_config->
ref_idx[i] = 0;
 
  492           ref_frame_config->
ref_idx[3] = 5 - shift;
 
  493           ref_frame_config->
refresh[5 - shift] = 1;
 
  498           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  499             ref_frame_config->
ref_idx[i] = 5 - shift;
 
  500           ref_frame_config->
ref_idx[0] = 1;
 
  501           ref_frame_config->
ref_idx[2] = 6 - shift;
 
  502           ref_frame_config->
refresh[6 - shift] = 1;
 
  507           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  508             ref_frame_config->
ref_idx[i] = 6 - shift;
 
  509           ref_frame_config->
ref_idx[0] = 2;
 
  510           ref_frame_config->
ref_idx[2] = 7 - shift;
 
  511           ref_frame_config->
refresh[7 - shift] = 1;
 
  513       } 
else if ((superframe_cnt - 3) % 4 == 0) {
 
  520           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  521             ref_frame_config->
ref_idx[i] = 0;
 
  522           ref_frame_config->
ref_idx[0] = 5 - shift;
 
  523           ref_frame_config->
ref_idx[3] = 3;
 
  524           ref_frame_config->
refresh[3] = 1;
 
  528           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  529             ref_frame_config->
ref_idx[i] = 0;
 
  530           ref_frame_config->
ref_idx[0] = 6 - shift;
 
  531           ref_frame_config->
ref_idx[3] = 3;
 
  532           ref_frame_config->
ref_idx[1] = 4;
 
  533           ref_frame_config->
refresh[4] = 1;
 
  537           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
 
  538             ref_frame_config->
ref_idx[i] = 0;
 
  539           ref_frame_config->
ref_idx[0] = 7 - shift;
 
  540           ref_frame_config->
ref_idx[3] = 4;
 
  546     default: assert(0); die(
"Error: Unsupported temporal layering mode!\n");
 
  551 int main(
int argc, 
char **argv) {
 
  560   uint32_t error_resilient = 0;
 
  567   int frame_duration = 1;  
 
  568   int layering_mode = 0;
 
  572   const AvxInterface *encoder = NULL;
 
  573   struct AvxInputContext input_ctx;
 
  574   struct RateControlMetrics rc;
 
  576   const int min_args_base = 13;
 
  577   const int min_args = min_args_base;
 
  578   double sum_bitrate = 0.0;
 
  579   double sum_bitrate2 = 0.0;
 
  580   double framerate = 30.0;
 
  581   int use_svc_control = 1;
 
  582   zero(rc.layer_target_bitrate);
 
  584   memset(&input_ctx, 0, 
sizeof(input_ctx));
 
  585   memset(&svc_params, 0, 
sizeof(svc_params));
 
  589   const int test_dynamic_scaling_single_layer = 0;
 
  592   input_ctx.framerate.numerator = 30;
 
  593   input_ctx.framerate.denominator = 1;
 
  594   input_ctx.only_i420 = 1;
 
  595   input_ctx.bit_depth = 0;
 
  596   unsigned int ts_number_layers = 1;
 
  597   unsigned int ss_number_layers = 1;
 
  600   if (argc < min_args) {
 
  601     die(
"Usage: %s <infile> <outfile> <codec_type(av1)> <width> <height> " 
  602         "<rate_num> <rate_den> <speed> <frame_drop_threshold> " 
  603         "<error_resilient> <threads> <mode> " 
  604         "<Rate_0> ... <Rate_nlayers-1>\n",
 
  608   encoder = get_aom_encoder_by_name(argv[3]);
 
  610   width = (
unsigned int)strtoul(argv[4], NULL, 0);
 
  611   height = (
unsigned int)strtoul(argv[5], NULL, 0);
 
  612   if (width < 16 || width % 2 || height < 16 || height % 2) {
 
  613     die(
"Invalid resolution: %d x %d", width, height);
 
  616   layering_mode = (int)strtol(argv[12], NULL, 0);
 
  617   if (layering_mode < 0 || layering_mode > 13) {
 
  618     die(
"Invalid layering mode (0..12) %s", argv[12]);
 
  621   if (argc != min_args + mode_to_num_layers[layering_mode]) {
 
  622     die(
"Invalid number of arguments");
 
  625   ts_number_layers = mode_to_num_temporal_layers[layering_mode];
 
  626   ss_number_layers = mode_to_num_spatial_layers[layering_mode];
 
  628   input_ctx.filename = argv[1];
 
  629   open_input_file(&input_ctx, 0);
 
  632   if (input_ctx.file_type != FILE_TYPE_Y4M) {
 
  634       die(
"Failed to allocate image", width, height);
 
  653   speed = (int)strtol(argv[8], NULL, 0);
 
  654   if (speed < 0 || speed > 8) {
 
  655     die(
"Invalid speed setting: must be positive");
 
  658   for (i = min_args_base;
 
  659        (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) {
 
  660     rc.layer_target_bitrate[i - 13] = (int)strtol(argv[i], NULL, 0);
 
  668   if (ts_number_layers == 2) {
 
  671   } 
else if (ts_number_layers == 3) {
 
  691   cfg.
g_threads = (
unsigned int)strtoul(argv[11], NULL, 0);
 
  693   error_resilient = (uint32_t)strtoul(argv[10], NULL, 0);
 
  694   if (error_resilient != 0 && error_resilient != 1) {
 
  695     die(
"Invalid value for error resilient (0, 1): %d.", error_resilient);
 
  706   set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
 
  708   if (input_ctx.file_type == FILE_TYPE_Y4M) {
 
  709     if (input_ctx.width != cfg.
g_w || input_ctx.height != cfg.
g_h) {
 
  710       die(
"Incorrect width or height: %d x %d", cfg.
g_w, cfg.
g_h);
 
  714       die(
"Incorrect framerate: numerator %d denominator %d",
 
  720   for (
unsigned int sl = 0; sl < ss_number_layers; ++sl) {
 
  721     for (
unsigned tl = 0; tl < ts_number_layers; ++tl) {
 
  722       i = sl * ts_number_layers + tl;
 
  723       char file_name[PATH_MAX];
 
  725       info.codec_fourcc = encoder->fourcc;
 
  726       info.frame_width = cfg.
g_w;
 
  727       info.frame_height = cfg.
g_h;
 
  731       snprintf(file_name, 
sizeof(file_name), 
"%s_%d.av1", argv[2], i);
 
  732       outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
 
  733       if (!outfile[i]) die(
"Failed to open %s for writing", file_name);
 
  734       assert(outfile[i] != NULL);
 
  740     die_codec(&codec, 
"Failed to initialize encoder");
 
  752   for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
 
  756   for (i = 0; i < ss_number_layers; ++i) {
 
  760   if (ss_number_layers == 2) {
 
  763   } 
else if (ss_number_layers == 3) {
 
  776     const int max_intra_size_pct = 300;
 
  782   while (frame_avail || got_data) {
 
  783     struct aom_usec_timer timer;
 
  784     frame_avail = read_frame(&input_ctx, &raw);
 
  785     int is_key_frame = (frame_cnt % cfg.
kf_max_dist) == 0;
 
  787     for (
unsigned int slx = 0; slx < ss_number_layers; slx++) {
 
  794       flags = set_layer_pattern(layering_mode, frame_cnt, &layer_id,
 
  795                                 &ref_frame_config, &use_svc_control, slx,
 
  796                                 is_key_frame, (layering_mode == 9));
 
  803       if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
 
  805       if (test_dynamic_scaling_single_layer) {
 
  806         if (frame_cnt >= 200 && frame_cnt <= 400) {
 
  818       aom_usec_timer_start(&timer);
 
  820         die_codec(&codec, 
"Failed to encode frame");
 
  821       aom_usec_timer_mark(&timer);
 
  822       cx_time += aom_usec_timer_elapsed(&timer);
 
  830                  sl < ss_number_layers; ++sl) {
 
  832                    tl < ts_number_layers; ++tl) {
 
  833                 unsigned int j = sl * ts_number_layers + tl;
 
  834                 aom_video_writer_write_frame(outfile[j], pkt->
data.
frame.buf,
 
  837                   rc.layer_encoding_bitrate[j] += 8.0 * pkt->
data.
frame.sz;
 
  842                   rc.layer_avg_frame_size[j] += 8.0 * pkt->
data.
frame.sz;
 
  843                   rc.layer_avg_rate_mismatch[j] +=
 
  844                       fabs(8.0 * pkt->
data.
frame.sz - rc.layer_pfb[j]) /
 
  846                   if (slx == 0) ++rc.layer_enc_frames[tl];
 
  855             if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
 
  856               sum_bitrate += 0.001 * 8.0 * pkt->
data.
frame.sz * framerate;
 
  857               rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
 
  858               if (frame_cnt % rc.window_size == 0) {
 
  859                 rc.window_count += 1;
 
  860                 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
 
  861                 rc.variance_st_encoding_bitrate +=
 
  862                     (sum_bitrate / rc.window_size) *
 
  863                     (sum_bitrate / rc.window_size);
 
  868             if (frame_cnt > rc.window_size + rc.window_size / 2 &&
 
  869                 slx == ss_number_layers - 1) {
 
  870               sum_bitrate2 += 0.001 * 8.0 * pkt->
data.
frame.sz * framerate;
 
  871               if (frame_cnt > 2 * rc.window_size &&
 
  872                   frame_cnt % rc.window_size == 0) {
 
  873                 rc.window_count += 1;
 
  874                 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
 
  875                 rc.variance_st_encoding_bitrate +=
 
  876                     (sum_bitrate2 / rc.window_size) *
 
  877                     (sum_bitrate2 / rc.window_size);
 
  887     pts += frame_duration;
 
  889   close_input_file(&input_ctx);
 
  890   printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
 
  893   printf(
"Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
 
  894          frame_cnt, 1000 * (
float)cx_time / (
double)(frame_cnt * 1000000),
 
  895          1000000 * (
double)frame_cnt / (
double)cx_time);
 
  900   for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
 
  901     aom_video_writer_close(outfile[i]);
 
  903   if (input_ctx.file_type != FILE_TYPE_Y4M) {