/* Icecast
 *
 * This program is distributed under the GNU General Public License,
 * version 2. A copy of this license is included with this source.
 * At your option, this specific source file can also be distributed
 * under the GNU GPL version 3.
 *
 * Copyright 2012,      David Richards, Mozilla Foundation,
 *                      and others (see AUTHORS for details).
 * Copyright 2014,      Philipp "ph3-der-loewe" Schafft <lion@lion.leolix.org>.
 */

/* format_ebml.c
 *
 * format plugin for WebM/EBML
 *
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "refbuf.h"
#include "source.h"
#include "client.h"

#include "stats.h"
#include "format.h"
#include "format_ebml.h"

#define CATMODULE "format-ebml"

#include "logging.h"

/* The size of the header buffer; should be large enough to contain
 * everything before the first Cluster in a reasonable stream
 */
#define EBML_HEADER_MAX_SIZE 131072

/* The size of the input/staging buffers; this much of a cluster
 * will be buffered before being returned. Should be large enough
 * that the first video block will be encountered before it is full,
 * to allow probing for the keyframe flag while we still have the
 * option to mark the cluster as a sync point.
 */
#define EBML_SLICE_SIZE 4096

/* A value that no EBML var-int is allowed to take. */
#define EBML_UNKNOWN ((uint_least64_t) -1)

/* The magic numbers for each element we are interested in.
 * Defined here:
 * http://www.matroska.org/technical/specs/index.html
 * http://www.webmproject.org/docs/container/
 *
 * Some of the higher-level elements have 4-byte identifiers;
 * the lower-level elements have 1-byte identifiers.
 */
#define UNCOMMON_MAGIC_LEN 4

#define SEGMENT_MAGIC "\x18\x53\x80\x67"
#define CLUSTER_MAGIC "\x1F\x43\xB6\x75"
#define TRACKS_MAGIC "\x16\x54\xAE\x6B"

#define COMMON_MAGIC_LEN 1

#define TRACK_ENTRY_MAGIC "\xAE"
#define TRACK_NUMBER_MAGIC "\xD7"
#define TRACK_TYPE_MAGIC "\x83"
#define SIMPLE_BLOCK_MAGIC "\xA3"

/* Which kind of chunk the reader side (ebml_read_space / ebml_read)
 * hands out next.
 *
 * If support for Tags gets added, it may make sense
 * to convert this into a pair of flags signaling
 * "new headers" and "new tags"
 */
typedef enum ebml_read_mode {
    /* The header buffer has not been extracted yet */
    EBML_STATE_READING_HEADER = 0,
    /* The header buffer has been read, begin normal operation */
    EBML_STATE_READING_CLUSTERS
} ebml_read_mode;

/* State of the input-side parser driven by ebml_wrote(). */
typedef enum ebml_parsing_state {
    /* Examine EBML elements, output to header buffer */
    EBML_STATE_PARSING_HEADER = 0,

    /* Blindly copy a specified number of bytes to the header buffer */
    EBML_STATE_COPYING_TO_HEADER,

    /* Finalize header buffer and wait for previous cluster to flush (as necessary) */
    EBML_STATE_START_CLUSTER,

    /* Examine EBML elements, output to data buffer */
    EBML_STATE_PARSING_CLUSTERS,

    /* Blindly copy a specified number of bytes to the data buffer */
    EBML_STATE_COPYING_TO_DATA
} ebml_parsing_state;

/* Classification of a chunk returned by ebml_read(). */
typedef enum ebml_chunk_type {
    /* This chunk is the header buffer */
    EBML_CHUNK_HEADER = 0,

    /* This chunk starts a cluster that works as a sync point */
    EBML_CHUNK_CLUSTER_START,

    /* This chunk continues the previous cluster, or
     * else starts a non-sync-point cluster
     */
    EBML_CHUNK_CLUSTER_CONTINUE
} ebml_chunk_type;

/* Result of probing the first video block of the current cluster. */
typedef enum ebml_keyframe_status {
    /* Have not found a video track block yet */
    EBML_KEYFRAME_UNKNOWN = -1,

    /* Found the first video track block, it was not a keyframe */
    EBML_KEYFRAME_DOES_NOT_START_CLUSTER = 0,

    /* Found the first video track block, it was a keyframe */
    EBML_KEYFRAME_STARTS_CLUSTER = 1
} ebml_keyframe_status;

130
typedef struct ebml_st {
131

132
    ebml_read_mode output_state;
133
    ebml_parsing_state parse_state;
134
    uint_least64_t copy_len;
135

136
    ssize_t cluster_start;
137
    ebml_keyframe_status cluster_starts_with_keyframe;
138
    bool flush_cluster;
139

140
    size_t position;
141
    unsigned char *buffer;
142

143
    size_t input_position;
144
    unsigned char *input_buffer;
145

146 147 148
    size_t header_size;
    size_t header_position;
    size_t header_read_position;
149
    unsigned char *header;
150

151 152
    uint_least64_t keyframe_track_number;
    uint_least64_t parsing_track_number;
153
    bool parsing_track_is_video;
154 155 156 157 158 159
} ebml_t;

/* Per-source state of the EBML format plugin. */
typedef struct ebml_source_state_st {

    /* Stream parser fed by ebml_get_buffer() */
    ebml_t *ebml;
    /* First chunk produced by the parser (the stream header);
     * NULL until it has been captured.
     */
    refbuf_t *header;
    /* Whether `header` has already been written to the dump file */
    bool file_headers_written;

} ebml_source_state_t;

/* Per-client state of the EBML format plugin. */
typedef struct ebml_client_data_st {

    /* Stream header this client must receive first; the client holds
     * its own reference (taken in ebml_create_client_data).
     */
    refbuf_t *header;
    /* Number of header bytes already sent to the client */
    size_t header_pos;

} ebml_client_data_t;

171 172 173 174 175 176
static void ebml_free_plugin(format_plugin_t *plugin);
static refbuf_t *ebml_get_buffer(source_t *source);
static int ebml_write_buf_to_client(client_t *client);
static void ebml_write_buf_to_file(source_t *source, refbuf_t *refbuf);
static int ebml_create_client_data(source_t *source, client_t *client);
static void ebml_free_client_data(client_t *client);
177 178 179

static ebml_t *ebml_create();
static void ebml_destroy(ebml_t *ebml);
180 181 182 183 184
static size_t ebml_read_space(ebml_t *ebml);
static size_t ebml_read(ebml_t *ebml, char *buffer, size_t len, ebml_chunk_type *chunk_type);
static unsigned char *ebml_get_write_buffer(ebml_t *ebml, size_t *bytes);
static ssize_t ebml_wrote(ebml_t *ebml, size_t len);
static ssize_t ebml_parse_tag(unsigned char      *buffer,
185
                              unsigned char      *buffer_end,
186
                              uint_least64_t *tag_id,
187
                              uint_least64_t *payload_length);
188 189
static ssize_t ebml_parse_var_int(unsigned char      *buffer,
                                  unsigned char      *buffer_end,
190
                                  uint_least64_t *out_value);
191 192 193
static ssize_t ebml_parse_sized_int(unsigned char      *buffer,
                                    unsigned char      *buffer_end,
                                    size_t             len,
194
                                    bool                is_signed,
195
                                    uint_least64_t *out_value);
196
static inline void ebml_check_track(ebml_t *ebml);
197

198
int format_ebml_get_plugin(source_t *source)
199 200 201 202 203 204 205 206 207 208 209 210 211
{

    ebml_source_state_t *ebml_source_state = calloc(1, sizeof(ebml_source_state_t));
    format_plugin_t *plugin = calloc(1, sizeof(format_plugin_t));

    plugin->get_buffer = ebml_get_buffer;
    plugin->write_buf_to_client = ebml_write_buf_to_client;
    plugin->create_client_data = ebml_create_client_data;
    plugin->free_plugin = ebml_free_plugin;
    plugin->write_buf_to_file = ebml_write_buf_to_file;
    plugin->set_tag = NULL;
    plugin->apply_settings = NULL;

212
    plugin->contenttype = httpp_getvar(source->parser, "content-type");
213 214

    plugin->_state = ebml_source_state;
215
    vorbis_comment_init(&plugin->vc);
216 217 218
    source->format = plugin;

    ebml_source_state->ebml = ebml_create();
219

220 221 222
    return 0;
}

223
static void ebml_free_plugin(format_plugin_t *plugin)
224 225 226 227
{

    ebml_source_state_t *ebml_source_state = plugin->_state;

228
    refbuf_release(ebml_source_state->header);
229
    ebml_destroy(ebml_source_state->ebml);
230
    free(ebml_source_state);
231
    vorbis_comment_clear(&plugin->vc);
232
    free(plugin);
233 234
}

235 236
/* Write to a client from the header buffer.
 */
237
static int send_ebml_header(client_t *client)
238 239 240
{

    ebml_client_data_t *ebml_client_data = client->format_data;
241
    size_t len = EBML_SLICE_SIZE;
242 243
    int ret;

244
    if (ebml_client_data->header->len - ebml_client_data->header_pos < len)
245 246 247
    {
        len = ebml_client_data->header->len - ebml_client_data->header_pos;
    }
248
    ret = client_send_bytes (client,
249 250 251 252 253 254 255 256 257 258 259 260
                             ebml_client_data->header->data + ebml_client_data->header_pos,
                             len);

    if (ret > 0)
    {
        ebml_client_data->header_pos += ret;
    }

    return ret;

}

261 262
/* Initial write-to-client function.
 */
263 264 265 266 267 268 269 270 271 272 273
static int ebml_write_buf_to_client (client_t *client)
{

    ebml_client_data_t *ebml_client_data = client->format_data;

    if (ebml_client_data->header_pos != ebml_client_data->header->len)
    {
        return send_ebml_header (client);
    }
    else
    {
274 275
        /* Now that the header's sent, short-circuit to the generic
         * write-refbufs function. */
276 277 278 279 280 281
        client->write_to_client = format_generic_write_to_client;
        return client->write_to_client(client);
    }

}

282 283
/* Return a refbuf to add to the queue.
 */
284
static refbuf_t *ebml_get_buffer(source_t *source)
285 286 287 288
{

    ebml_source_state_t *ebml_source_state = source->format->_state;
    format_plugin_t *format = source->format;
289
    unsigned char *write_buffer = NULL;
290
    ssize_t read_bytes = 0;
291
    size_t write_bytes = 0;
292
    ebml_chunk_type chunk_type;
293
    refbuf_t *refbuf;
294
    ssize_t ret;
295 296 297

    while (1)
    {
298 299
        read_bytes = ebml_read_space(ebml_source_state->ebml);
        if (read_bytes > 0) {
300
            /* A chunk is available for reading */
301 302
            refbuf = refbuf_new(read_bytes);
            ebml_read(ebml_source_state->ebml, refbuf->data, read_bytes, &chunk_type);
303 304 305

            if (ebml_source_state->header == NULL)
            {
306
                /* Capture header before adding clusters to the queue */
307 308 309 310
                ebml_source_state->header = refbuf;
                continue;
            }

311
            if (chunk_type == EBML_CHUNK_CLUSTER_START)
312 313 314 315 316
            {
                refbuf->sync_point = 1;
            }
            return refbuf;

317
        } else if(read_bytes == 0) {
318
            /* Feed more bytes into the parser */
319
            write_buffer = ebml_get_write_buffer(ebml_source_state->ebml, &write_bytes);
320
            read_bytes = client_body_read(source->client, write_buffer, write_bytes);
321
            if (read_bytes <= 0) {
322 323 324
                ebml_wrote (ebml_source_state->ebml, 0);
                return NULL;
            }
325 326 327
            format->read_bytes += read_bytes;
            ret = ebml_wrote (ebml_source_state->ebml, read_bytes);
            if (ret != read_bytes) {
328
                ICECAST_LOG_ERROR("Problem processing stream");
329 330 331
                source->running = 0;
                return NULL;
            }
332 333 334 335
        } else {
            ICECAST_LOG_ERROR("Problem processing stream");
            source->running = 0;
            return NULL;
336 337 338 339
        }
    }
}

340 341
/* Initialize client state.
 */
342
static int ebml_create_client_data(source_t *source, client_t *client)
343
{
344
    ebml_client_data_t *ebml_client_data;
345 346
    ebml_source_state_t *ebml_source_state = source->format->_state;

347 348
    if (!ebml_source_state->header)
        return -1;
349

350 351 352
    ebml_client_data = calloc(1, sizeof(ebml_client_data_t));
    if (!ebml_client_data)
        return -1;
353

354 355 356 357 358
    ebml_client_data->header = ebml_source_state->header;
    refbuf_addref(ebml_client_data->header);
    client->format_data = ebml_client_data;
    client->free_client_data = ebml_free_client_data;
    return 0;
359 360 361 362 363 364 365 366 367 368 369 370 371 372
}

/* Release the per-client state: drop the client's reference to the
 * stream header, free the state and clear the client's pointer to it.
 */
static void ebml_free_client_data (client_t *client)
{
    ebml_client_data_t *data = client->format_data;

    refbuf_release (data->header);
    free (data);
    client->format_data = NULL;
}

/* Handle a failed dump-file write: log a warning, close the dump file
 * and clear source->dumpfile so it is not used again.
 */
static void ebml_write_buf_to_file_fail (source_t *source)
{
    ICECAST_LOG_WARN("Write to dump file failed, disabling");
    fclose (source->dumpfile);
    source->dumpfile = NULL;
}

/* Append `refbuf` to the source's dump file.
 *
 * The stream header is written once, before the first buffer. If any
 * write comes up short, dumping is disabled via
 * ebml_write_buf_to_file_fail(), which closes the file and sets
 * source->dumpfile to NULL.
 */
static void ebml_write_buf_to_file (source_t *source, refbuf_t *refbuf)
{
    ebml_source_state_t *ebml_source_state = source->format->_state;

    if (!ebml_source_state->file_headers_written) {
        if (fwrite (ebml_source_state->header->data, 1,
                    ebml_source_state->header->len,
                    source->dumpfile) != ebml_source_state->header->len) {
            ebml_write_buf_to_file_fail(source);
            /* The dump file is closed and NULL now; writing the data
             * buffer to it would dereference a NULL stream.
             */
            return;
        }
        ebml_source_state->file_headers_written = true;
    }

    if (fwrite (refbuf->data, 1, refbuf->len, source->dumpfile) != refbuf->len) {
        ebml_write_buf_to_file_fail(source);
    }
}

/* internal ebml parsing */

/* Free a parser created by ebml_create() together with its buffers.
 * Passing NULL is a no-op, mirroring free().
 */
static void ebml_destroy(ebml_t *ebml)
{
    if (ebml == NULL) {
        return;
    }

    free(ebml->header);
    free(ebml->input_buffer);
    free(ebml->buffer);
    free(ebml);
}

static ebml_t *ebml_create()
{

    ebml_t *ebml = calloc(1, sizeof(ebml_t));

417 418
    ebml->output_state = EBML_STATE_READING_HEADER;

419
    ebml->header = calloc(1, EBML_HEADER_MAX_SIZE);
420
    ebml->buffer = calloc(1, EBML_SLICE_SIZE);
421 422
    ebml->input_buffer = calloc(1, EBML_SLICE_SIZE);

423
    ebml->cluster_start = -1;
424

425 426
    ebml->keyframe_track_number = EBML_UNKNOWN;
    ebml->parsing_track_number = EBML_UNKNOWN;
427
    ebml->parsing_track_is_video = false;
428 429 430 431 432

    return ebml;

}

433 434 435
/* Return the size of a buffer needed to store the next
 * chunk that ebml_read can yield.
 */
436
static size_t ebml_read_space(ebml_t *ebml)
437 438
{

439
    size_t read_space;
440

441 442
    switch (ebml->output_state) {
        case EBML_STATE_READING_HEADER:
443

444 445 446 447 448 449 450 451
            if (ebml->header_size != 0) {
                /* The header can be read */
                return ebml->header_size;
            } else {
                /* The header's not ready yet */
                return 0;
            }
            break;
452

453
        case EBML_STATE_READING_CLUSTERS:
454

455 456 457 458
            if (ebml->cluster_start > 0) {
                /* return up until just before a new cluster starts */
                read_space = ebml->cluster_start;
            } else {
459

460 461 462 463
                if (ebml->position == EBML_SLICE_SIZE) {
                    /* The current cluster fills the buffer,
                     * we have no choice but to start flushing it.
                     */
464

465
                    ebml->flush_cluster = true;
466
                }
467

468 469 470 471 472 473 474 475 476
                if (ebml->flush_cluster) {
                    /* return what we have */
                    read_space = ebml->position;
                } else {
                    /* wait until we've read more, so the parser has
                     * time to gather metadata
                     */
                    read_space = 0;
                }
477
            }
478

479
            return read_space;
480
    }
481

482 483
    ICECAST_LOG_ERROR("EBML: Invalid parser read state");
    return 0;
484 485
}

486
/* Return a chunk of the EBML/MKV/WebM stream.
487
 * The header will be buffered until it can be returned as one chunk.
488
 * A cluster element's opening tag will always start a new chunk.
489 490 491
 * 
 * chunk_type will be set to indicate if the chunk is the header,
 * the start of a cluster, or continuing the current cluster.
492
 */
493
static size_t ebml_read(ebml_t *ebml, char *buffer, size_t len, ebml_chunk_type *chunk_type)
494 495
{

496 497
    size_t read_space;
    size_t to_read;
498

499
    *chunk_type = EBML_CHUNK_HEADER;
500

501
    if (len < 1) {
502
        return 0;
503
    }
504

505 506
    switch (ebml->output_state) {
        case EBML_STATE_READING_HEADER:
507

508 509 510 511
            if (ebml->header_size != 0)
            {
                /* Can read a chunk of the header */
                read_space = ebml->header_size - ebml->header_read_position;
512

513 514 515 516 517
                if (read_space >= len) {
                    to_read = len;
                } else {
                    to_read = read_space;
                }
518

519 520
                memcpy(buffer, ebml->header, to_read);
                ebml->header_read_position += to_read;
521

522
                *chunk_type = EBML_CHUNK_HEADER;
523

524 525 526 527 528 529 530
                if (ebml->header_read_position == ebml->header_size) {
                    ebml->output_state = EBML_STATE_READING_CLUSTERS;
                }
            } else {
                /* The header's not ready yet */
                return 0;
            }
531

532
            break;
533

534
        case EBML_STATE_READING_CLUSTERS:
535

536 537
            *chunk_type = EBML_CHUNK_CLUSTER_CONTINUE;
            read_space = ebml->position;
538

539 540
            if (ebml->cluster_start == 0) {
                /* new cluster is starting now */
541

542 543 544 545 546 547 548
                if (ebml->cluster_starts_with_keyframe != EBML_KEYFRAME_DOES_NOT_START_CLUSTER) {
                    /* If we positively identified the first video frame as a non-keyframe,
                     * don't use this cluster as a sync point. Since some files lack
                     * video tracks completely, or we may have failed to probe
                     * the first video frame, it's better to be pass through
                     * ambiguous cases to avoid blocking the stream forever.
                     */
549 550
                    *chunk_type = EBML_CHUNK_CLUSTER_START;
                }
551

552 553 554
                /* mark end of cluster */
                ebml->cluster_start = -1;
            } else if (ebml->cluster_start > 0) {
555 556 557
                /* return up until just before a new cluster starts */
                read_space = ebml->cluster_start;
            }
558

559 560 561
            if (read_space < 1) {
                return 0;
            }
562

563
            if (read_space >= len ) {
564
                to_read = len;
565
            } else {
566
                to_read = read_space;
567
            }
568

569
            memcpy(buffer, ebml->buffer, to_read);
570

571 572 573
            /* Shift unread data down to the start of the buffer */
            memmove(ebml->buffer, ebml->buffer + to_read, ebml->position - to_read);
            ebml->position -= to_read;
574

575 576
            if (ebml->cluster_start > 0) {
                ebml->cluster_start -= to_read;
577
            }
578

579
            break;
580 581 582 583 584 585
    }

    return to_read;

}

586 587 588 589 590
/* Get pointer & length of the buffer able to accept input.
 * 
 * Returns the start of the writable space;
 * Sets bytes to the amount of space available.
 */
591
static unsigned char *ebml_get_write_buffer(ebml_t *ebml, size_t *bytes)
592
{
593 594
    *bytes = EBML_SLICE_SIZE - ebml->input_position;
    return ebml->input_buffer + ebml->input_position;
595 596
}

597 598
/* Process data that has been written to the EBML parser's input buffer.
 */
599
static ssize_t ebml_wrote(ebml_t *ebml, size_t len)
600
{
601
    bool processing = true;
602 603
    size_t cursor = 0;
    size_t to_copy;
604
    unsigned char *end_of_buffer;
605

606 607 608
    ssize_t tag_length;
    ssize_t value_length;
    ssize_t track_number_length;
609
    uint_least64_t tag_id;
610 611 612
    uint_least64_t payload_length;
    uint_least64_t data_value;
    uint_least64_t track_number;
613
    unsigned char flags;
614
    ebml_parsing_state copy_state;
615

616 617
    ebml->input_position += len;
    end_of_buffer = ebml->input_buffer + ebml->input_position;
618

619
    while (processing) {
620

621
        switch (ebml->parse_state) {
622

623 624
            case EBML_STATE_PARSING_HEADER:
            case EBML_STATE_PARSING_CLUSTERS:
625

626 627 628 629 630
                if (ebml->parse_state == EBML_STATE_PARSING_HEADER) {
                    copy_state = EBML_STATE_COPYING_TO_HEADER;
                } else {
                    copy_state = EBML_STATE_COPYING_TO_DATA;
                }
631

632
                tag_length = ebml_parse_tag(ebml->input_buffer + cursor,
633
                                            end_of_buffer, &tag_id, &payload_length);
634

635 636 637 638 639 640 641 642
                if (tag_length == 0) {
                    /* Wait for more data */
                    processing = false;
                    break;
                } else if (tag_length < 0) {
                    /* Parse error */
                    return -1;
                }
643

644 645 646 647
                if (payload_length == EBML_UNKNOWN) {
                    /* Parse all children for tags we can't skip */
                    payload_length = 0;
                }
648

649
                    /* Recognize tags of interest */
650 651
                    if (tag_length > UNCOMMON_MAGIC_LEN) {
                        if (!memcmp(ebml->input_buffer + cursor, CLUSTER_MAGIC, UNCOMMON_MAGIC_LEN)) {
652 653 654
                            /* Found a Cluster */
                            ebml->parse_state = EBML_STATE_START_CLUSTER;
                            break;
655
                        } else if (!memcmp(ebml->input_buffer + cursor, SEGMENT_MAGIC, UNCOMMON_MAGIC_LEN)) {
656 657
                            /* Parse all Segment children */
                            payload_length = 0;
658

659
                        } else if (!memcmp(ebml->input_buffer + cursor, TRACKS_MAGIC, UNCOMMON_MAGIC_LEN)) {
660 661
                            /* Parse all Tracks children */
                            payload_length = 0;
662

663
                        }
664

665
                    }
666

667 668
                    if (tag_length > COMMON_MAGIC_LEN) {
                        if (!memcmp(ebml->input_buffer + cursor, SIMPLE_BLOCK_MAGIC, COMMON_MAGIC_LEN)) {
669 670 671 672
                            /* Probe SimpleBlock header for the keyframe status */
                            if (ebml->cluster_starts_with_keyframe == EBML_KEYFRAME_UNKNOWN) {
                                track_number_length = ebml_parse_var_int(ebml->input_buffer + cursor + tag_length,
                                                                  end_of_buffer, &track_number);
673

674 675
                                if (track_number_length == 0) {
                                    /* Wait for more data */
676
                                    processing = false;
677 678 679 680
                                } else if (track_number_length < 0) {
                                    return -1;
                                } else if (track_number == ebml->keyframe_track_number) {
                                    /* this block belongs to the video track */
681

682 683 684
                                    /* skip the 16-bit timecode for now, read the flags byte */
                                    if (cursor + tag_length + track_number_length + 2 >= ebml->input_position) {
                                        /* Wait for more data */
685
                                        processing = false;
686 687
                                    } else {
                                        flags = ebml->input_buffer[cursor + tag_length + track_number_length + 2];
688

689 690 691 692 693 694 695 696 697
                                        if (flags & 0x80) {
                                            /* "keyframe" flag is set */
                                            ebml->cluster_starts_with_keyframe = EBML_KEYFRAME_STARTS_CLUSTER;
                                            /* ICECAST_LOG_DEBUG("Found keyframe in track %hhu", track_number); */
                                        } else {
                                            ebml->cluster_starts_with_keyframe = EBML_KEYFRAME_DOES_NOT_START_CLUSTER;
                                            /* ICECAST_LOG_DEBUG("Found non-keyframe in track %hhu", track_number); */
                                        }
                                    }
698

699
                                }
700

701
                            }
702

703
                        } else if (!memcmp(ebml->input_buffer + cursor, TRACK_ENTRY_MAGIC, COMMON_MAGIC_LEN)) {
704 705 706
                            /* Parse all TrackEntry children; reset the state */
                            payload_length = 0;
                            ebml->parsing_track_number = EBML_UNKNOWN;
707
                            ebml->parsing_track_is_video = false;
708

709
                        } else if (!memcmp(ebml->input_buffer + cursor, TRACK_NUMBER_MAGIC, COMMON_MAGIC_LEN)) {
710 711 712
                            /* Probe TrackNumber for value */
                            value_length = ebml_parse_sized_int(ebml->input_buffer + cursor + tag_length,
                                                                end_of_buffer, payload_length, 0, &data_value);
713

714 715
                            if (value_length == 0) {
                                /* Wait for more data */
716
                                processing = false;
717 718 719 720 721 722
                            } else if (value_length < 0) {
                                return -1;
                            } else {
                                ebml->parsing_track_number = data_value;
                                ebml_check_track(ebml);
                            }
723

724
                        } else if (!memcmp(ebml->input_buffer + cursor, TRACK_TYPE_MAGIC, COMMON_MAGIC_LEN)) {
725 726 727
                            /* Probe TrackType for a video flag */
                            value_length = ebml_parse_sized_int(ebml->input_buffer + cursor + tag_length,
                                                                end_of_buffer, payload_length, 0, &data_value);
728

729 730
                            if (value_length == 0) {
                                /* Wait for more data */
731
                                processing = false;
732 733 734 735 736
                            } else if (value_length < 0) {
                                return -1;
                            } else {
                                if (data_value & 0x01) {
                                    /* This is a video track (0x01 flag = video) */
737
                                    ebml->parsing_track_is_video = true;
738 739 740
                                    ebml_check_track(ebml);
                                }
                            }
741

742 743
                        }
                    }
744

745
                    if (processing) {
746
                        /* Moving to next element, copy current to buffer */
747 748 749 750 751
                        ebml->copy_len = tag_length + payload_length;
                        ebml->parse_state = copy_state;
                    }

                break;
752

753 754 755 756 757 758 759
            case EBML_STATE_START_CLUSTER:
                /* found a cluster; wait to process it until
                 * any previous cluster tag has been flushed
                 * from the read buffer, so as to not lose the
                 * sync point.
                 */
                if (ebml->cluster_start >= 0) {
760
                    /* Allow the cluster in the read buffer to flush. */
761 762
                    ebml->flush_cluster = true;
                    processing = false;
763
                } else {
764

765
                    tag_length = ebml_parse_tag(ebml->input_buffer + cursor,
766
                                                end_of_buffer, &tag_id, &payload_length);
767

768 769
                    /* The header has been fully read by now, publish its size. */
                    ebml->header_size = ebml->header_position;
770

771
                    /* Mark this potential sync point, prepare probe */
772
                    ebml->cluster_start = ebml->position;
773
                    ebml->cluster_starts_with_keyframe = EBML_KEYFRAME_UNKNOWN;
774

775
                    /* Buffer data to give us time to probe for keyframes, etc. */
776
                    ebml->flush_cluster = false;
777

778 779 780 781 782
                    /* Copy cluster tag to read buffer */
                    ebml->copy_len = tag_length;
                    ebml->parse_state = EBML_STATE_COPYING_TO_DATA;
                }
                break;
783

784 785 786 787 788 789
            case EBML_STATE_COPYING_TO_HEADER:
            case EBML_STATE_COPYING_TO_DATA:
                to_copy = ebml->input_position - cursor;
                if (to_copy > ebml->copy_len) {
                    to_copy = ebml->copy_len;
                }
790

791 792 793 794 795
                if (ebml->parse_state == EBML_STATE_COPYING_TO_HEADER) {
                    if ((ebml->header_position + to_copy) > EBML_HEADER_MAX_SIZE) {
                        ICECAST_LOG_ERROR("EBML Header too large, failing");
                        return -1;
                    }
796

797 798
                    memcpy(ebml->header + ebml->header_position, ebml->input_buffer + cursor, to_copy);
                    ebml->header_position += to_copy;
799

800 801 802 803
                } else if (ebml->parse_state == EBML_STATE_COPYING_TO_DATA) {
                    if ((ebml->position + to_copy) > EBML_SLICE_SIZE) {
                        to_copy = EBML_SLICE_SIZE - ebml->position;
                    }
804

805 806 807
                    memcpy(ebml->buffer + ebml->position, ebml->input_buffer + cursor, to_copy);
                    ebml->position += to_copy;
                }
808

809 810
                cursor += to_copy;
                ebml->copy_len -= to_copy;
811

812 813 814 815 816 817 818 819 820
                if (ebml->copy_len == 0) {
                    /* resume parsing */
                    if (ebml->parse_state == EBML_STATE_COPYING_TO_HEADER) {
                        ebml->parse_state = EBML_STATE_PARSING_HEADER;
                    } else {
                        ebml->parse_state = EBML_STATE_PARSING_CLUSTERS;
                    }
                } else {
                    /* wait for more data */
821
                    processing = false;
822
                }
823

824
                break;
825

826
            default:
827
                processing = false;
828

829
        }
830

831
    }
832

833 834 835
    /* Shift unprocessed data down to the start of the buffer */
    memmove(ebml->input_buffer, ebml->input_buffer + cursor, ebml->input_position - cursor);
    ebml->input_position -= cursor;
836

837
    return len;
838

839
}
840

841 842 843 844 845
/* Record the track currently being parsed as the keyframe track, but only
 * if no keyframe track has been recorded yet, the track being parsed is
 * flagged as video, and its track number is known.
 *
 * Once keyframe_track_number holds a value other than EBML_UNKNOWN this
 * function leaves it alone, so the first qualifying track wins.
 */
static inline void ebml_check_track(ebml_t *ebml)
{
    /* A keyframe track was already picked; nothing to do. */
    if (ebml->keyframe_track_number != EBML_UNKNOWN) {
        return;
    }

    /* Only a video track whose number has been parsed qualifies. */
    if (!ebml->parsing_track_is_video
        || ebml->parsing_track_number == EBML_UNKNOWN) {
        return;
    }

    ebml->keyframe_track_number = ebml->parsing_track_number;
    ICECAST_LOG_DEBUG("Identified track #%llu as the video track", (long long unsigned int)ebml->keyframe_track_number);
}

852 853 854 855 856 857 858 859 860 861 862 863
/* Parse one EBML tag header (element ID followed by payload size) that
 * starts at buffer.
 *
 * buffer_end is the end-of-buffer address; no byte at or beyond it is read.
 * *tag_id and *payload_length are reset to 0 on entry and are filled in
 * as each of the two var-ints is decoded successfully.
 *
 * Returns the combined length in bytes of the ID and size fields on
 * success (the payload itself is not included).
 *
 * Returns 0 if it would be necessary to read past buffer_end to get a
 * complete tag header.
 *
 * Returns -1 if the tag is corrupt.
 */
static ssize_t ebml_parse_tag(unsigned char *buffer,
                              unsigned char *buffer_end,
                              uint_least64_t *tag_id,
                              uint_least64_t *payload_length)
{
    ssize_t id_bytes;
    ssize_t length_bytes;

    /* Give both outputs a defined value even on the early-exit paths. */
    *tag_id = 0;
    *payload_length = 0;

    /* First var-int: the element ID. */
    id_bytes = ebml_parse_var_int(buffer, buffer_end, tag_id);
    if (id_bytes < 1) {
        /* 0 (need more data) or -1 (corrupt) is passed straight through. */
        return id_bytes;
    }

    /* Second var-int, immediately after the ID: the payload size. */
    length_bytes = ebml_parse_var_int(&buffer[id_bytes], buffer_end, payload_length);
    if (length_bytes < 1) {
        return length_bytes;
    }

    return id_bytes + length_bytes;
}

/* Try to parse an EBML variable-length integer.
 * Returns 0 if there's not enough space to read the number;
 * Returns -1 if the number is malformed.
 * Else, returns the length of the number in bytes and writes the
 * value to *out_value.
 */
898 899
static ssize_t ebml_parse_var_int(unsigned char *buffer,
                                 unsigned char *buffer_end,
900
                                 uint_least64_t *out_value)
901
{
902 903
    ssize_t size = 1;
    ssize_t i;
904
    unsigned char mask = 0x80;
905 906
    uint_least64_t value;
    uint_least64_t unknown_marker;
907

908 909 910
    if (buffer >= buffer_end) {
        return 0;
    }
911

912 913
    /* find the length marker bit in the first byte */
    value = buffer[0];
914

915 916 917 918 919 920 921 922 923
    while (mask) {
        if (value & mask) {
            value = value & ~mask;
            unknown_marker = mask - 1;
            break;
        }
        size++;
        mask = mask >> 1;
    }
924

925 926 927 928 929
    /* catch malformed number (no prefix) */
    if (mask == 0) {
        ICECAST_LOG_DEBUG("Corrupt var-int");
        return -1;
    }
930

931 932 933 934
    /* catch number bigger than parsing buffer */
    if (buffer + size - 1 >= buffer_end) {
        return 0;
    }
935

936 937 938 939 940
    /* read remaining bytes of (big-endian) number */
    for (i = 1; i < size; i++) {
        value = (value << 8) + buffer[i];
        unknown_marker = (unknown_marker << 8) + 0xFF;
    }
941

942
    /* catch special "unknown" length */
943

944 945 946 947 948 949 950 951 952
    if (value == unknown_marker) {
        *out_value = EBML_UNKNOWN;
    } else {
        *out_value = value;
    }

/*
    ICECAST_LOG_DEBUG("Varint: value %lli, unknown %llu, mask %hhu, size %i", value, unknown_marker, mask, size);
*/
953

954 955
    return size;
}
956

957
/* Parse a big-endian int that may be from 1-8 bytes long.
 *
 * buffer is the first (most significant) byte of the number and
 * buffer_end is the end-of-buffer address; no byte at or beyond
 * buffer_end is read. len is the encoded size of the number in bytes.
 *
 * Returns 0 if there's not enough space to read the number;
 * Returns -1 if the number is mis-sized (len is not in the range 1-8).
 * Else, returns the length of the number in bytes and writes the
 * value to *out_value. *out_value is only written on success.
 *
 * If is_signed is true, then the int is assumed to be two's complement
 * signed, negative values will be correctly promoted, and the returned
 * unsigned number can be safely cast to a signed number on systems using
 * two's complement arithmetic.
 */
static ssize_t ebml_parse_sized_int(unsigned char       *buffer,
                                    unsigned char       *buffer_end,
                                    size_t              len,
                                    bool                 is_signed,
                                    uint_least64_t  *out_value)
{
    uint_least64_t value;
    size_t i;

    if (len < 1 || len > 8) {
        /* len is a size_t, so it must be cast to match the conversion. */
        ICECAST_LOG_DEBUG("Sized int of %llu bytes", (long long unsigned int) len);
        return -1;
    }

    /* Need len readable bytes in [buffer, buffer_end). A number that ends
     * exactly at buffer_end is complete and must be accepted. Comparing
     * the remaining byte count avoids forming a pointer past the buffer.
     */
    if (buffer >= buffer_end || (size_t) (buffer_end - buffer) < len) {
        return 0;
    }

    /* For a negative signed number start from all ones, so that the bits
     * above the encoded width end up sign-extended after the shifts below.
     */
    if (is_signed && (buffer[0] & 0x80) != 0) {
        value = ~(uint_least64_t) 0;
    } else {
        value = 0;
    }

    /* Shift the bytes in, most significant first. */
    for (i = 0; i < len; i++) {
        value = (value << 8) + buffer[i];
    }

    *out_value = value;

    return (ssize_t) len;
}