Commit 74805cc2 authored by shans's avatar shans

Added content type detection and retrieval to liboggz. If oggz files are
opened with the OGGZ_AUTO flag, the content type of a stream can be retrieved
using oggz_stream_get_content once the first page of that stream has been read.
A human-readable string representation can also be retrieved using
oggz_stream_get_content_type.

Updated oggzinfo, etc. to use liboggz variant of content type detection.



git-svn-id: http://svn.annodex.net/liboggz/trunk@2339 8158c8cd-e7e1-0310-9fa4-c5954c97daef
parent eb7955b6
......@@ -96,6 +96,23 @@ enum OggzFlushOpts {
OGGZ_FLUSH_AFTER = 0x02
};
/**
* Definition of stream content types
*/
typedef enum OggzStreamContent {
OGGZ_CONTENT_THEORA = 0,
OGGZ_CONTENT_VORBIS,
OGGZ_CONTENT_SPEEX,
OGGZ_CONTENT_PCM,
OGGZ_CONTENT_CMML,
OGGZ_CONTENT_ANX2,
OGGZ_CONTENT_SKELETON,
OGGZ_CONTENT_FLAC0,
OGGZ_CONTENT_FLAC,
OGGZ_CONTENT_ANXDATA,
OGGZ_CONTENT_UNKNOWN
} OggzStreamContent;
/**
* Definitions of error return values
*/
......
......@@ -181,4 +181,28 @@ long oggz_read_input (OGGZ * oggz, unsigned char * buf, long n);
*/
int oggz_purge (OGGZ * oggz);
/**
* Determine the content type of the oggz stream referred to by \a serialno.
*
* Content types are detected when \a oggz was opened with the OGGZ_AUTO
* flag, and become available once the first page of the stream has been
* read.
*
* \param oggz An OGGZ handle
* \param serialno An ogg stream serialno
* \retval OGGZ_CONTENT_THEORA..OGGZ_CONTENT_UNKNOWN the content type of the
* stream; OGGZ_CONTENT_UNKNOWN if no known signature
* matched
* \retval OGGZ_ERR_BAD_OGGZ \a oggz does not refer to an existing OGGZ
* \retval OGGZ_ERR_BAD_SERIALNO \a serialno does not refer to an existing
* stream
*/
OggzStreamContent oggz_stream_get_content (OGGZ * oggz, long serialno);
/**
* Return human-readable string representation of content type of oggz stream
* referred to by \a serialno
*
* The returned string is the content_type field of the
* oggz_auto_codec_ident entry for the stream (for example "Theora" or
* "Vorbis"); a stream whose content is OGGZ_CONTENT_UNKNOWN yields
* "Unknown". The string belongs to that constant table and must not be
* modified or freed by the caller.
*
* \param oggz An OGGZ handle
* \param serialno An ogg stream serialno
* \retval string the name of the content type
* \retval NULL \a oggz or \a serialno invalid
*/
const char * oggz_stream_get_content_type (OGGZ *oggz, long serialno);
#endif /* __OGGZ_READ_H__ */
......@@ -19,7 +19,7 @@ liboggz_la_SOURCES = \
oggz_stream.c oggz_stream.h \
oggz_table.c \
oggz_vector.c oggz_vector.h \
metric_internal.c
metric_internal.c
liboggz_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ @SHLIB_VERSION_ARG@
liboggz_la_LIBADD = @OGG_LIBS@
......@@ -56,6 +56,9 @@
oggz_io_set_flush;
oggz_io_get_flush_user_handle;
oggz_stream_get_content;
oggz_stream_get_content_type;
oggz_table_new;
oggz_table_delete;
oggz_table_insert;
......
......@@ -73,9 +73,6 @@ auto_speex (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
if (op->bytes < 68) return 0;
if (strncmp ((char *)header, "Speex ", 8)) return 0;
if (!op->b_o_s) return 0;
granule_rate = (ogg_int64_t) INT32_LE_AT(&header[36]);
#ifdef DEBUG
printf ("Got speex rate %d\n", (int)granule_rate);
......@@ -94,10 +91,6 @@ auto_vorbis (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
if (op->bytes < 30) return 0;
if (header[0] != 0x01) return 0;
if (strncmp ((char *)&header[1], "vorbis", 6)) return 0;
if (!op->b_o_s) return 0;
granule_rate = (ogg_int64_t) INT32_LE_AT(&header[12]);
#ifdef DEBUG
printf ("Got vorbis rate %d\n", (int)granule_rate);
......@@ -129,10 +122,6 @@ auto_theora (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
if (op->bytes < 41) return 0;
if (header[0] != 0x80) return 0;
if (strncmp ((char *)&header[1], "theora", 6)) return 0;
if (!op->b_o_s) return 0;
fps_numerator = INT32_BE_AT(&header[22]);
fps_denominator = INT32_BE_AT(&header[26]);
......@@ -167,13 +156,6 @@ auto_theora (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
static int
auto_annodex (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
{
unsigned char * header = op->packet;
if (op->bytes < 8) return 0;
if (strncmp ((char *)header, "Annodex", 8)) return 0;
if (!op->b_o_s) return 0;
/* Apply a zero metric */
oggz_set_granulerate (oggz, serialno, 0, 1);
......@@ -188,9 +170,6 @@ auto_anxdata (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
if (op->bytes < 28) return 0;
if (strncmp ((char *)header, "AnxData", 8)) return 0;
if (!op->b_o_s) return 0;
granule_rate_numerator = INT64_LE_AT(&header[8]);
granule_rate_denominator = INT64_LE_AT(&header[16]);
#ifdef DEBUG
......@@ -209,24 +188,15 @@ static int
auto_flac0 (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
{
unsigned char * header = op->packet;
int content;
ogg_int64_t granule_rate = 0;
if (op->b_o_s) {
if (op->bytes < 4) return 0;
if (strncmp ((char *)header, "fLaC", 4)) return 0;
oggz_stream_set_content (oggz, serialno, OGGZ_CONTENT_FLAC0);
} else {
content = oggz_stream_get_content (oggz, serialno);
if (content != OGGZ_CONTENT_FLAC0) return 0;
granule_rate = (ogg_int64_t) (header[14] << 12) | (header[15] << 4) | ((header[16] >> 4)&0xf);
granule_rate = (ogg_int64_t) (header[14] << 12) | (header[15] << 4) |
((header[16] >> 4)&0xf);
#ifdef DEBUG
printf ("Got flac rate %d\n", (int)granule_rate);
#endif
oggz_set_granulerate (oggz, serialno, granule_rate, OGGZ_AUTO_MULT);
}
oggz_set_granulerate (oggz, serialno, granule_rate, OGGZ_AUTO_MULT);
return 1;
}
......@@ -239,11 +209,8 @@ auto_flac (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
if (op->bytes < 51) return 0;
if (header[0] != 0x7f) return 0;
if (strncmp ((char *)&header[1], "FLAC", 4)) return 0;
if (!op->b_o_s) return 0;
granule_rate = (ogg_int64_t) (header[27] << 12) | (header[28] << 4) | ((header[29] >> 4)&0xf);
granule_rate = (ogg_int64_t) (header[27] << 12) | (header[28] << 4) |
((header[29] >> 4)&0xf);
#ifdef DEBUG
printf ("Got flac rate %d\n", (int)granule_rate);
#endif
......@@ -265,9 +232,6 @@ auto_oggpcm2 (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
if (op->bytes < 28) return 0;
if (strncmp ((char *)header, "PCM ", 8)) return 0;
if (!op->b_o_s) return 0;
granule_rate = (ogg_int64_t) INT32_BE_AT(&header[16]);
#ifdef DEBUG
printf ("Got OggPCM2 rate %d\n", (int)granule_rate);
......@@ -287,9 +251,6 @@ auto_cmml (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
if (op->bytes < 28) return 0;
if (strncmp ((char *)header, "CMML", 4)) return 0;
if (!op->b_o_s) return 0;
granule_rate_numerator = INT64_LE_AT(&header[12]);
granule_rate_denominator = INT64_LE_AT(&header[20]);
if (op->bytes > 28)
......@@ -310,31 +271,9 @@ auto_cmml (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
return 1;
}
static int
auto_fishead (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
{
unsigned char * header = op->packet;
int content;
if (op->b_o_s) {
if (op->bytes < 8) return 0;
if (strncmp ((char *)header, "fishead", 8)) return 0;
oggz_stream_set_content (oggz, serialno, OGGZ_CONTENT_SKELETON);
} else if (op->e_o_s) {
content = oggz_stream_get_content (oggz, serialno);
if (content != OGGZ_CONTENT_SKELETON) return 0;
/* Finished processing the skeleton; apply a zero metric */
oggz_set_granulerate (oggz, serialno, 0, 1);
}
return 1;
}
static int
auto_fisbone (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
{
int content;
unsigned char * header = op->packet;
long fisbone_serialno; /* The serialno referred to in this fisbone */
ogg_int64_t granule_rate_numerator = 0, granule_rate_denominator = 0;
......@@ -342,10 +281,6 @@ auto_fisbone (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
if (op->bytes < 48) return 0;
if (strncmp ((char *)header, "fisbone", 7)) return 0;
content = oggz_stream_get_content (oggz, serialno);
if (content != OGGZ_CONTENT_SKELETON) return 0;
fisbone_serialno = (long) INT32_LE_AT(&header[12]);
/* Don't override an already assigned metric */
......@@ -369,32 +304,68 @@ auto_fisbone (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
return 1;
}
static const OggzReadPacket auto_readers[] = {
auto_speex,
auto_vorbis,
auto_theora,
auto_annodex,
auto_anxdata,
auto_flac0,
auto_flac,
auto_cmml,
auto_fishead,
auto_fisbone,
auto_oggpcm2,
NULL
};
/*
 * Packet reader for Skeleton ("fishead") streams.
 *
 * The beginning-of-stream packet gets a zero metric (granulerate 0/1).
 * Every other packet of the stream is forwarded to auto_fisbone, whose
 * return value is passed straight back to the caller.
 */
static int
auto_fishead (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
{
  if (op->b_o_s) {
    /* bos packet: apply a zero metric to the skeleton stream itself */
    oggz_set_granulerate (oggz, serialno, 0, 1);
    return 1;
  }

  /* any later packet is handled by the fisbone reader */
  return auto_fisbone (oggz, op, serialno, user_data);
}
const oggz_auto_contenttype_t oggz_auto_codec_ident[] = {
{"\200theora", 7, "Theora", auto_theora},
{"\001vorbis", 7, "Vorbis", auto_vorbis},
{"Speex", 5, "Speex", auto_speex},
{"PCM ", 8, "PCM", auto_oggpcm2},
{"CMML\0\0\0\0", 8, "CMML", auto_cmml},
{"Annodex", 8, "Annodex", auto_annodex},
{"fishead", 7, "Skeleton", auto_fishead},
{"fLaC", 4, "Flac0", auto_flac0},
{"\177FLAC", 4, "Flac", auto_flac},
{"AnxData", 7, "AnxData", auto_anxdata},
{"", 0, "Unknown"}
};
/*
 * Identify the content type of stream \a serialno from the body of page
 * \a og by comparing the start of the page body against each signature in
 * oggz_auto_codec_ident, in table order.
 *
 * On the first match the stream's content is set to the index of the
 * matching entry and 1 is returned. If nothing matches, the content is set
 * to OGGZ_CONTENT_UNKNOWN and 0 is returned.
 */
int oggz_auto_identify (OGGZ *oggz, ogg_page *og, long serialno) {
  int type;

  for (type = 0; type < OGGZ_CONTENT_UNKNOWN; type++) {
    const oggz_auto_contenttype_t *entry = &oggz_auto_codec_ident[type];

    /* page body too short to hold this signature */
    if (og->body_len < entry->bos_str_len) continue;

    /* signature bytes differ */
    if (memcmp (og->body, entry->bos_str, entry->bos_str_len) != 0) continue;

    oggz_stream_set_content (oggz, serialno, type);
    return 1;
  }

  oggz_stream_set_content (oggz, serialno, OGGZ_CONTENT_UNKNOWN);
  return 0;
}
int
oggz_auto (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data)
oggz_auto_get_granulerate (OGGZ * oggz, ogg_packet * op, long serialno,
void * user_data)
{
OggzReadPacket read_packet;
int i = 0;
int content = 0;
int will_run_function;
for (read_packet = auto_readers[0]; read_packet;
read_packet = auto_readers[++i]) {
if (read_packet (oggz, op, serialno, user_data)) return 0;
content = oggz_stream_get_content(oggz, serialno);
if (content < 0 || content >= OGGZ_CONTENT_UNKNOWN) {
return 0;
}
oggz_auto_codec_ident[content].reader(oggz, op, serialno, user_data);
return 0;
}
......
......@@ -388,4 +388,6 @@ This was the Theora header for theora-alpha2:
*/
int oggz_auto_identify (OGGZ *oggz, ogg_page *og, long serialno);
#endif /* __OGGZ_AUTO_H__ */
......@@ -47,14 +47,15 @@ typedef struct _OggzIO OggzIO;
typedef struct _OggzReader OggzReader;
typedef struct _OggzWriter OggzWriter;
/* oggz_stream */
#include "oggz_stream.h"
typedef int (*OggzReadPacket) (OGGZ * oggz, ogg_packet * op, long serialno,
void * user_data);
typedef int (*OggzReadPage) (OGGZ * oggz, const ogg_page * og, long serialno,
void * user_data);
/* oggz_stream */
#include "oggz_stream.h"
typedef ogg_int64_t (*OggzMetric) (OGGZ * oggz, long serialno,
ogg_int64_t granulepos,
void * user_data);
......@@ -235,7 +236,8 @@ int oggz_has_metrics (OGGZ * oggz);
int oggz_purge (OGGZ * oggz);
int oggz_auto (OGGZ * oggz, ogg_packet * op, long serialno, void * user_data);
int oggz_auto_get_granulerate (OGGZ * oggz, ogg_packet * op, long serialno,
void * user_data);
/* oggz_io */
size_t oggz_io_read (OGGZ * oggz, void * buf, size_t n);
......
......@@ -293,8 +293,23 @@ oggz_read_sync (OGGZ * oggz)
/* got a packet. process it */
granulepos = op->granulepos;
if (!stream->metric && (oggz->flags & OGGZ_AUTO)) {
oggz_auto (oggz, op, serialno, NULL);
/*
* need to call oggz_auto to process Anx v2 streams which were headed
* with AnxData packets. This enables the AnxData-provided granulerate
* to be overridden by the stream's rate if present
*/
if
(
(
!stream->metric
||
(oggz_stream_get_content(oggz, serialno) == OGGZ_CONTENT_SKELETON)
)
&&
(oggz->flags & OGGZ_AUTO)
)
{
oggz_auto_get_granulerate (oggz, op, serialno, NULL);
}
/* set unit on last packet of page */
......@@ -332,7 +347,18 @@ oggz_read_sync (OGGZ * oggz)
/* error -- could not add stream */
return -7;
}
/* identify stream type */
oggz_auto_identify(oggz, &og, serialno);
}
else if (oggz_stream_get_content(oggz, serialno) == OGGZ_CONTENT_ANXDATA)
{
/*
* re-identify ANXDATA streams as these are now content streams
*/
oggz_auto_identify(oggz, &og, serialno);
}
os = &stream->ogg_stream;
{
......
......@@ -47,13 +47,28 @@ oggz_stream_set_content (OGGZ * oggz, long serialno, int content)
return 0;
}
int
/*
 * Look up the content type recorded for stream \a serialno.
 *
 * Returns OGGZ_ERR_BAD_OGGZ when \a oggz is NULL, OGGZ_ERR_BAD_SERIALNO
 * when no stream with that serialno is found, and otherwise the stream's
 * stored content value.
 */
OggzStreamContent
oggz_stream_get_content (OGGZ * oggz, long serialno)
{
  oggz_stream_t * found;

  if (oggz == NULL) {
    return OGGZ_ERR_BAD_OGGZ;
  }

  found = oggz_get_stream (oggz, serialno);
  if (found != NULL) {
    return found->content;
  }

  return OGGZ_ERR_BAD_SERIALNO;
}
/*
 * Return the human-readable name of the content type of stream
 * \a serialno, taken from the content_type field of the matching
 * oggz_auto_codec_ident entry.
 *
 * Returns NULL when oggz_stream_get_content reports OGGZ_ERR_BAD_OGGZ or
 * OGGZ_ERR_BAD_SERIALNO, and also when the stored content value does not
 * correspond to a table entry, so the table is never indexed out of range.
 */
const char *
oggz_stream_get_content_type (OGGZ *oggz, long serialno)
{
  int content = oggz_stream_get_content(oggz, serialno);

  if (content == OGGZ_ERR_BAD_SERIALNO || content == OGGZ_ERR_BAD_OGGZ)
  {
    return NULL;
  }

  /*
   * The table has exactly one entry per OggzStreamContent value, ending
   * with OGGZ_CONTENT_UNKNOWN; anything outside that range has no name.
   */
  if (content < 0 || content > OGGZ_CONTENT_UNKNOWN)
  {
    return NULL;
  }

  return oggz_auto_codec_ident[content].content_type;
}
......@@ -33,11 +33,14 @@
#ifndef __OGGZ_STREAM_H__
#define __OGGZ_STREAM_H__
enum {
OGGZ_CONTENT_UNKNOWN = 0,
OGGZ_CONTENT_SKELETON = 1,
OGGZ_CONTENT_FLAC0 = 13
};
/*
 * One row of the oggz_auto_codec_ident table: describes how to recognise a
 * content type and what to do with its packets.
 */
typedef struct {
const char *bos_str; /* signature compared against the start of a page body */
int bos_str_len; /* number of bytes of bos_str to compare */
const char *content_type; /* human-readable name of the content type */
OggzReadPacket reader; /* packet reader called for streams of this type */
} oggz_auto_contenttype_t;
extern const oggz_auto_contenttype_t oggz_auto_codec_ident[];
typedef struct _oggz_stream_t oggz_stream_t;
......@@ -46,6 +49,7 @@ oggz_stream_t * oggz_add_stream (OGGZ * oggz, long serialno);
int oggz_stream_has_metric (OGGZ * oggz, long serialno);
int oggz_stream_set_content (OGGZ * oggz, long serialno, int content);
int oggz_stream_get_content (OGGZ * oggz, long serialno);
OggzStreamContent oggz_stream_get_content (OGGZ * oggz, long serialno);
const char * oggz_stream_get_content_type (OGGZ *oggz, long serialno);
#endif /* __OGGZ_STREAM_H__ */
......@@ -277,7 +277,7 @@ oggz_write_feed (OGGZ * oggz, ogg_packet * op, long serialno, int flush,
/* OK -- Update stream's memory of packet details */
if (!stream->metric && (oggz->flags & OGGZ_AUTO)) {
oggz_auto (oggz, op, serialno, NULL);
oggz_auto_get_granulerate (oggz, op, serialno, NULL);
}
stream->b_o_s = 0; /* The stream is henceforth no longer at bos */
......
......@@ -108,7 +108,7 @@ filter_page (OGGZ * oggz, const ogg_page * og, long serialno, void * user_data)
/* set scanning callback for keyframe calculation on theora pages only */
if (osdata->keyframes && ogg_page_bos ((ogg_page *)og)) {
ident = ot_page_identify (og, NULL);
ident = ot_page_identify (oggz, og, NULL);
if (ident && (strcasecmp ("theora", ident) == 0)) {
oggz_set_read_callback (oggz, serialno, osdata->read_packet, osdata);
}
......
......@@ -173,7 +173,7 @@ read_page (OGGZ * oggz, const ogg_page * og, long serialno, void * user_data)
int ret = 0;
if (ogg_page_bos ((ogg_page *)og)) {
content_type = ot_page_identify (og, NULL);
content_type = ot_page_identify (oggz, og, NULL);
if (content_type) {
if (!strcmp (content_type, "Theora")) {
......
......@@ -115,13 +115,6 @@ _le_64 (ogg_int64_t l)
typedef char * (* OTCodecInfoFunc) (unsigned char * data, long n);
typedef struct {
const char *bos_str;
int bos_str_len;
const char *content_type;
OTCodecInfoFunc info_func;
} OTCodecIdent;
static char *
ot_theora_info (unsigned char * data, long len)
{
......@@ -209,44 +202,42 @@ ot_skeleton_info (unsigned char * data, long len)
return buf;
}
static const OTCodecIdent codec_ident[] = {
{"\200theora", 7, "Theora", ot_theora_info},
{"\001vorbis", 7, "Vorbis", ot_vorbis_info},
{"Speex", 5, "Speex", ot_speex_info},
{"PCM ", 8, "PCM", ot_oggpcm2_info},
{"CMML\0\0\0\0", 8, "CMML", NULL},
{"Annodex", 8, "Annodex", NULL},
{"fishead", 8, "Skeleton", ot_skeleton_info},
{NULL}
/*
 * Per-codec info functions, indexed by the content value returned by
 * oggz_stream_get_content; the order must therefore match the
 * OggzStreamContent enum. A NULL entry means no info function is available
 * for that content type.
 */
static const OTCodecInfoFunc codec_ident[] = {
ot_theora_info,
ot_vorbis_info,
ot_speex_info,
ot_oggpcm2_info,
NULL, /* CMML */
NULL, /* ANNODEX */
ot_skeleton_info,
NULL, /* FLAC0 */
NULL, /* FLAC */
NULL, /* ANXDATA */
NULL /* UNKNOWN */
};
const char *
ot_page_identify (const ogg_page * og, char ** info)
ot_page_identify (OGGZ *oggz, const ogg_page * og, char ** info)
{
const char * ret = NULL;
int i;
/* try to identify stream codec name by looking at the first bytes of the
* first packet */
for (i = 0;; i++) {
const OTCodecIdent *ident = &codec_ident[i];
if (ident->bos_str == NULL) {
ret = NULL;
break;
}
if (og->body_len >= ident->bos_str_len &&
memcmp (og->body, ident->bos_str, ident->bos_str_len) == 0) {
ret = ident->content_type;
if (info) {
if (ident->info_func) {
*info = ident->info_func (og->body, og->body_len);
} else {
*info = NULL;
}
}
break;
int serial_no;
int content;
/*
* identify stream content using oggz_stream_get_content, identify
* stream content name using oggz_stream_get_content_type
*/
serial_no = ogg_page_serialno(og);
content = oggz_stream_get_content(oggz, serial_no);
ret = oggz_stream_get_content_type(oggz, serial_no);
if (info != NULL)
{
if (codec_ident[content] != NULL)
{
*info = codec_ident[content](og->body, og->body_len);
}
}
......
......@@ -36,7 +36,7 @@
#include "config.h"
const char *
ot_page_identify (const ogg_page * og, char ** info);
ot_page_identify (OGGZ *oggz, const ogg_page * og, char ** info);
/*
* Print a number of bytes to 3 significant figures
......
......@@ -311,7 +311,7 @@ filter_page (OGGZ * oggz, const ogg_page * og, long serialno, void * user_data)
int i, n;
if (ogg_page_bos ((ogg_page *)og)) {
ident = ot_page_identify (og, NULL);
ident = ot_page_identify (oggz, og, NULL);
if (ident) {
n = oggz_table_size (oddata->content_types_table);
for (i = 0; i < n; i++) {
......
......@@ -274,7 +274,7 @@ read_page_pass1 (OGGZ * oggz, const ogg_page * og, long serialno, void * user_da
}
if (ogg_page_bos ((ogg_page *)og)) {
oit->codec_name = ot_page_identify (og, &oit->codec_info);
oit->codec_name = ot_page_identify (oggz, og, &oit->codec_info);
}
bytes = og->header_len + og->body_len;
......@@ -337,7 +337,7 @@ read_packet_pass2 (OGGZ * oggz, ogg_packet * op, long serialno,
OI_Info * info = (OI_Info *)user_data;
OI_TrackInfo * oit;
long deviation;
oit = oggz_table_lookup (info->tracks, serialno);
/* Increment the packet length deviation squared total */
......
......@@ -219,7 +219,8 @@ oggz_merge (OMData * omdata, FILE * outfile)
const char * codec_name;
int is_vorbis = 0;
if ((codec_name = ot_page_identify (input->og, NULL)) != NULL)
if ((codec_name =
ot_page_identify (input->reader, input->og, NULL)) != NULL)
is_vorbis = !strcmp (codec_name, "Vorbis");
if (i == 0 && is_vorbis)
......
......@@ -166,7 +166,8 @@ filter_stream_p (const ORData *ordata, ORStream *stream,
}
static ORStream *
orstream_new (const ORData *ordata, const ogg_page *og, long serialno)
orstream_new (OGGZ *oggz, const ORData *ordata, const ogg_page *og,
long serialno)
{
const char * ident;
......@@ -177,7 +178,7 @@ orstream_new (const ORData *ordata, const ogg_page *og, long serialno)
stream->streamid = streamid_count++;
stream->content_type = "unknown";
ident = ot_page_identify (og, NULL);
ident = ot_page_identify (oggz, og, NULL);
if (ident != NULL) stream->content_type = ident;
if (ordata->verbose)
......@@ -233,7 +234,7 @@ read_page (OGGZ *oggz, const ogg_page *og, long serialno, void *user_data)
ORStream *stream = oggz_table_lookup (ordata->streams, serialno);
if (ogg_page_bos ((ogg_page *)og)) {
stream = orstream_new (ordata, og, serialno);
stream = orstream_new (oggz, ordata, og, serialno);
stream = oggz_table_insert (ordata->streams, serialno, stream);
assert (stream != NULL);
......
......@@ -81,3 +81,8 @@ oggz_table_lookup @46
oggz_table_size @47
oggz_table_nth @48
;
;oggz stream functions (determining content type)
;
oggz_stream_get_content @100
oggz_stream_get_content_type @101
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment