Commit 96eb84c5 authored by Jan Gerber's avatar Jan Gerber
Browse files

* transcode text subtitles found in the video.

  Only text (raw UTF-8 and SSA) is supported.
  SSA tags are not properly removed yet.
* update libkate to 0.3.1

parent 0fd9b742
.\" Hey, EMACS: -*- nroff -*-
.TH FFMPEG2THEORA 1 "August 31, 2008"
.TH FFMPEG2THEORA 1 "April 9, 2009"
.\" Please adjust this date whenever revising the manpage.
.\"
.\" Some roff macros, for reference:
......@@ -179,6 +179,11 @@ will be ignored. This may be useful if there are stray sequences in
an otherwise UTF-8 file. Note that, since those invalid sequences
will be removed from the output, this option is not a substitute to
converting a non UTF-8 file to UTF-8.
.TP
.B \-\-nosubtitles
Disables subtitles from input.
Note that subtitles explicitly loaded from external files will still
be used.
.SS Metadata options:
.TP
.B \-\-artist
......
#!/bin/bash
version=0.3.0
version=0.3.1
baseurl="http://libkate.googlecode.com/files/libkate-$version.tar.gz"
which wget >& /dev/null
......
......@@ -44,6 +44,7 @@
#endif
#include "theorautils.h"
#include "iso639.h"
#include "subtitles.h"
#include "ffmpeg2theora.h"
......@@ -54,6 +55,7 @@ enum {
SYNC_FLAG,
NOAUDIO_FLAG,
NOVIDEO_FLAG,
NOSUBTITLES_FLAG,
NOUPSCALING_FLAG,
CROPTOP_FLAG,
CROPBOTTOM_FLAG,
......@@ -146,6 +148,7 @@ static ff2theora ff2theora_init() {
if (this != NULL) {
this->disable_audio=0;
this->disable_video=0;
this->disable_subtitles=0;
this->no_upscaling=0;
this->video_index = -1;
this->audio_index = -1;
......@@ -300,6 +303,73 @@ static void prepare_yuv_buffer(ff2theora this, yuv_buffer *yuv, AVFrame *frame)
}
}
/*
 * Reports whether input stream `idx` can be transcoded as subtitles.
 *
 * Returns 1 when the stream's codec type is CODEC_TYPE_SUBTITLE and its
 * codec id is CODEC_ID_TEXT or CODEC_ID_SSA, 0 in every other case.
 * `idx` is used directly as an index into this->context->streams; no
 * bounds check is performed here.
 */
static int is_supported_subtitle_stream(ff2theora this, int idx)
{
    AVCodecContext *codec_ctx = this->context->streams[idx]->codec;

    if (codec_ctx->codec_type != CODEC_TYPE_SUBTITLE) {
        return 0;
    }
    if (codec_ctx->codec_id == CODEC_ID_TEXT) {
        return 1;
    }
    if (codec_ctx->codec_id == CODEC_ID_SSA) {
        return 1;
    }
    return 0;
}
/*
 * Locates the text part of an SSA event line.
 *
 * The text is taken to be everything that follows the ninth comma of
 * `ssa`; commas inside the text itself are therefore preserved.
 *
 * Returns a pointer into `ssa` (not a copy), or NULL when the line holds
 * fewer than nine commas. `ssa` must be a NUL-terminated string.
 */
static const char *get_raw_text_from_ssa(const char *ssa)
{
    enum { COMMAS_BEFORE_TEXT = 9 };
    const char *cursor = ssa;
    int remaining = COMMAS_BEFORE_TEXT;

    while (remaining-- > 0) {
        const char *comma = strchr(cursor, ',');
        if (comma == NULL) {
            return NULL;
        }
        cursor = comma + 1;
    }
    return cursor;
}
/*
 * Parses a timestamp of the form H:MM:SS<c>HH at the start of `p`, where
 * <c> is any single character (typically '.') and HH is a count of
 * hundredths of a second.
 *
 * Returns the time in seconds, or 0 when the four numeric fields cannot
 * all be read. Trailing characters after the last field are ignored, so
 * `p` may point into the middle of a longer SSA line.
 */
static float get_ssa_time(const char *p)
{
    int hour, min, sec, hsec;

    if (sscanf(p, "%d:%d:%d%*c%d", &hour, &min, &sec, &hsec) != 4) {
        return 0;
    }

    /* fold hours into minutes, then minutes into seconds */
    min += 60 * hour;
    sec += 60 * min;
    return (float)(sec * 100 + hsec) / 100;
}
/*
 * Computes how long an SSA event lasts, in seconds.
 *
 * The start time is read right after the first comma of `ssa` and the
 * end time right after the second one, both through get_ssa_time().
 *
 * Returns end - start. A default of 2 seconds is returned instead when
 * either comma is missing, or when the end time lies before the start
 * time (a negative duration would describe an event that finishes
 * before it begins).
 */
static float get_duration_from_ssa(const char *ssa)
{
    const float default_duration = 2.0f;
    double start, end;
    const char *ptr;

    ptr = strchr(ssa, ',');
    if (!ptr) {
        return default_duration;
    }
    ++ptr;
    start = get_ssa_time(ptr);

    ptr = strchr(ptr, ',');
    if (!ptr) {
        return default_duration;
    }
    ++ptr;
    end = get_ssa_time(ptr);

    if (end < start) {
        return default_duration;
    }
    return (float)(end - start);
}
/*
 * Looks up the ISO 639-1 code matching the language tag of stream `s`.
 *
 * Returns whatever find_iso639_1() yields for s->language. When that is
 * NULL, a warning naming the unrecognized code is written to stderr and
 * NULL is returned.
 */
static const char *find_language_for_subtitle_stream(const AVStream *s)
{
    const char *iso639_1_code = find_iso639_1(s->language);

    if (iso639_1_code != NULL) {
        return iso639_1_code;
    }
    fprintf(stderr,"WARNING - unrecognized ISO 639-2 language code: %s\n",s->language);
    return NULL;
}
void ff2theora_output(ff2theora this) {
unsigned int i;
AVCodecContext *aenc = NULL;
......@@ -314,6 +384,8 @@ void ff2theora_output(ff2theora this) {
double fps = 0.0;
AVRational vstream_fps;
int display_width, display_height;
char *subtitles_enabled = (char*)alloca(this->context->nb_streams);
char *subtitles_opened = (char*)alloca(this->context->nb_streams);
if (this->audiostream >= 0 && this->context->nb_streams > this->audiostream) {
AVCodecContext *enc = this->context->streams[this->audiostream]->codec;
......@@ -651,6 +723,54 @@ void ff2theora_output(ff2theora this) {
}
}
for (i = 0; i < this->context->nb_streams; i++) {
subtitles_enabled[i] = 0;
subtitles_opened[i] = 0;
if (!this->disable_subtitles) {
AVStream *stream = this->context->streams[i];
AVCodecContext *enc = stream->codec;
if (enc->codec_type == CODEC_TYPE_SUBTITLE) {
AVCodec *codec = avcodec_find_decoder (enc->codec_id);
if (codec && avcodec_open (enc, codec) >= 0) {
subtitles_opened[i] = 1;
}
if (enc->codec_id == CODEC_ID_TEXT || enc->codec_id == CODEC_ID_SSA || subtitles_opened[i]) {
subtitles_enabled[i] = 1;
add_subtitles_stream(this, i, find_language_for_subtitle_stream(stream), NULL);
}
else {
fprintf(stderr,"Subtitle stream %d codec not supported, ignored\n", i);
}
}
}
}
for (i=0; i<this->n_kate_streams; ++i) {
ff2theora_kate_stream *ks=this->kate_streams+i;
if (ks->stream_index >= 0) {
printf("Muxing Kate stream %d from input stream %d\n",
i,ks->stream_index);
if (!this->disable_subtitles) {
info.with_kate=1;
}
}
else if (load_subtitles(ks,this->ignore_non_utf8)>0) {
printf("Muxing Kate stream %d from %s as %s %s\n",
i,ks->filename,
ks->subtitles_language[0]?ks->subtitles_language:"<unknown language>",
ks->subtitles_category[0]?ks->subtitles_category:"SUB");
}
else {
if (i!=this->n_kate_streams) {
memmove(this->kate_streams+i,this->kate_streams+i+1,(this->n_kate_streams-i-1)*sizeof(ff2theora_kate_stream));
--this->n_kate_streams;
--i;
}
}
}
oggmux_setup_kate_streams(&info, this->n_kate_streams);
if (this->video_index >= 0 || this->audio_index >= 0) {
AVFrame *frame=NULL;
AVFrame *frame_p=NULL;
......@@ -769,7 +889,7 @@ void ff2theora_output(ff2theora this) {
ff2theora_kate_stream *ks = this->kate_streams+i;
kate_info *ki = &info.kate_streams[i].ki;
kate_info_init(ki);
if (ks->num_subtitles > 0) {
if (ks->stream_index >= 0 || ks->num_subtitles > 0) {
if (!ks->subtitles_language[0]) {
fprintf(stderr, "WARNING - Subtitles language not set for input file %d\n",i);
}
......@@ -1019,6 +1139,44 @@ void ff2theora_output(ff2theora this) {
}
}
if (!this->disable_subtitles && subtitles_enabled[pkt.stream_index] && is_supported_subtitle_stream(this, pkt.stream_index)) {
AVStream *stream=this->context->streams[pkt.stream_index];
AVCodecContext *enc = stream->codec;
if (enc) {
if (enc->codec_id == CODEC_ID_TEXT || enc->codec_id == CODEC_ID_SSA) {
const char *utf8 = pkt.data;
size_t utf8len = pkt.size;
float t = (float)pkt.pts * stream->time_base.num / stream->time_base.den - this->start_time;
// my test case has 0 duration, how clever of that. I assume it's that old 'ends whenever the next
// one starts' hack, but it means I don't know in advance what duration it has. Great!
float duration;
if (pkt.duration <= 0) {
duration = 2.0f;
}
else {
duration = (float)pkt.duration * stream->time_base.num / stream->time_base.den;
}
// SSA has control stuff in there, extract raw text
if (enc->codec_id == CODEC_ID_SSA) {
duration = get_duration_from_ssa(utf8);
utf8 = get_raw_text_from_ssa(utf8);
if (utf8) {
utf8len = strlen(utf8);
}
}
if (t < 0 && t + duration > 0) {
duration += t;
t = 0;
}
if (utf8 && t >= 0)
add_subtitle_for_stream(this->kate_streams, this->n_kate_streams, pkt.stream_index, t, duration, utf8, utf8len);
}
else {
/* TODO: other types */
}
}
}
/* if we have subtitles starting before then, add it */
if (info.with_kate) {
double avtime = info.audio_only ? info.audiotime :
......@@ -1054,6 +1212,15 @@ void ff2theora_output(ff2theora this) {
}
}
if (!this->disable_subtitles) {
for (i = 0; i < this->context->nb_streams; i++) {
if (subtitles_opened[i]) {
AVCodecContext *enc = this->context->streams[i]->codec;
if (enc) avcodec_close(enc);
}
}
}
if (this->video_index >= 0) {
avcodec_close(venc);
}
......@@ -1290,6 +1457,7 @@ void print_usage() {
" --subtitles-language language set subtitles language (de, en_GB, etc)\n"
" --subtitles-category category set subtitles category (default \"subtitles\")\n"
" --subtitles-ignore-non-utf8 ignores any non utf-8 sequence in utf-8 text\n"
" --nosubtitles disables subtitles from input\n"
"\n"
#endif
"Metadata options:\n"
......@@ -1376,6 +1544,7 @@ int main(int argc, char **argv) {
{"nosound",0,&flag,NOAUDIO_FLAG},
{"noaudio",0,&flag,NOAUDIO_FLAG},
{"novideo",0,&flag,NOVIDEO_FLAG},
{"nosubtitles",0,&flag,NOSUBTITLES_FLAG},
{"no-upscaling",0,&flag,NOUPSCALING_FLAG},
#ifdef HAVE_FRAMEHOOK
{"vhook",required_argument,&flag,VHOOK_FLAG},
......@@ -1469,6 +1638,10 @@ int main(int argc, char **argv) {
convert->disable_video = 1;
flag = -1;
break;
case NOSUBTITLES_FLAG:
convert->disable_subtitles = 1;
flag = -1;
break;
case NOUPSCALING_FLAG:
convert->no_upscaling = 1;
flag = -1;
......@@ -1815,25 +1988,6 @@ int main(int argc, char **argv) {
}
}
for (n=0; n<convert->n_kate_streams; ++n) {
ff2theora_kate_stream *ks=convert->kate_streams+n;
if (load_subtitles(ks,convert->ignore_non_utf8)>0) {
printf("Muxing Kate stream %d from %s as %s %s\n",
n,ks->filename,
ks->subtitles_language[0]?ks->subtitles_language:"<unknown language>",
ks->subtitles_category[0]?ks->subtitles_category:"SUB");
}
else {
if (n!=convert->n_kate_streams) {
memmove(convert->kate_streams+n,convert->kate_streams+n+1,(convert->n_kate_streams-n-1)*sizeof(ff2theora_kate_stream));
--convert->n_kate_streams;
--n;
}
}
}
oggmux_setup_kate_streams(&info, convert->n_kate_streams);
//detect image sequences and set framerate if provided
if (av_guess_image2_codec(inputfile_name) != CODEC_ID_NONE || \
(input_fmt != NULL && strcmp(input_fmt->name, "video4linux") >= 0)) {
......@@ -1889,6 +2043,9 @@ int main(int argc, char **argv) {
if (convert->disable_video) {
fprintf(stderr, " [video disabled].\n");
}
if (convert->disable_subtitles) {
fprintf(stderr, " [subtitles disabled].\n");
}
if (convert->sync) {
fprintf(stderr, " Use A/V Sync from input container.\n");
}
......
......@@ -18,9 +18,15 @@ typedef struct ff2theora_subtitle{
} ff2theora_subtitle;
typedef struct ff2theora_kate_stream{
/* this block valid for subtitles loaded from a file */
const char *filename;
size_t num_subtitles;
ff2theora_subtitle *subtitles;
/* this block valid for subtitles coming from the source video */
int stream_index;
/* this block valid for all subtitle sources */
size_t subtitles_count; /* total subtitles output so far */
F2T_ENCODING subtitles_encoding;
char subtitles_language[16];
......@@ -45,6 +51,8 @@ typedef struct ff2theora{
int audio_bitrate;
int preset;
int disable_subtitles;
int picture_width;
int picture_height;
double fps;
......
#include <stddef.h>
#include <strings.h>
#include "iso639.h"
/*
 * Language code table searched by find_iso639_1().
 *
 * Each entry pairs a two-letter code with one or two three-letter codes.
 * The second three-letter code is NULL for entries that list only one.
 */
static const struct {
const char *iso639_1; /* two-letter code returned to callers */
const char *iso639_2t; /* three-letter code, always present */
const char *iso639_2b; /* alternative three-letter code, or NULL */
} iso639[] = {
{"aa","aar",NULL},
{"ab","abk",NULL},
{"ae","ave",NULL},
{"af","afr",NULL},
{"ak","aka",NULL},
{"am","amh",NULL},
{"an","arg",NULL},
{"ar","ara",NULL},
{"as","asm",NULL},
{"av","ava",NULL},
{"ay","aym",NULL},
{"az","aze",NULL},
{"ba","bak",NULL},
{"be","bel",NULL},
{"bg","bul",NULL},
{"bh","bih",NULL},
{"bi","bis",NULL},
{"bm","bam",NULL},
{"bn","ben",NULL},
{"bo","bod","tib"},
{"br","bre",NULL},
{"bs","bos",NULL},
{"ca","cat",NULL},
{"ce","che",NULL},
{"ch","cha",NULL},
{"co","cos",NULL},
{"cr","cre",NULL},
{"cs","ces","cze"},
{"cu","chu",NULL},
{"cv","chv",NULL},
{"cy","cym","wel"},
{"da","dan",NULL},
{"de","deu","ger"},
{"dv","div",NULL},
{"dz","dzo",NULL},
{"ee","ewe",NULL},
{"el","ell","gre"},
{"en","eng",NULL},
{"eo","epo",NULL},
{"es","spa",NULL},
{"et","est",NULL},
{"eu","eus","baq"},
{"fa","fas","per"},
{"ff","ful",NULL},
{"fi","fin",NULL},
{"fj","fij",NULL},
{"fo","fao",NULL},
{"fr","fra","fre"},
{"fy","fry",NULL},
{"ga","gle",NULL},
{"gd","gla",NULL},
{"gl","glg",NULL},
{"gn","grn",NULL},
{"gu","guj",NULL},
{"gv","glv",NULL},
{"ha","hau",NULL},
{"he","heb",NULL},
{"hi","hin",NULL},
{"ho","hmo",NULL},
{"hr","hrv","scr"},
{"ht","hat",NULL},
{"hu","hun",NULL},
{"hy","hye","arm"},
{"hz","her",NULL},
{"ia","ina",NULL},
{"id","ind",NULL},
{"ie","ile",NULL},
{"ig","ibo",NULL},
{"ii","iii",NULL},
{"ik","ipk",NULL},
{"io","ido",NULL},
{"is","isl","ice"},
{"it","ita",NULL},
{"iu","iku",NULL},
{"ja","jpn",NULL},
{"jv","jav",NULL},
{"ka","kat","geo"},
{"kg","kon",NULL},
{"ki","kik",NULL},
{"kj","kua",NULL},
{"kk","kaz",NULL},
{"kl","kal",NULL},
{"km","khm",NULL},
{"kn","kan",NULL},
{"ko","kor",NULL},
{"kr","kau",NULL},
{"ks","kas",NULL},
{"ku","kur",NULL},
{"kv","kom",NULL},
{"kw","cor",NULL},
{"ky","kir",NULL},
{"la","lat",NULL},
{"lb","ltz",NULL},
{"lg","lug",NULL},
{"li","lim",NULL},
{"ln","lin",NULL},
{"lo","lao",NULL},
{"lt","lit",NULL},
{"lu","lub",NULL},
{"lv","lav",NULL},
{"mg","mlg",NULL},
{"mh","mah",NULL},
{"mi","mri","mao"},
{"mk","mkd","mac"},
{"ml","mal",NULL},
{"mn","mon",NULL},
{"mr","mar",NULL},
{"ms","msa","may"},
{"mt","mlt",NULL},
{"my","mya","bur"},
{"na","nau",NULL},
{"nb","nob",NULL},
{"nd","nde",NULL},
{"ne","nep",NULL},
{"ng","ndo",NULL},
{"nl","nld","dut"},
{"nn","nno",NULL},
{"no","nor",NULL},
{"nr","nbl",NULL},
{"nv","nav",NULL},
{"ny","nya",NULL},
{"oc","oci",NULL},
{"oj","oji",NULL},
{"om","orm",NULL},
{"or","ori",NULL},
{"os","oss",NULL},
{"pa","pan",NULL},
{"pi","pli",NULL},
{"pl","pol",NULL},
{"ps","pus",NULL},
{"pt","por",NULL},
{"qu","que",NULL},
{"rm","roh",NULL},
{"rn","run",NULL},
{"ro","ron","rum"},
{"ru","rus",NULL},
{"rw","kin",NULL},
{"sa","san",NULL},
{"sc","srd",NULL},
{"sd","snd",NULL},
{"se","sme",NULL},
{"sg","sag",NULL},
{"si","sin",NULL},
{"sk","slk","slo"},
{"sl","slv",NULL},
{"sm","smo",NULL},
{"sn","sna",NULL},
{"so","som",NULL},
{"sq","sqi","alb"},
{"sr","srp","scc"},
{"ss","ssw",NULL},
{"st","sot",NULL},
{"su","sun",NULL},
{"sv","swe",NULL},
{"sw","swa",NULL},
{"ta","tam",NULL},
{"te","tel",NULL},
{"tg","tgk",NULL},
{"th","tha",NULL},
{"ti","tir",NULL},
{"tk","tuk",NULL},
{"tl","tgl",NULL},
{"tn","tsn",NULL},
{"to","ton",NULL},
{"tr","tur",NULL},
{"ts","tso",NULL},
{"tt","tat",NULL},
{"tw","twi",NULL},
{"ty","tah",NULL},
{"ug","uig",NULL},
{"uk","ukr",NULL},
{"ur","urd",NULL},
{"uz","uzb",NULL},
{"ve","ven",NULL},
{"vi","vie",NULL},
{"vo","vol",NULL},
{"wa","wln",NULL},
{"wo","wol",NULL},
{"xh","xho",NULL},
{"yi","yid",NULL},
{"yo","yor",NULL},
{"za","zha",NULL},
{"zh","zho","chi"},
{"zu","zul",NULL},
};
/*
 * Translates a three-letter language code into its two-letter code.
 *
 * `iso639_2` is compared case-insensitively against both three-letter
 * columns of the iso639 table; the first matching entry wins.
 *
 * Returns a pointer to a static string owned by the table, or NULL when
 * `iso639_2` is NULL or matches no entry.
 *
 * strcasecmp() is a POSIX function declared in <strings.h>, which this
 * file must include for the call below to have a prototype in scope.
 */
const char *find_iso639_1(const char *iso639_2)
{
    const size_t count = sizeof(iso639) / sizeof(iso639[0]);
    size_t n;

    if (!iso639_2) {
        return NULL;
    }
    for (n = 0; n < count; ++n) {
        if (strcasecmp(iso639_2, iso639[n].iso639_2t) == 0) {
            return iso639[n].iso639_1;
        }
        /* the alternative code is optional, so guard against NULL */
        if (iso639[n].iso639_2b && strcasecmp(iso639_2, iso639[n].iso639_2b) == 0) {
            return iso639[n].iso639_1;
        }
    }
    return NULL;
}
#ifndef _F2T_ISO639_H_
#define _F2T_ISO639_H_
/*
 * Returns the two-letter language code corresponding to the three-letter
 * code `iso639_2`, or NULL if `iso639_2` is NULL or not recognized.
 */
extern const char *find_iso639_1(const char *iso639_2);
#endif
......@@ -47,19 +47,40 @@ void add_kate_stream(ff2theora this){
ks->filename = NULL;
ks->num_subtitles = 0;
ks->subtitles = 0;
ks->stream_index = -1;
ks->subtitles_count = 0; /* denotes not set yet */
ks->subtitles_encoding = ENC_UNSET;
strcpy(ks->subtitles_language, "");
strcpy(ks->subtitles_category, "");
}
/*
 * Registers a Kate stream that is fed by a subtitle stream embedded in
 * the input, as opposed to one loaded from an external file.
 *
 * A new entry is created through add_kate_stream() and the last entry
 * of this->kate_streams is then tagged with `stream_index`.
 *
 * category: copied into the entry, truncated to 15 characters; "SUB"
 *           is used when NULL.
 * language: copied into the entry, truncated to 15 characters; when
 *           NULL the language is left as add_kate_stream() set it.
 */
void add_subtitles_stream(ff2theora this,int stream_index,const char *language,const char *category){
    const char *effective_category = category ? category : "SUB";
    ff2theora_kate_stream *ks;

    add_kate_stream(this);
    ks = this->kate_streams + (this->n_kate_streams - 1);
    ks->stream_index = stream_index;

    /* strncpy does not terminate on truncation, so do it explicitly */
    strncpy(ks->subtitles_category, effective_category, 16);
    ks->subtitles_category[15] = 0;

    if (language != NULL) {
        strncpy(ks->subtitles_language, language, 16);
        ks->subtitles_language[15] = 0;
    }
}
/*
* sets the filename of the next subtitles file
*/
void set_subtitles_file(ff2theora this,const char *filename){
size_t n;
for (n=0; n<this->n_kate_streams;++n) {
if (!this->kate_streams[n].filename) break;
if (this->kate_streams[n].stream_index==-1 && !this->kate_streams[n].filename) break;
}
if (n==this->n_kate_streams) add_kate_stream(this);
this->kate_streams[n].filename = filename;
......@@ -71,7 +92,7 @@ void set_subtitles_file(ff2theora this,const char *filename){
void set_subtitles_language(ff2theora this,const char *language){
size_t n;
for (n=0; n<this->n_kate_streams;++n) {
if (!this->kate_streams[n].subtitles_language[0]) break;
if (this->kate_streams[n].stream_index==-1 && !this->kate_streams[n].subtitles_language[0]) break;
}
if (n==this->n_kate_streams) add_kate_stream(this);
strncpy(this->kate_streams[n].subtitles_language, language, 16);
......@@ -84,7 +105,7 @@ void set_subtitles_language(ff2theora this,const char *language){
void set_subtitles_category(ff2theora this,const char *category){
size_t n;
for (n=0; n<this->n_kate_streams;++n) {
if (!this->kate_streams[n].subtitles_category[0]) break;
if (this->kate_streams[n].stream_index==-1 && !this->kate_streams[n].subtitles_category[0]) break;