Commit f65dab6f authored by Timothy B. Terriberry

Add UTF-8 filename support to Windows.

As requested here:
http://www.hydrogenaudio.org/forums/index.php?showtopic=101817
parent 7aea3cae
......@@ -21,18 +21,22 @@ libopusurl_la_LIBADD = libopusfile.la $(URL_DEPS_LIBS)
libopusurl_la_LDFLAGS = -no-undefined \
-version-info @OP_LT_CURRENT@:@OP_LT_REVISION@:@OP_LT_AGE@
if OP_ENABLE_HTTP
noinst_PROGRAMS = examples/opusfile_example examples/seeking_example
examples_opusfile_example_SOURCES = examples/opusfile_example.c
examples_seeking_example_SOURCES = examples/seeking_example.c
examples_opusfile_example_LDADD = libopusurl.la libopusfile.la
examples_seeking_example_LDADD = libopusurl.la libopusfile.la
if OP_WIN32
if OP_ENABLE_HTTP
libopusurl_la_SOURCES += src/wincerts.c
libopusurl_la_LIBADD += -lws2_32 -lcrypt32
endif
examples_opusfile_example_SOURCES += examples/win32utf8.c
examples_seeking_example_SOURCES += examples/win32utf8.c
endif
noinst_PROGRAMS = examples/opusfile_example examples/seeking_example
examples_opusfile_example_LDADD = libopusurl.la libopusfile.la
examples_seeking_example_LDADD = libopusurl.la libopusfile.la
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = opusfile.pc opusurl.pc
......
......@@ -21,12 +21,12 @@
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <opusfile.h>
#if defined(_WIN32)
/*We need the following two to set stdin/stdout to binary.*/
# include <io.h>
# include <fcntl.h>
# include "win32utf8.h"
# undef fileno
# define fileno _fileno
#endif
#include <opusfile.h>
static void print_duration(FILE *_fp,ogg_int64_t _nsamples,int _frac){
ogg_int64_t seconds;
......@@ -138,15 +138,7 @@ int main(int _argc,const char **_argv){
int is_ssl;
int output_seekable;
#if defined(_WIN32)
# undef fileno
# define fileno _fileno
/*We need to set stdin/stdout to binary mode. Damn windows.*/
/*Beware the evil ifdef. We avoid these where we can, but this one we
cannot.
Don't add any more.
You'll probably go to hell if you do.*/
_setmode(fileno(stdin),_O_BINARY);
_setmode(fileno(stdout),_O_BINARY);
win32_utf8_setup(&_argc,&_argv);
#endif
if(_argc!=2){
fprintf(stderr,"Usage: %s <file.opus>\n",_argv[0]);
......@@ -289,6 +281,7 @@ int main(int _argc,const char **_argv){
print_size(stderr,bitrate,1," ");
fprintf(stderr,"bps) \r");
pcm_print_offset=pcm_offset;
fflush(stderr);
}
next_pcm_offset=op_pcm_tell(of);
if(pcm_offset+ret!=next_pcm_offset){
......
......@@ -22,12 +22,12 @@
#include <errno.h>
#include <math.h>
#include <string.h>
#include <opusfile.h>
#if defined(_WIN32)
/*We need the following two to set stdin/stdout to binary.*/
# include <io.h>
# include <fcntl.h>
# include "win32utf8.h"
# undef fileno
# define fileno _fileno
#endif
#include <opusfile.h>
/*Use shorts, they're smaller.*/
#if !defined(OP_FIXED_POINT)
......@@ -261,15 +261,7 @@ int main(int _argc,const char **_argv){
OggOpusFile *of;
void *fp;
#if defined(_WIN32)
# undef fileno
# define fileno _fileno
/*We need to set stdin/stdout to binary mode. Damn windows.*/
/*Beware the evil ifdef. We avoid these where we can, but this one we
cannot.
Don't add any more.
You'll probably go to hell if you do.*/
_setmode(fileno(stdin),_O_BINARY);
_setmode(fileno(stdout),_O_BINARY);
win32_utf8_setup(&_argc,&_argv);
#endif
if(_argc!=2){
fprintf(stderr,"Usage: %s <file.opus>\n",_argv[0]);
......
#if defined(_WIN32)
# include <stdio.h>
# include <stdlib.h>
# include <wchar.h>
/*We need the following two to set stdin/stdout to binary.*/
# include <io.h>
# include <fcntl.h>
# define WIN32_LEAN_AND_MEAN
# define WIN32_EXTRA_LEAN
# include <windows.h>
# include "win32utf8.h"
/*Converts a NUL-terminated UTF-16 string into a freshly malloc()ed,
   NUL-terminated UTF-8 string.
  Valid surrogate pairs become 4-byte sequences.
  An unpaired surrogate is emitted as a 3-byte sequence anyway (which is
   technically invalid UTF-8), so that whatever consumes the string later can
   report the problem with more context than we have here.
  Return: The converted string, which the caller must free(), or NULL if the
   allocation failed.*/
static char *utf16_to_utf8(const wchar_t *_src){
  size_t  nunits;
  size_t  pos;
  char   *buf;
  char   *out;
  nunits=wcslen(_src);
  /*A single UTF-16 unit expands to at most 3 bytes, and a surrogate pair (2
     units) to 4 bytes, so 3 bytes per unit plus the terminator is enough.*/
  buf=(char *)malloc(sizeof(*buf)*(3*nunits+1));
  if(buf==NULL)return NULL;
  out=buf;
  pos=0;
  while(pos<nunits){
    unsigned unit;
    unit=_src[pos++];
    if(unit<0x80){
      /*ASCII: copied through as a single byte.*/
      *out++=(char)unit;
    }
    else if(unit<0x800){
      /*Fits in a 2-byte sequence.*/
      *out++=(char)(0xC0|(unit>>6));
      *out++=(char)(0x80|(unit&0x3F));
    }
    else{
      unsigned trail;
      /*Only look at the following unit when this one is a high surrogate and
         there is a following unit to look at.*/
      trail=0;
      if(unit>=0xD800&&unit<0xDC00&&pos<nunits)trail=_src[pos];
      if(trail>=0xDC00&&trail<0xE000){
        unsigned cp;
        /*A well-formed surrogate pair: rebuild the code point and emit it as
           a 4-byte sequence, consuming the low surrogate as well.*/
        cp=(((unit&0x3FF)<<10)|(trail&0x3FF))+0x10000;
        *out++=(char)(0xF0|(cp>>18));
        *out++=(char)(0x80|((cp>>12)&0x3F));
        *out++=(char)(0x80|((cp>>6)&0x3F));
        *out++=(char)(0x80|(cp&0x3F));
        pos++;
      }
      else{
        /*Either an ordinary 3-byte value or an unpaired surrogate; both are
           written as a 3-byte sequence.*/
        *out++=(char)(0xE0|(unit>>12));
        *out++=(char)(0x80|((unit>>6)&0x3F));
        *out++=(char)(0x80|(unit&0x3F));
      }
    }
  }
  *out='\0';
  return buf;
}
/*Pointer type used to call CommandLineToArgvW() after looking it up in
   shell32.dll at run time with GetProcAddress(), rather than linking against
   it directly.*/
typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line,
int *num_args);
/*Make a best-effort attempt to support UTF-8 on Windows.*/
void win32_utf8_setup(int *_argc,const char ***_argv){
HMODULE hlib;
/*We need to set stdin/stdout to binary mode.
This is unrelated to UTF-8 support, but it's platform specific and we need
to do it in the same places.*/
_setmode(_fileno(stdin),_O_BINARY);
_setmode(_fileno(stdout),_O_BINARY);
hlib=LoadLibraryA("shell32.dll");
if(hlib!=NULL){
command_line_to_argv_w_func command_line_to_argv_w;
/*This function is only available on Windows 2000 or later.*/
command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib,
"CommandLineToArgvW");
if(command_line_to_argv_w!=NULL){
wchar_t **argvw;
int argc;
argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc);
if(argvw!=NULL){
int ai;
/*Really, I don't see why argc would ever differ from *_argc, but let's
be paranoid.*/
if(argc>*_argc)argc=*_argc;
for(ai=0;ai<argc;ai++){
char *argv;
argv=utf16_to_utf8(argvw[ai]);
if(argv!=NULL)(*_argv)[ai]=argv;
}
*_argc=argc;
LocalFree(argvw);
}
}
FreeLibrary(hlib);
}
# if defined(CP_UTF8)
/*This does not work correctly in all environments (it breaks output in
mingw32 for me), and requires a Unicode font (e.g., when using the default
Raster font, even characters that are available in the font's codepage
won't display properly).*/
/*SetConsoleOutputCP(CP_UTF8);*/
# endif
}
#endif
/*Include guard: the declaration below is only seen once per translation
   unit.*/
#if !defined(_win32utf8_H)
# define _win32utf8_H (1)
/*The helper only exists in Windows builds; other platforms get nothing from
   this header.*/
# if defined(_WIN32)
/*Make a best-effort attempt to support UTF-8 on Windows.
  This sets stdin and stdout to binary mode and replaces the entries of
   (*_argv) with UTF-8 strings converted from the wide-character command line.
  _argc: In/out argument count; may be reduced to the number of arguments
          found in the wide-character command line.
  _argv: In/out argument vector; converted entries point at malloc()ed
          strings that this function does not free.*/
void win32_utf8_setup(int *_argc,const char ***_argv);
# endif
#endif
......@@ -630,6 +630,10 @@ struct OpusFileCallbacks{
If there is an error opening the file, nothing will be
filled in here.
\param _path The path to the file to open.
On Windows, this string must be UTF-8 (to allow access to
files whose names cannot be represented in the current
MBCS code page).
All other systems use the native character encoding.
\param _mode The mode to open the file in.
\return A stream handle to use with the callbacks, or <code>NULL</code> on
error.*/
......@@ -663,6 +667,10 @@ OP_WARN_UNUSED_RESULT void *op_fdopen(OpusFileCallbacks *_cb,
If there is an error opening the file, nothing will be
filled in here.
\param _path The path to the file to open.
On Windows, this string must be UTF-8 (to allow access
to files whose names cannot be represented in the
current MBCS code page).
All other systems use the native character encoding.
\param _mode The mode to open the file in.
\param _stream A stream previously returned by op_fopen(), op_fdopen(),
or op_freopen().
......
......@@ -103,9 +103,124 @@ static const OpusFileCallbacks OP_FILE_CALLBACKS={
(op_close_func)fclose
};
#if defined(_WIN32)
# include <stddef.h>
# include <errno.h>
/*Windows doesn't accept UTF-8 by default, and we don't have a wchar_t API,
   so if we just pass the path to fopen(), then there'd be no way for a user
   of our API to open a Unicode filename.
  Instead, we translate from UTF-8 to UTF-16 and use Windows' wchar_t API.
  This makes this API more consistent with platforms where the character set
   used by fopen is the same as used on disk, which is generally UTF-8, and
   with our metadata API, which always uses UTF-8.*/
/*Converts a NUL-terminated UTF-8 string to a newly allocated, NUL-terminated
   UTF-16 string.
  The input is validated strictly: truncated sequences, stray continuation
   bytes, overlong encodings, encoded surrogates (U+D800...U+DFFF), and code
   points above U+10FFFF are all rejected.
  _src: The UTF-8 string to convert (must not be NULL).
  Return: The converted string, which the caller must free(), or NULL if the
           allocation failed or _src is not valid UTF-8.*/
static wchar_t *op_utf8_to_utf16(const char *_src){
  wchar_t *dst;
  size_t   len;
  len=strlen(_src);
  /*Worst-case output is 1 wide character per 1 input character.
    (A 4-byte sequence produces 2 wide characters, which is still fewer.)*/
  dst=(wchar_t *)malloc(sizeof(*dst)*(len+1));
  if(dst!=NULL){
    size_t si;
    size_t di;
    for(di=si=0;si<len;si++){
      int c0;
      c0=(unsigned char)_src[si];
      if(!(c0&0x80)){
        /*Start byte says this is a 1-byte sequence.*/
        dst[di++]=(wchar_t)c0;
        continue;
      }
      else if(si+1<len){
        int c1;
        c1=(unsigned char)_src[si+1];
        if((c1&0xC0)==0x80){
          /*Found at least one continuation byte.*/
          if((c0&0xE0)==0xC0){
            wchar_t w;
            /*Start byte says this is a 2-byte sequence.
              The mask must be parenthesized: << binds tighter than &, so
               without the parentheses the 5 payload bits of the start byte
               would be discarded.*/
            w=(c0&0x1F)<<6|(c1&0x3F);
            if(w>=0x80U){
              /*This is a 2-byte sequence that is not overlong.*/
              dst[di++]=w;
              si++;
              continue;
            }
          }
          else if(si+2<len){
            int c2;
            c2=(unsigned char)_src[si+2];
            if((c2&0xC0)==0x80){
              /*Found at least two continuation bytes.*/
              if((c0&0xF0)==0xE0){
                wchar_t w;
                /*Start byte says this is a 3-byte sequence.*/
                w=(c0&0xF)<<12|(c1&0x3F)<<6|(c2&0x3F);
                if(w>=0x800U&&(w<0xD800||w>=0xE000)){
                  /*This is a 3-byte sequence that is not overlong and not a
                     UTF-16 surrogate pair value.*/
                  dst[di++]=w;
                  si+=2;
                  continue;
                }
              }
              else if(si+3<len){
                int c3;
                c3=(unsigned char)_src[si+3];
                if((c3&0xC0)==0x80){
                  /*Found at least three continuation bytes.*/
                  if((c0&0xF8)==0xF0){
                    opus_uint32 w;
                    /*Start byte says this is a 4-byte sequence.
                      All four payload fields are OR'd together; the low 6
                       bits come from the final continuation byte.*/
                    w=(c0&7)<<18|(c1&0x3F)<<12|(c2&0x3F)<<6|(c3&0x3F);
                    if(w>=0x10000U&&w<0x110000U){
                      /*This is a 4-byte sequence that is not overlong and not
                         greater than the largest valid Unicode code point.
                        Convert it to a surrogate pair.*/
                      w-=0x10000;
                      dst[di++]=(wchar_t)(0xD800+(w>>10));
                      dst[di++]=(wchar_t)(0xDC00+(w&0x3FF));
                      si+=3;
                      continue;
                    }
                  }
                }
              }
            }
          }
        }
      }
      /*If we got here, we encountered an illegal UTF-8 sequence.*/
      free(dst);
      return NULL;
    }
    OP_ASSERT(di<=len);
    dst[di]='\0';
  }
  return dst;
}
#endif
/*Opens a file with stdio and, on success, fills in the callbacks needed to
   read from it.
  On Windows, _path and _mode are interpreted as UTF-8 and converted to UTF-16
   so that the wide-character open call can be used.
  There, errno is set to EINVAL if either argument is NULL or _mode cannot be
   converted, and to ENOENT if _path cannot be converted.
  _cb:   Receives the stdio callbacks; left untouched on failure.
  _path: The path of the file to open.
  _mode: The stdio mode string.
  Return: The opened stream, or NULL on failure.*/
void *op_fopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode){
  FILE *stream;
#if defined(_WIN32)
  stream=NULL;
  if(_path!=NULL&&_mode!=NULL){
    wchar_t *path16;
    wchar_t *mode16;
    path16=op_utf8_to_utf16(_path);
    mode16=op_utf8_to_utf16(_mode);
    if(mode16!=NULL&&path16!=NULL)stream=_wfopen(path16,mode16);
    /*A bad mode takes precedence over a bad path when reporting the error.*/
    else errno=mode16==NULL?EINVAL:ENOENT;
    free(mode16);
    free(path16);
  }
  else errno=EINVAL;
#else
  stream=fopen(_path,_mode);
#endif
  if(stream==NULL)return NULL;
  *_cb=*&OP_FILE_CALLBACKS;
  return stream;
}
......@@ -120,7 +235,23 @@ void *op_fdopen(OpusFileCallbacks *_cb,int _fd,const char *_mode){
/*Re-opens an existing stdio stream on a new file and, on success, fills in
   the callbacks needed to read from it.
  On Windows, _path and _mode are interpreted as UTF-8 and converted to UTF-16
   so that the wide-character re-open call can be used.
  There, errno is set to EINVAL if either argument is NULL or _mode cannot be
   converted, and to ENOENT if _path cannot be converted.
  _cb:     Receives the stdio callbacks; left untouched on failure.
  _path:   The path of the file to open.
  _mode:   The stdio mode string.
  _stream: The stream to re-open, passed through as a FILE pointer.
  Return: The re-opened stream, or NULL on failure.*/
void *op_freopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode,
 void *_stream){
  FILE *stream;
#if defined(_WIN32)
  stream=NULL;
  if(_path!=NULL&&_mode!=NULL){
    wchar_t *path16;
    wchar_t *mode16;
    path16=op_utf8_to_utf16(_path);
    mode16=op_utf8_to_utf16(_mode);
    if(mode16!=NULL&&path16!=NULL){
      stream=_wfreopen(path16,mode16,(FILE *)_stream);
    }
    /*A bad mode takes precedence over a bad path when reporting the error.*/
    else errno=mode16==NULL?EINVAL:ENOENT;
    free(mode16);
    free(path16);
  }
  else errno=EINVAL;
#else
  stream=freopen(_path,_mode,(FILE *)_stream);
#endif
  if(stream==NULL)return NULL;
  *_cb=*&OP_FILE_CALLBACKS;
  return stream;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment