Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Thomas Daede
Vorbis tools
Commits
86fb1a3e
Commit
86fb1a3e
authored
Oct 20, 2001
by
Michael Smith
Browse files
Updates and bugfixes, plus extra tests, from Edmund Evans.
svn path=/trunk/vorbis-tools/; revision=2185
parent
f41242bc
Changes
3
Hide whitespace changes
Inline
Side-by-side
share/charset.c
View file @
86fb1a3e
...
...
@@ -88,7 +88,7 @@ int utf8_mbtowc(int *pwc, const char *s, size_t n)
else
if
(
c
<
0xc2
)
return
-
1
;
else
if
(
c
<
0xe0
)
{
if
(
n
>=
2
)
{
if
(
n
>=
2
&&
(
s
[
1
]
&
0xc0
)
==
0x80
)
{
if
(
pwc
)
*
pwc
=
((
c
&
0x1f
)
<<
6
)
|
(
s
[
1
]
&
0x3f
);
return
2
;
...
...
@@ -176,7 +176,7 @@ int utf8_wctomb(char *s, int wc1)
*/
struct
charset
{
int
min
,
max
;
int
max
;
int
(
*
mbtowc
)(
void
*
table
,
int
*
pwc
,
const
char
*
s
,
size_t
n
);
int
(
*
wctomb
)(
void
*
table
,
char
*
s
,
int
wc
);
void
*
map
;
...
...
@@ -192,11 +192,6 @@ int charset_wctomb(struct charset *charset, char *s, int wc)
return
(
*
charset
->
wctomb
)(
charset
->
map
,
s
,
wc
);
}
int
charset_min
(
struct
charset
*
charset
)
{
return
charset
->
min
;
}
int
charset_max
(
struct
charset
*
charset
)
{
return
charset
->
max
;
...
...
@@ -398,21 +393,21 @@ int wctomb_8bit(void *map1, char *s, int wc1)
*/
struct
charset
charset_utf8
=
{
1
,
6
,
6
,
&
mbtowc_utf8
,
&
wctomb_utf8
,
0
};
struct
charset
charset_iso1
=
{
1
,
1
,
1
,
&
mbtowc_iso1
,
&
wctomb_iso1
,
0
};
struct
charset
charset_ascii
=
{
1
,
1
,
1
,
&
mbtowc_ascii
,
&
wctomb_ascii
,
0
...
...
@@ -449,7 +444,6 @@ struct charset *charset_find(const char *code)
maps
[
i
].
charset
=
0
;
}
else
{
maps
[
i
].
charset
->
min
=
1
;
maps
[
i
].
charset
->
max
=
1
;
maps
[
i
].
charset
->
mbtowc
=
&
mbtowc_8bit
;
maps
[
i
].
charset
->
wctomb
=
&
wctomb_8bit
;
...
...
@@ -488,7 +482,7 @@ int charset_convert(const char *fromcode, const char *tocode,
if
(
!
charset1
||
!
charset2
)
return
-
1
;
tobuf
=
(
char
*
)
malloc
(
(
fromlen
/
charset1
->
min
)
*
charset2
->
max
+
1
);
tobuf
=
(
char
*
)
malloc
(
fromlen
*
charset2
->
max
+
1
);
if
(
!
tobuf
)
return
-
2
;
...
...
share/charset.h
0 → 100755
View file @
86fb1a3e
/*
* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
/*
* These functions are like the C library's mbtowc() and wctomb(),
* but instead of depending on the locale they always work in UTF-8,
* and they use int instead of wchar_t.
*/
/*
* Decode one UTF-8 character from the buffer S, of which at most N
* bytes are examined. If PWC is not null, the decoded character value
* is stored in *PWC.
* NOTE(review): the visible part of the implementation returns the
* number of bytes consumed (2 for a valid two-byte sequence) and -1
* when the lead byte is rejected; confirm the remaining return values
* against charset.c.
*/
int
utf8_mbtowc
(
int
*
pwc
,
const
char
*
s
,
size_t
n
);
/*
* Encode the character WC as UTF-8 into the buffer S.
* NOTE(review): by analogy with the C library's wctomb() this
* presumably returns the number of bytes written, or -1 if WC cannot
* be encoded; the implementation is not visible here - confirm.
* NOTE(review): S presumably needs room for up to 6 bytes, the maximum
* declared for the UTF-8 charset - confirm.
*/
int
utf8_wctomb
(
char
*
s
,
int
wc
);
/*
* This is an object-oriented version of mbtowc() and wctomb().
* The caller first uses charset_find() to get a pointer to struct
* charset, then uses the mbtowc() and wctomb() methods on it.
* The function charset_max() gives the maximum length of a
* multibyte character in that encoding.
* This API is only appropriate for stateless encodings like UTF-8
* or ISO-8859-3, but I have no intention of implementing anything
* other than UTF-8 and 8-bit encodings.
*
* MINOR BUG: If memory allocation fails, charset_find() may return 0,
* and there is no way to distinguish this case from an unknown encoding.
*/
struct
charset
;
/*
* Look up the charset whose name is CODE.
* Returns a pointer to the charset object to pass to the other
* charset_*() functions, or 0 if the encoding is unknown. 0 may also
* be returned when memory is exhausted; the two cases cannot be told
* apart by the caller.
*/
struct
charset
*
charset_find
(
const
char
*
code
);
/*
* Decode one character from the buffer S (at most N bytes) in the
* encoding described by CHARSET. PWC may be null if the decoded value
* is not wanted.
* NOTE(review): the test suite expects 0 when S is null or N is 0
* (leaving *PWC untouched), 0 for a null character, the number of
* bytes consumed for a valid character, and -1 for an invalid
* sequence - confirm against the implementation.
*/
int
charset_mbtowc
(
struct
charset
*
charset
,
int
*
pwc
,
const
char
*
s
,
size_t
n
);
/*
* Encode the character WC into the buffer S in the encoding described
* by CHARSET. The call is forwarded to the charset's own wctomb method
* and that method's result is returned.
* NOTE(review): the test suite expects -1 for a value that cannot be
* encoded (e.g. 1 << 31 in UTF-8); the success return is presumably
* the number of bytes written - confirm.
*/
int
charset_wctomb
(
struct
charset
*
charset
,
char
*
s
,
int
wc
);
/*
* Return the maximum length of a multibyte character in the encoding
* described by CHARSET.
*/
int
charset_max
(
struct
charset
*
charset
);
/*
* Function to convert a buffer from one encoding to another.
* Invalid bytes are replaced by '#', and characters that are
* not available in the target encoding are replaced by '?'.
* Each of TO and TOLEN may be zero if the result is not wanted.
* The input or output may contain null bytes, but the output
* buffer is also null-terminated, so it is all right to
* use charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
*
* Return value:
*
* -2 : memory allocation failed
* -1 : unknown encoding
* 0 : data was converted exactly
* 1 : valid data was converted approximately (using '?')
* 2 : input was invalid (but still converted, using '#')
*/
/*
* Parameters:
*   fromcode, tocode - names of the source and target encodings;
*                      -1 is returned if either one is unknown.
*   from, fromlen    - input buffer and its length.
*   to               - if not zero, receives a pointer to the
*                      null-terminated output buffer.
*   tolen            - if not zero, receives the output length.
* NOTE(review): the implementation obtains the output buffer with
* malloc(), so the caller presumably has to free() *to - confirm.
*/
int
charset_convert
(
const
char
*
fromcode
,
const
char
*
tocode
,
const
char
*
from
,
size_t
fromlen
,
char
**
to
,
size_t
*
tolen
);
share/charset_test.c
View file @
86fb1a3e
...
...
@@ -30,13 +30,22 @@ void test_any(struct charset *charset)
/* Decoder */
assert
(
charset_mbtowc
(
charset
,
0
,
(
char
*
)(
-
1
)
,
0
)
==
0
);
assert
(
charset_mbtowc
(
charset
,
0
,
0
,
0
)
==
0
);
assert
(
charset_mbtowc
(
charset
,
0
,
0
,
1
)
==
0
);
assert
(
charset_mbtowc
(
charset
,
0
,
(
char
*
)(
-
1
),
0
)
==
0
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"x"
,
0
)
==
0
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"x"
,
1
)
==
1
&&
wc
==
'x'
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"x"
,
2
)
==
1
&&
wc
==
'x'
);
assert
(
charset_mbtowc
(
charset
,
0
,
"a"
,
0
)
==
0
);
assert
(
charset_mbtowc
(
charset
,
0
,
""
,
1
)
==
0
);
assert
(
charset_mbtowc
(
charset
,
0
,
"b"
,
1
)
==
1
);
assert
(
charset_mbtowc
(
charset
,
0
,
""
,
2
)
==
0
);
assert
(
charset_mbtowc
(
charset
,
0
,
"c"
,
2
)
==
1
);
wc
=
'x'
;
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"a"
,
0
)
==
0
&&
wc
==
'x'
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
""
,
1
)
==
0
&&
wc
==
0
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"b"
,
1
)
==
1
&&
wc
==
'b'
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
""
,
2
)
==
0
&&
wc
==
0
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"c"
,
2
)
==
1
&&
wc
==
'c'
);
/* Encoder */
...
...
@@ -81,6 +90,30 @@ void test_utf8()
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\375\277\277\277\277\277
"
,
9
)
==
6
&&
wc
==
0x7fffffff
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\302\000
"
,
2
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\302\300
"
,
2
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\340\040\200
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\340\340\200
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\340\240\000
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\340\240\300
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\360\020\200\200
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\360\320\200\200
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\360\220\000\200
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\360\220\300\200
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\360\220\200\000
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\360\220\200\300
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\375\077\277\277\277\277
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\375\377\277\277\277\277
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\375\277\077\277\277\277
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\375\277\377\277\277\277
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\375\277\277\277\077\277
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\375\277\277\277\377\277
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\375\277\277\277\277\077
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\375\277\277\277\277\377
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\376\277\277\277\277\277
"
,
9
)
==
-
1
);
assert
(
charset_mbtowc
(
charset
,
&
wc
,
"
\377\277\277\277\277\277
"
,
9
)
==
-
1
);
/* Encoder */
strcpy
(
s
,
"......."
);
assert
(
charset_wctomb
(
charset
,
s
,
1
<<
31
)
==
-
1
&&
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment