summaryrefslogtreecommitdiff
path: root/src/locale
diff options
context:
space:
mode:
author: Rich Felker <dalias@aerifal.cx> 2017-11-10 17:06:32 -0500
committer: Rich Felker <dalias@aerifal.cx> 2017-11-10 17:22:43 -0500
commit: a39f20bf9f8e59573a479bff23df345b2b4d2345 (patch)
tree: e8b70165af9c19269e1ae95bcf52362ee5baa12a /src/locale
parent: 5b546faa67544af395d6407553762b37e9711157 (diff)
download: musl-a39f20bf9f8e59573a479bff23df345b2b4d2345.tar.gz
add iso-2022-jp support (decoding only) to iconv
this implementation aims to match the baseline defined by rfc1468 (the original mime charset definition) plus the halfwidth katakana extension included in the whatwg definition of the charset. si/so controls, and newlines in doublebyte state, are not currently rejected. the jis x 0201 mode is currently interpreted as having the yen sign and overline character in place of backslash and tilde; ascii mode has the standard ascii characters in those slots.
Diffstat (limited to 'src/locale')
-rw-r--r-- src/locale/iconv.c 47
1 files changed, 45 insertions, 2 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index 0696b555..2107b055 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -18,6 +18,7 @@
#define UTF_8 0310
#define EUC_JP 0320
#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
#define GB18030 0330
#define GBK 0331
#define GB2312 0332
@@ -41,6 +42,7 @@ static const unsigned char charmaps[] =
"ascii\0usascii\0iso646\0iso646us\0\0\307"
"eucjp\0\0\320"
"shiftjis\0sjis\0\0\321"
+"iso2022jp\0\0\322"
"gb18030\0\0\330"
"gbk\0\0\331"
"gb2312\0\0\332"
@@ -123,6 +125,7 @@ static size_t extract_to(iconv_t cd)
iconv_t iconv_open(const char *to, const char *from)
{
size_t f, t;
+ struct stateful_cd *scd;
if ((t = find_charmap(to))==-1
|| (f = find_charmap(from))==-1
@@ -132,8 +135,9 @@ iconv_t iconv_open(const char *to, const char *from)
}
iconv_t cd = combine_to_from(t, f);
- if (0) {
- struct stateful_cd *scd = malloc(sizeof *scd);
+ switch (charmaps[f]) {
+ case ISO2022_JP:
+ scd = malloc(sizeof *scd);
if (!scd) return (iconv_t)-1;
scd->base_cd = cd;
scd->state = 0;
@@ -294,6 +298,45 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
c = jis0208[c][d];
if (!c) goto ilseq;
break;
+ case ISO2022_JP:
+ if (c >= 128) goto ilseq;
+ if (c == '\033') {
+ l = 3;
+ if (*inb < 3) goto starved;
+ c = *((unsigned char *)*in + 1);
+ d = *((unsigned char *)*in + 2);
+ if (c != '(' && c != '$') goto ilseq;
+ switch (128*(c=='$') + d) {
+ case 'B': scd->state=0; continue;
+ case 'J': scd->state=1; continue;
+ case 'I': scd->state=4; continue;
+ case 128+'@': scd->state=2; continue;
+ case 128+'B': scd->state=3; continue;
+ }
+ goto ilseq;
+ }
+ switch (scd->state) {
+ case 1:
+ if (c=='\\') c = 0xa5;
+ if (c=='~') c = 0x203e;
+ break;
+ case 2:
+ case 3:
+ l = 2;
+ if (*inb < 2) goto starved;
+ d = *((unsigned char *)*in + 1);
+ c -= 0x21;
+ d -= 0x21;
+ if (c >= 84 || d >= 94) goto ilseq;
+ c = jis0208[c][d];
+ if (!c) goto ilseq;
+ break;
+ case 4:
+ if (c-0x60 < 0x1f) goto ilseq;
+ if (c-0x21 < 0x5e) c += 0xff61-0x21;
+ break;
+ }
+ break;
case GB2312:
if (c < 128) break;
if (c < 0xa1) goto ilseq;