diff options
-rw-r--r-- | src/ctype/__ctype_get_mb_cur_max.c | 5 | ||||
-rw-r--r-- | src/locale/langinfo.c | 3 | ||||
-rw-r--r-- | src/multibyte/btowc.c | 6 | ||||
-rw-r--r-- | src/multibyte/internal.h | 7 | ||||
-rw-r--r-- | src/multibyte/mbrtowc.c | 2 | ||||
-rw-r--r-- | src/multibyte/mbsrtowcs.c | 19 | ||||
-rw-r--r-- | src/multibyte/mbtowc.c | 2 | ||||
-rw-r--r-- | src/multibyte/wcrtomb.c | 9 | ||||
-rw-r--r-- | src/multibyte/wctob.c | 4 | ||||
-rw-r--r-- | src/regex/fnmatch.c | 3 |
10 files changed, 53 insertions, 7 deletions
diff --git a/src/ctype/__ctype_get_mb_cur_max.c b/src/ctype/__ctype_get_mb_cur_max.c index d235f4da..8e946fc1 100644 --- a/src/ctype/__ctype_get_mb_cur_max.c +++ b/src/ctype/__ctype_get_mb_cur_max.c @@ -1,6 +1,7 @@ -#include <stddef.h> +#include <stdlib.h> +#include "locale_impl.h" size_t __ctype_get_mb_cur_max() { - return 4; + return MB_CUR_MAX; } diff --git a/src/locale/langinfo.c b/src/locale/langinfo.c index a1ada246..776b4478 100644 --- a/src/locale/langinfo.c +++ b/src/locale/langinfo.c @@ -33,7 +33,8 @@ char *__nl_langinfo_l(nl_item item, locale_t loc) int idx = item & 65535; const char *str; - if (item == CODESET) return "UTF-8"; + if (item == CODESET) + return MB_CUR_MAX==1 ? "UTF-8-CODE-UNITS" : "UTF-8"; switch (cat) { case LC_NUMERIC: diff --git a/src/multibyte/btowc.c b/src/multibyte/btowc.c index 29cb798d..8acd0a2c 100644 --- a/src/multibyte/btowc.c +++ b/src/multibyte/btowc.c @@ -1,8 +1,10 @@ #include <stdio.h> #include <wchar.h> +#include <stdlib.h> +#include "internal.h" wint_t btowc(int c) { - c = (unsigned char)c; - return c<128U ? c : EOF; + int b = (unsigned char)c; + return b<128U ? b : (MB_CUR_MAX==1 && c!=EOF) ? CODEUNIT(c) : WEOF; } diff --git a/src/multibyte/internal.h b/src/multibyte/internal.h index cc017fa2..53d62eda 100644 --- a/src/multibyte/internal.h +++ b/src/multibyte/internal.h @@ -23,3 +23,10 @@ extern const uint32_t bittab[]; #define SA 0xc2u #define SB 0xf4u + +/* Arbitrary encoding for representing code units instead of characters. */ +#define CODEUNIT(c) (0xdfff & (signed char)(c)) +#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80) + +/* Get inline definition of MB_CUR_MAX. */ +#include "locale_impl.h" diff --git a/src/multibyte/mbrtowc.c b/src/multibyte/mbrtowc.c index e7b36540..ca7da700 100644 --- a/src/multibyte/mbrtowc.c +++ b/src/multibyte/mbrtowc.c @@ -4,6 +4,7 @@ * unnecessary. */ +#include <stdlib.h> #include <wchar.h> #include <errno.h> #include "internal.h" @@ -27,6 +28,7 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate if (!n) return -2; if (!c) { if (*s < 0x80) return !!(*wc = *s); + if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1; if (*s-SA > SB-SA) goto ilseq; c = bittab[*s++-SA]; n--; } diff --git a/src/multibyte/mbsrtowcs.c b/src/multibyte/mbsrtowcs.c index 3c1343ae..e23083d2 100644 --- a/src/multibyte/mbsrtowcs.c +++ b/src/multibyte/mbsrtowcs.c @@ -7,6 +7,8 @@ #include <stdint.h> #include <wchar.h> #include <errno.h> +#include <string.h> +#include <stdlib.h> #include "internal.h" size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st) @@ -24,6 +26,23 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs } } + if (MB_CUR_MAX==1) { + if (!ws) return strlen((const char *)s); + for (;;) { + if (!wn) { + *src = (const void *)s; + return wn0; + } + if (!*s) break; + c = *s++; + *ws++ = CODEUNIT(c); + wn--; + } + *ws = 0; + *src = 0; + return wn0-wn; + } + if (!ws) for (;;) { if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) { while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) { diff --git a/src/multibyte/mbtowc.c b/src/multibyte/mbtowc.c index 803d2213..71a95066 100644 --- a/src/multibyte/mbtowc.c +++ b/src/multibyte/mbtowc.c @@ -4,6 +4,7 @@ * unnecessary. */ +#include <stdlib.h> #include <wchar.h> #include <errno.h> #include "internal.h" @@ -19,6 +20,7 @@ int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n) if (!wc) wc = &dummy; if (*s < 0x80) return !!(*wc = *s); + if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1; if (*s-SA > SB-SA) goto ilseq; c = bittab[*s++-SA]; diff --git a/src/multibyte/wcrtomb.c b/src/multibyte/wcrtomb.c index 59f733db..ddc37a57 100644 --- a/src/multibyte/wcrtomb.c +++ b/src/multibyte/wcrtomb.c @@ -4,8 +4,10 @@ * unnecessary. */ +#include <stdlib.h> #include <wchar.h> #include <errno.h> +#include "internal.h" size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st) { @@ -13,6 +15,13 @@ size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st) if ((unsigned)wc < 0x80) { *s = wc; return 1; + } else if (MB_CUR_MAX == 1) { + if (!IS_CODEUNIT(wc)) { + errno = EILSEQ; + return -1; + } + *s = wc; + return 1; } else if ((unsigned)wc < 0x800) { *s++ = 0xc0 | (wc>>6); *s = 0x80 | (wc&0x3f); diff --git a/src/multibyte/wctob.c b/src/multibyte/wctob.c index d6353ee1..4aeda6a1 100644 --- a/src/multibyte/wctob.c +++ b/src/multibyte/wctob.c @@ -1,8 +1,10 @@ -#include <stdio.h> #include <wchar.h> +#include <stdlib.h> +#include "internal.h" int wctob(wint_t c) { if (c < 128U) return c; + if (MB_CUR_MAX==1 && IS_CODEUNIT(c)) return (unsigned char)c; return EOF; } diff --git a/src/regex/fnmatch.c b/src/regex/fnmatch.c index 7f6b65f3..978fff88 100644 --- a/src/regex/fnmatch.c +++ b/src/regex/fnmatch.c @@ -18,6 +18,7 @@ #include <stdlib.h> #include <wchar.h> #include <wctype.h> +#include "locale_impl.h" #define END 0 #define UNMATCHABLE -2 @@ -229,7 +230,7 @@ static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n * On illegal sequences we may get it wrong, but in that case * we necessarily have a matching failure anyway. */ for (s=endstr; s>str && tailcnt; tailcnt--) { - if (s[-1] < 128U) s--; + if (s[-1] < 128U || MB_CUR_MAX==1) s--; else while ((unsigned char)*--s-0x80U<0x40 && s>str); } if (tailcnt) return FNM_NOMATCH; |