summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/ctype/__ctype_get_mb_cur_max.c 5
-rw-r--r-- src/locale/langinfo.c 3
-rw-r--r-- src/multibyte/btowc.c 6
-rw-r--r-- src/multibyte/internal.h 7
-rw-r--r-- src/multibyte/mbrtowc.c 2
-rw-r--r-- src/multibyte/mbsrtowcs.c 19
-rw-r--r-- src/multibyte/mbtowc.c 2
-rw-r--r-- src/multibyte/wcrtomb.c 9
-rw-r--r-- src/multibyte/wctob.c 4
-rw-r--r-- src/regex/fnmatch.c 3
10 files changed, 53 insertions, 7 deletions
diff --git a/src/ctype/__ctype_get_mb_cur_max.c b/src/ctype/__ctype_get_mb_cur_max.c
index d235f4da..8e946fc1 100644
--- a/src/ctype/__ctype_get_mb_cur_max.c
+++ b/src/ctype/__ctype_get_mb_cur_max.c
@@ -1,6 +1,7 @@
-#include <stddef.h>
+#include <stdlib.h>
+#include "locale_impl.h"
size_t __ctype_get_mb_cur_max()
{
- return 4;
+ return MB_CUR_MAX;
}
diff --git a/src/locale/langinfo.c b/src/locale/langinfo.c
index a1ada246..776b4478 100644
--- a/src/locale/langinfo.c
+++ b/src/locale/langinfo.c
@@ -33,7 +33,8 @@ char *__nl_langinfo_l(nl_item item, locale_t loc)
int idx = item & 65535;
const char *str;
- if (item == CODESET) return "UTF-8";
+ if (item == CODESET)
+ return MB_CUR_MAX==1 ? "UTF-8-CODE-UNITS" : "UTF-8";
switch (cat) {
case LC_NUMERIC:
diff --git a/src/multibyte/btowc.c b/src/multibyte/btowc.c
index 29cb798d..8acd0a2c 100644
--- a/src/multibyte/btowc.c
+++ b/src/multibyte/btowc.c
@@ -1,8 +1,10 @@
#include <stdio.h>
#include <wchar.h>
+#include <stdlib.h>
+#include "internal.h"
wint_t btowc(int c)
{
- c = (unsigned char)c;
- return c<128U ? c : EOF;
+ int b = (unsigned char)c;
+ return b<128U ? b : (MB_CUR_MAX==1 && c!=EOF) ? CODEUNIT(c) : WEOF;
}
diff --git a/src/multibyte/internal.h b/src/multibyte/internal.h
index cc017fa2..53d62eda 100644
--- a/src/multibyte/internal.h
+++ b/src/multibyte/internal.h
@@ -23,3 +23,10 @@ extern const uint32_t bittab[];
#define SA 0xc2u
#define SB 0xf4u
+
+/* Arbitrary encoding for representing code units instead of characters. */
+#define CODEUNIT(c) (0xdfff & (signed char)(c))
+#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80)
+
+/* Get inline definition of MB_CUR_MAX. */
+#include "locale_impl.h"
diff --git a/src/multibyte/mbrtowc.c b/src/multibyte/mbrtowc.c
index e7b36540..ca7da700 100644
--- a/src/multibyte/mbrtowc.c
+++ b/src/multibyte/mbrtowc.c
@@ -4,6 +4,7 @@
* unnecessary.
*/
+#include <stdlib.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
@@ -27,6 +28,7 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate
if (!n) return -2;
if (!c) {
if (*s < 0x80) return !!(*wc = *s);
+ if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
if (*s-SA > SB-SA) goto ilseq;
c = bittab[*s++-SA]; n--;
}
diff --git a/src/multibyte/mbsrtowcs.c b/src/multibyte/mbsrtowcs.c
index 3c1343ae..e23083d2 100644
--- a/src/multibyte/mbsrtowcs.c
+++ b/src/multibyte/mbsrtowcs.c
@@ -7,6 +7,8 @@
#include <stdint.h>
#include <wchar.h>
#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
#include "internal.h"
size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st)
@@ -24,6 +26,23 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs
}
}
+ if (MB_CUR_MAX==1) {
+ if (!ws) return strlen((const char *)s);
+ for (;;) {
+ if (!wn) {
+ *src = (const void *)s;
+ return wn0;
+ }
+ if (!*s) break;
+ c = *s++;
+ *ws++ = CODEUNIT(c);
+ wn--;
+ }
+ *ws = 0;
+ *src = 0;
+ return wn0-wn;
+ }
+
if (!ws) for (;;) {
if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
diff --git a/src/multibyte/mbtowc.c b/src/multibyte/mbtowc.c
index 803d2213..71a95066 100644
--- a/src/multibyte/mbtowc.c
+++ b/src/multibyte/mbtowc.c
@@ -4,6 +4,7 @@
* unnecessary.
*/
+#include <stdlib.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
@@ -19,6 +20,7 @@ int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n)
if (!wc) wc = &dummy;
if (*s < 0x80) return !!(*wc = *s);
+ if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
if (*s-SA > SB-SA) goto ilseq;
c = bittab[*s++-SA];
diff --git a/src/multibyte/wcrtomb.c b/src/multibyte/wcrtomb.c
index 59f733db..ddc37a57 100644
--- a/src/multibyte/wcrtomb.c
+++ b/src/multibyte/wcrtomb.c
@@ -4,8 +4,10 @@
* unnecessary.
*/
+#include <stdlib.h>
#include <wchar.h>
#include <errno.h>
+#include "internal.h"
size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)
{
@@ -13,6 +15,13 @@ size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)
if ((unsigned)wc < 0x80) {
*s = wc;
return 1;
+ } else if (MB_CUR_MAX == 1) {
+ if (!IS_CODEUNIT(wc)) {
+ errno = EILSEQ;
+ return -1;
+ }
+ *s = wc;
+ return 1;
} else if ((unsigned)wc < 0x800) {
*s++ = 0xc0 | (wc>>6);
*s = 0x80 | (wc&0x3f);
diff --git a/src/multibyte/wctob.c b/src/multibyte/wctob.c
index d6353ee1..4aeda6a1 100644
--- a/src/multibyte/wctob.c
+++ b/src/multibyte/wctob.c
@@ -1,8 +1,10 @@
-#include <stdio.h>
#include <wchar.h>
+#include <stdlib.h>
+#include "internal.h"
int wctob(wint_t c)
{
if (c < 128U) return c;
+ if (MB_CUR_MAX==1 && IS_CODEUNIT(c)) return (unsigned char)c;
return EOF;
}
diff --git a/src/regex/fnmatch.c b/src/regex/fnmatch.c
index 7f6b65f3..978fff88 100644
--- a/src/regex/fnmatch.c
+++ b/src/regex/fnmatch.c
@@ -18,6 +18,7 @@
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
+#include "locale_impl.h"
#define END 0
#define UNMATCHABLE -2
@@ -229,7 +230,7 @@ static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n
* On illegal sequences we may get it wrong, but in that case
* we necessarily have a matching failure anyway. */
for (s=endstr; s>str && tailcnt; tailcnt--) {
- if (s[-1] < 128U) s--;
+ if (s[-1] < 128U || MB_CUR_MAX==1) s--;
else while ((unsigned char)*--s-0x80U<0x40 && s>str);
}
if (tailcnt) return FNM_NOMATCH;