diff options
author | Rich Felker <dalias@aerifal.cx> | 2015-05-27 03:22:52 -0400 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2015-05-27 03:27:59 -0400 |
commit | 61a3364d246e72b903da8b76c2e27a225a51351e (patch) | |
tree | 8845c8e1798280285b6f0b27244d978eb602c300 /src/locale | |
parent | 63c188ec42e76ff768e81f6b65b11c68fc43351e (diff) | |
download | musl-61a3364d246e72b903da8b76c2e27a225a51351e.tar.gz |
overhaul locale internals to treat categories roughly uniformly
previously, LC_MESSAGES was treated specially as the only category
which could be set to a locale name without a definition file, in
order to facilitate gettext message translations when no libc locale
was available. LC_NUMERIC was completely un-settable, and LC_CTYPE
stored a flag intended to be used for a possible future byte-based C
locale, instead of storing a __locale_map pointer as the other
categories do.
this patch changes all categories to be represented by pointers to
__locale_map structures, and allows locale names without definition
files to be treated as valid locales with a trivial definition when used
in any category. outwardly visible functional changes should be minor,
limited mainly to the strings read back from setlocale and the way
gettext handles translations in categories other than LC_MESSAGES.
various internal refactoring has also been performed, and improvements
in const correctness have been made.
Diffstat (limited to 'src/locale')
-rw-r--r-- | src/locale/__lctrans.c | 2 | ||||
-rw-r--r-- | src/locale/__setlocalecat.c | 116 | ||||
-rw-r--r-- | src/locale/dcngettext.c | 28 | ||||
-rw-r--r-- | src/locale/duplocale.c | 11 | ||||
-rw-r--r-- | src/locale/newlocale.c | 7 | ||||
-rw-r--r-- | src/locale/setlocale.c | 72 |
6 files changed, 107 insertions, 129 deletions
diff --git a/src/locale/__lctrans.c b/src/locale/__lctrans.c index 15994c57..107fe14a 100644 --- a/src/locale/__lctrans.c +++ b/src/locale/__lctrans.c @@ -16,5 +16,5 @@ const char *__lctrans(const char *msg, const struct __locale_map *lm) const char *__lctrans_cur(const char *msg) { - return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES-2]); + return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES]); } diff --git a/src/locale/__setlocalecat.c b/src/locale/__setlocalecat.c index e829da56..30aa7fcc 100644 --- a/src/locale/__setlocalecat.c +++ b/src/locale/__setlocalecat.c @@ -15,24 +15,60 @@ const unsigned char *__map_file(const char *, size_t *); int __munmap(void *, size_t); char *__strchrnul(const char *, int); -static struct __locale_map *findlocale(const char *name, size_t n) +static const char envvars[][12] = { + "LC_CTYPE", + "LC_NUMERIC", + "LC_TIME", + "LC_COLLATE", + "LC_MONETARY", + "LC_MESSAGES", +}; + +static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 }; + +static const struct __locale_map c_dot_utf8 = { + .map = empty_mo, + .map_size = sizeof empty_mo, + .name = "C.UTF-8" +}; + +const struct __locale_map *__get_locale(int cat, const char *val) { static int lock[2]; static void *volatile loc_head; - struct __locale_map *p, *new = 0; + const struct __locale_map *p; + struct __locale_map *new = 0; const char *path = 0, *z; char buf[256]; - size_t l; - const void *map; - size_t map_size; + size_t l, n; + + if (!*val) { + (val = getenv("LC_ALL")) && *val || + (val = getenv(envvars[cat])) && *val || + (val = getenv("LANG")) && *val || + (val = "C.UTF-8"); + } + + /* Limit name length and forbid leading dot or any slashes. */ + for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++); + if (val[0]=='.' 
|| val[n]) val = "C.UTF-8"; + int builtin = (val[0]=='C' && !val[1]) + || !strcmp(val, "C.UTF-8") + || !strcmp(val, "POSIX"); + + if (builtin) { + if (cat == LC_CTYPE && val[1]=='.') + return (void *)&c_dot_utf8; + return 0; + } for (p=loc_head; p; p=p->next) - if (!strcmp(name, p->name)) return p; + if (!strcmp(val, p->name)) return p; LOCK(lock); for (p=loc_head; p; p=p->next) - if (!strcmp(name, p->name)) { + if (!strcmp(val, p->name)) { UNLOCK(lock); return p; } @@ -46,9 +82,10 @@ static struct __locale_map *findlocale(const char *name, size_t n) if (l >= sizeof buf - n - 2) continue; memcpy(buf, path, l); buf[l] = '/'; - memcpy(buf+l+1, name, n); + memcpy(buf+l+1, val, n); buf[l+1+n] = 0; - map = __map_file(buf, &map_size); + size_t map_size; + const void *map = __map_file(buf, &map_size); if (map) { new = malloc(sizeof *new); if (!new) { @@ -57,58 +94,31 @@ static struct __locale_map *findlocale(const char *name, size_t n) } new->map = map; new->map_size = map_size; - memcpy(new->name, name, n); + memcpy(new->name, val, n); new->name[n] = 0; new->next = loc_head; loc_head = new; break; } } - UNLOCK(lock); - return new; -} - -static const char envvars[][12] = { - "LC_CTYPE", - "LC_NUMERIC", - "LC_TIME", - "LC_COLLATE", - "LC_MONETARY", - "LC_MESSAGES", -}; -int __setlocalecat(locale_t loc, int cat, const char *val) -{ - if (!*val) { - (val = getenv("LC_ALL")) && *val || - (val = getenv(envvars[cat])) && *val || - (val = getenv("LANG")) && *val || - (val = "C.UTF-8"); + /* If no locale definition was found, make a locale map + * object anyway to store the name, which is kept for the + * sake of being able to do message translations at the + * application level. */ + if (!new && (new = malloc(sizeof *new))) { + new->map = empty_mo; + new->map_size = sizeof empty_mo; + memcpy(new->name, val, n); + new->name[n] = 0; + new->next = loc_head; + loc_head = new; } - size_t n; - for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++); - if (val[0]=='.' 
|| val[n]) val = "C.UTF-8"; - int builtin = (val[0]=='C' && !val[1]) - || !strcmp(val, "C.UTF-8") - || !strcmp(val, "POSIX"); + /* For LC_CTYPE, never return a null pointer unless the + * requested name was "C" or "POSIX". */ + if (!new && cat == LC_CTYPE) new = (void *)&c_dot_utf8; - switch (cat) { - case LC_CTYPE: - loc->ctype_utf8 = !builtin || val[1]=='.'; - break; - case LC_MESSAGES: - if (builtin) { - loc->messages_name[0] = 0; - } else { - memcpy(loc->messages_name, val, n); - loc->messages_name[n] = 0; - } - /* fall through */ - default: - loc->cat[cat-2] = builtin ? 0 : findlocale(val, n); - case LC_NUMERIC: - break; - } - return 0; + UNLOCK(lock); + return new; } diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c index 30dd41d4..a5ff8475 100644 --- a/src/locale/dcngettext.c +++ b/src/locale/dcngettext.c @@ -84,13 +84,15 @@ char *bindtextdomain(const char *domainname, const char *dirname) } static const char catnames[][12] = { + "LC_CTYPE", + "LC_NUMERIC", "LC_TIME", "LC_COLLATE", "LC_MONETARY", "LC_MESSAGES", }; -static const char catlens[] = { 7, 10, 11, 11 }; +static const char catlens[] = { 8, 10, 7, 10, 11, 11 }; struct msgcat { struct msgcat *next; @@ -117,10 +119,12 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, static struct msgcat *volatile cats; struct msgcat *p; struct __locale_struct *loc = CURRENT_LOCALE; - struct __locale_map *lm; + const struct __locale_map *lm; const char *dirname, *locname, *catname; size_t dirlen, loclen, catlen, domlen; + if ((unsigned)category >= LC_ALL) goto notrans; + if (!domainname) domainname = __gettextdomain(); domlen = strlen(domainname); @@ -129,25 +133,15 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, dirname = gettextdir(domainname, &dirlen); if (!dirname) goto notrans; - switch (category) { - case LC_MESSAGES: - locname = loc->messages_name; - if (!locname || !*locname) goto notrans; - break; - case LC_TIME: - case 
LC_MONETARY: - case LC_COLLATE: - lm = loc->cat[category-2]; - if (!lm) goto notrans; - locname = lm->name; - break; - default: + lm = loc->cat[category]; + if (!lm) { notrans: return (char *) ((n == 1) ? msgid1 : msgid2); } + locname = lm->name; - catname = catnames[category-2]; - catlen = catlens[category-2]; + catname = catnames[category]; + catlen = catlens[category]; loclen = strlen(locname); size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3; diff --git a/src/locale/duplocale.c b/src/locale/duplocale.c index b87c933e..030b64cb 100644 --- a/src/locale/duplocale.c +++ b/src/locale/duplocale.c @@ -5,17 +5,10 @@ locale_t __duplocale(locale_t old) { - locale_t new = calloc(1, sizeof *new + LOCALE_NAME_MAX + 1); + locale_t new = malloc(sizeof *new); if (!new) return 0; - new->messages_name = (void *)(new+1); - if (old == LC_GLOBAL_LOCALE) old = &libc.global_locale; - new->ctype_utf8 = old->ctype_utf8; - if (old->messages_name) - strcpy(new->messages_name, old->messages_name); - - for (size_t i=0; i<sizeof new->cat/sizeof new->cat[0]; i++) - new->cat[i] = old->cat[i]; + *new = *old; return new; } diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c index 39501d0c..4e0cbd34 100644 --- a/src/locale/newlocale.c +++ b/src/locale/newlocale.c @@ -8,17 +8,16 @@ locale_t __newlocale(int mask, const char *name, locale_t loc) int i; if (!loc) { - loc = calloc(1, sizeof *loc + LOCALE_NAME_MAX + 1); + loc = malloc(sizeof *loc); if (!loc) return 0; - loc->messages_name = (void *)(loc+1); for (i=0; i<LC_ALL; i++) if (!(mask & (1<<i))) - __setlocalecat(loc, i, ""); + loc->cat[i] = __get_locale(i, ""); } for (i=0; i<LC_ALL; i++) if (mask & (1<<i)) - __setlocalecat(loc, i, name); + loc->cat[i] = __get_locale(i, name); return loc; } diff --git a/src/locale/setlocale.c b/src/locale/setlocale.c index 32a8fcab..8dae5a4e 100644 --- a/src/locale/setlocale.c +++ b/src/locale/setlocale.c @@ -5,38 +5,23 @@ #include "libc.h" #include "atomic.h" -static char 
buf[2+4*(LOCALE_NAME_MAX+1)]; +static char buf[LC_ALL*(LOCALE_NAME_MAX+1)]; static char *setlocale_one_unlocked(int cat, const char *name) { - struct __locale_map *lm; + const struct __locale_map *lm; - if (name) __setlocalecat(&libc.global_locale, cat, name); + if (name) libc.global_locale.cat[cat] = lm = __get_locale(cat, name); + else lm = libc.global_locale.cat[cat]; - switch (cat) { - case LC_CTYPE: - return libc.global_locale.ctype_utf8 ? "C.UTF-8" : "C"; - case LC_NUMERIC: - return "C"; - case LC_MESSAGES: - return libc.global_locale.messages_name[0] - ? libc.global_locale.messages_name : "C"; - default: - lm = libc.global_locale.cat[cat-2]; - return lm ? lm->name : "C"; - } + return lm ? (char *)lm->name : "C"; } +char *__strchrnul(const char *, int); + char *setlocale(int cat, const char *name) { static volatile int lock[2]; - struct __locale_map *lm; - int i, j; - - if (!libc.global_locale.messages_name) { - libc.global_locale.messages_name = - buf + 2 + 3*(LOCALE_NAME_MAX+1); - } if ((unsigned)cat > LC_ALL) return 0; @@ -48,34 +33,31 @@ char *setlocale(int cat, const char *name) * performs both the serialization and deserialization, depends * on the format, so it can easily be changed if needed. 
*/ if (cat == LC_ALL) { + int i; if (name) { - char part[LOCALE_NAME_MAX+1]; - if (name[0] && name[1]==';' - && strlen(name) > 2 + 3*(LOCALE_NAME_MAX+1)) { - part[0] = name[0]; - part[1] = 0; - setlocale(LC_CTYPE, part); - part[LOCALE_NAME_MAX] = 0; - for (i=LC_TIME; i<LC_MESSAGES; i++) { - memcpy(part, name + 2 + (i-2)*(LOCALE_NAME_MAX+1), LOCALE_NAME_MAX); - for (j=LOCALE_NAME_MAX-1; j && part[j]==';'; j--) - part[j] = 0; - setlocale_one_unlocked(i, part); + char part[LOCALE_NAME_MAX+1] = "C.UTF-8"; + const char *p = name; + for (i=0; i<LC_ALL; i++) { + const char *z = __strchrnul(p, ';'); + if (z-p <= LOCALE_NAME_MAX) { + memcpy(part, p, z-p); + part[z-p] = 0; + if (*z) p = z+1; } - setlocale_one_unlocked(LC_MESSAGES, name - + 2 + 3*(LOCALE_NAME_MAX+1)); - } else { - for (i=0; i<LC_ALL; i++) - setlocale_one_unlocked(i, name); + setlocale_one_unlocked(i, part); } } - memset(buf, ';', 2 + 3*(LOCALE_NAME_MAX+1)); - buf[0] = libc.global_locale.ctype_utf8 ? 'U' : 'C'; - for (i=LC_TIME; i<LC_MESSAGES; i++) { - lm = libc.global_locale.cat[i-2]; - if (lm) memcpy(buf + 2 + (i-2)*(LOCALE_NAME_MAX+1), - lm->name, strlen(lm->name)); + char *s = buf; + for (i=0; i<LC_ALL; i++) { + const struct __locale_map *lm = + libc.global_locale.cat[i]; + const char *part = lm ? lm->name : "C"; + size_t l = strlen(part); + memcpy(s, part, l); + s[l] = ';'; + s += l+1; } + *--s = 0; UNLOCK(lock); return buf; } |