From d56460c939c94a6c547abe8238f442b8de10bfbd Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 12 Nov 2015 15:50:26 -0500 Subject: unify static and dynamic linked implementations of thread-local storage this both allows removal of some of the main remaining uses of the SHARED macro and clears one obstacle to static-linked dlopen support, which may be added at some point in the future. specialized single-TLS-module versions of __copy_tls and __reset_tls are removed and replaced with code adapted from their dynamic-linked versions, capable of operating on a whole chain of TLS modules, and use of the dynamic linker's DSO chain (which contains large struct dso objects) by these functions is replaced with a new chain of struct tls_module objects containing only the information needed for implementing TLS. this may also yield some performance benefit initializing TLS for a new thread when a large number of modules without TLS have been loaded, since there is no need to walk structures for modules without TLS. 
--- src/env/__init_tls.c | 71 ++++++++++++++----------- src/env/__reset_tls.c | 23 ++++----- src/internal/libc.h | 9 +++- src/ldso/dynlink.c | 140 +++++++++++++++++++------------------------------- 4 files changed, 111 insertions(+), 132 deletions(-) diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c index 73551e6c..0107a545 100644 --- a/src/env/__init_tls.c +++ b/src/env/__init_tls.c @@ -8,9 +8,6 @@ #include "atomic.h" #include "syscall.h" -#ifndef SHARED -static -#endif int __init_tp(void *p) { pthread_t td = p; @@ -24,8 +21,6 @@ int __init_tp(void *p) return 0; } -#ifndef SHARED - static struct builtin_tls { char c; struct pthread pt; @@ -33,33 +28,40 @@ static struct builtin_tls { } builtin_tls[1]; #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt) -struct tls_image { - void *image; - size_t len, size, align; -} __static_tls; - -#define T __static_tls +static struct tls_module main_tls; void *__copy_tls(unsigned char *mem) { pthread_t td; - if (!T.image) return mem; - void **dtv = (void *)mem; - dtv[0] = (void *)1; + struct tls_module *p; + size_t i; + void **dtv; + #ifdef TLS_ABOVE_TP - mem += sizeof(void *) * 2; - mem += -((uintptr_t)mem + sizeof(struct pthread)) & (T.align-1); + dtv = (void **)(mem + libc.tls_size) - (libc.tls_cnt + 1); + + mem += -((uintptr_t)mem + sizeof(struct pthread)) & (libc.tls_align-1); td = (pthread_t)mem; mem += sizeof(struct pthread); + + for (i=1, p=libc.tls_head; p; i++, p=p->next) { + dtv[i] = mem + p->offset; + memcpy(dtv[i], p->image, p->len); + } #else + dtv = (void **)mem; + mem += libc.tls_size - sizeof(struct pthread); - mem -= (uintptr_t)mem & (T.align-1); + mem -= (uintptr_t)mem & (libc.tls_align-1); td = (pthread_t)mem; - mem -= T.size; + + for (i=1, p=libc.tls_head; p; i++, p=p->next) { + dtv[i] = mem - p->offset; + memcpy(dtv[i], p->image, p->len); + } #endif + dtv[0] = (void *)libc.tls_cnt; td->dtv = td->dtv_copy = dtv; - dtv[1] = mem; - memcpy(mem, T.image, T.len); return td; } @@ -69,7 +71,7 @@ typedef 
Elf32_Phdr Phdr; typedef Elf64_Phdr Phdr; #endif -void __init_tls(size_t *aux) +static void static_init_tls(size_t *aux) { unsigned char *p; size_t n; @@ -86,16 +88,24 @@ void __init_tls(size_t *aux) } if (tls_phdr) { - T.image = (void *)(base + tls_phdr->p_vaddr); - T.len = tls_phdr->p_filesz; - T.size = tls_phdr->p_memsz; - T.align = tls_phdr->p_align; + main_tls.image = (void *)(base + tls_phdr->p_vaddr); + main_tls.len = tls_phdr->p_filesz; + main_tls.size = tls_phdr->p_memsz; + main_tls.align = tls_phdr->p_align; + libc.tls_cnt = 1; + libc.tls_head = &main_tls; } - T.size += (-T.size - (uintptr_t)T.image) & (T.align-1); - if (T.align < MIN_TLS_ALIGN) T.align = MIN_TLS_ALIGN; + main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image) + & (main_tls.align-1); + if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN; +#ifndef TLS_ABOVE_TP + main_tls.offset = main_tls.size; +#endif - libc.tls_size = 2*sizeof(void *)+T.size+T.align+sizeof(struct pthread) + libc.tls_align = main_tls.align; + libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread) + + main_tls.size + main_tls.align + MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN; if (libc.tls_size > sizeof builtin_tls) { @@ -117,6 +127,5 @@ void __init_tls(size_t *aux) if (__init_tp(__copy_tls(mem)) < 0) a_crash(); } -#else -void __init_tls(size_t *auxv) { } -#endif + +weak_alias(static_init_tls, __init_tls); diff --git a/src/env/__reset_tls.c b/src/env/__reset_tls.c index bd61f311..677e57f5 100644 --- a/src/env/__reset_tls.c +++ b/src/env/__reset_tls.c @@ -1,21 +1,16 @@ -#ifndef SHARED - #include #include "pthread_impl.h" - -extern struct tls_image { - void *image; - size_t len, size, align; -} __static_tls; - -#define T __static_tls +#include "libc.h" void __reset_tls() { - if (!T.size) return; pthread_t self = __pthread_self(); - memcpy(self->dtv[1], T.image, T.len); - memset((char *)self->dtv[1]+T.len, 0, T.size-T.len); + struct tls_module *p; + size_t i, n = (size_t)self->dtv[0]; + if (n) for 
(p=libc.tls_head, i=1; i<=n; i++, p=p->next) { + if (!self->dtv[i]) continue; + memcpy(self->dtv[i], p->image, p->len); + memset((char *)self->dtv[i]+p->len, 0, + p->size - p->len); + } } - -#endif diff --git a/src/internal/libc.h b/src/internal/libc.h index 98c7535a..5e145183 100644 --- a/src/internal/libc.h +++ b/src/internal/libc.h @@ -11,13 +11,20 @@ struct __locale_struct { const struct __locale_map *volatile cat[6]; }; +struct tls_module { + struct tls_module *next; + void *image; + size_t len, size, align, offset; +}; + struct __libc { int can_do_threads; int threaded; int secure; volatile int threads_minus_1; size_t *auxv; - size_t tls_size; + struct tls_module *tls_head; + size_t tls_size, tls_align, tls_cnt; size_t page_size; struct __locale_struct global_locale; }; diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c index ac755d94..0326baf0 100644 --- a/src/ldso/dynlink.c +++ b/src/ldso/dynlink.c @@ -70,8 +70,8 @@ struct dso { char kernel_mapped; struct dso **deps, *needed_by; char *rpath_orig, *rpath; - void *tls_image; - size_t tls_len, tls_size, tls_align, tls_id, tls_offset; + struct tls_module tls; + size_t tls_id; size_t relro_start, relro_end; void **new_dtv; unsigned char *new_tls; @@ -99,6 +99,7 @@ struct symdef { int __init_tp(void *); void __init_libc(char **, char *); +void *__copy_tls(unsigned char *); const char *__libc_get_version(void); @@ -123,6 +124,7 @@ static int noload; static jmp_buf *rtld_fail; static pthread_rwlock_t lock; static struct debug debug; +static struct tls_module *tls_tail; static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN; static size_t static_tls_cnt; static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE }; @@ -397,14 +399,14 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri break; #ifdef TLS_ABOVE_TP case REL_TPOFF: - *reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend; + *reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend; 
break; #else case REL_TPOFF: - *reloc_addr = tls_val - def.dso->tls_offset + addend; + *reloc_addr = tls_val - def.dso->tls.offset + addend; break; case REL_TPOFF_NEG: - *reloc_addr = def.dso->tls_offset - tls_val + addend; + *reloc_addr = def.dso->tls.offset - tls_val + addend; break; #endif case REL_TLSDESC: @@ -426,10 +428,10 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri } else { reloc_addr[0] = (size_t)__tlsdesc_static; #ifdef TLS_ABOVE_TP - reloc_addr[1] = tls_val + def.dso->tls_offset + reloc_addr[1] = tls_val + def.dso->tls.offset + TPOFF_K + addend; #else - reloc_addr[1] = tls_val - def.dso->tls_offset + reloc_addr[1] = tls_val - def.dso->tls.offset + addend; #endif } @@ -567,9 +569,9 @@ static void *map_library(int fd, struct dso *dso) dyn = ph->p_vaddr; } else if (ph->p_type == PT_TLS) { tls_image = ph->p_vaddr; - dso->tls_align = ph->p_align; - dso->tls_len = ph->p_filesz; - dso->tls_size = ph->p_memsz; + dso->tls.align = ph->p_align; + dso->tls.len = ph->p_filesz; + dso->tls.size = ph->p_memsz; } else if (ph->p_type == PT_GNU_RELRO) { dso->relro_start = ph->p_vaddr & -PAGE_SIZE; dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE; @@ -694,7 +696,7 @@ static void *map_library(int fd, struct dso *dso) done_mapping: dso->base = base; dso->dynv = laddr(dso, dyn); - if (dso->tls_size) dso->tls_image = laddr(dso, tls_image); + if (dso->tls.size) dso->tls.image = laddr(dso, tls_image); if (!runtime) reclaim_gaps(dso); free(allocated_buf); return map; @@ -1011,8 +1013,8 @@ static struct dso *load_library(const char *name, struct dso *needed_by) * extended DTV capable of storing an additional slot for * the newly-loaded DSO. 
*/ alloc_size = sizeof *p + strlen(pathname) + 1; - if (runtime && temp_dso.tls_image) { - size_t per_th = temp_dso.tls_size + temp_dso.tls_align + if (runtime && temp_dso.tls.image) { + size_t per_th = temp_dso.tls.size + temp_dso.tls.align + sizeof(void *) * (tls_cnt+3); n_th = libc.threads_minus_1 + 1; if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX; @@ -1033,22 +1035,25 @@ static struct dso *load_library(const char *name, struct dso *needed_by) strcpy(p->name, pathname); /* Add a shortname only if name arg was not an explicit pathname. */ if (pathname != name) p->shortname = strrchr(p->name, '/')+1; - if (p->tls_image) { + if (p->tls.image) { p->tls_id = ++tls_cnt; - tls_align = MAXP2(tls_align, p->tls_align); + tls_align = MAXP2(tls_align, p->tls.align); #ifdef TLS_ABOVE_TP - p->tls_offset = tls_offset + ( (tls_align-1) & - -(tls_offset + (uintptr_t)p->tls_image) ); - tls_offset += p->tls_size; + p->tls.offset = tls_offset + ( (tls_align-1) & + -(tls_offset + (uintptr_t)p->tls.image) ); + tls_offset += p->tls.size; #else - tls_offset += p->tls_size + p->tls_align - 1; - tls_offset -= (tls_offset + (uintptr_t)p->tls_image) - & (p->tls_align-1); - p->tls_offset = tls_offset; + tls_offset += p->tls.size + p->tls.align - 1; + tls_offset -= (tls_offset + (uintptr_t)p->tls.image) + & (p->tls.align-1); + p->tls.offset = tls_offset; #endif p->new_dtv = (void *)(-sizeof(size_t) & (uintptr_t)(p->name+strlen(p->name)+sizeof(size_t))); p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1)); + if (tls_tail) tls_tail->next = &p->tls; + else libc.tls_head = &p->tls; + tls_tail = &p->tls; } tail->next = p; @@ -1238,53 +1243,8 @@ static void dl_debug_state(void) weak_alias(dl_debug_state, _dl_debug_state); -void __reset_tls() +void __init_tls(size_t *auxv) { - pthread_t self = __pthread_self(); - struct dso *p; - for (p=head; p; p=p->next) { - if (!p->tls_id || !self->dtv[p->tls_id]) continue; - memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len); - memset((char 
*)self->dtv[p->tls_id]+p->tls_len, 0, - p->tls_size - p->tls_len); - if (p->tls_id == (size_t)self->dtv[0]) break; - } -} - -void *__copy_tls(unsigned char *mem) -{ - pthread_t td; - struct dso *p; - void **dtv; - -#ifdef TLS_ABOVE_TP - dtv = (void **)(mem + libc.tls_size) - (tls_cnt + 1); - - mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1); - td = (pthread_t)mem; - mem += sizeof(struct pthread); - - for (p=head; p; p=p->next) { - if (!p->tls_id) continue; - dtv[p->tls_id] = mem + p->tls_offset; - memcpy(dtv[p->tls_id], p->tls_image, p->tls_len); - } -#else - dtv = (void **)mem; - - mem += libc.tls_size - sizeof(struct pthread); - mem -= (uintptr_t)mem & (tls_align-1); - td = (pthread_t)mem; - - for (p=head; p; p=p->next) { - if (!p->tls_id) continue; - dtv[p->tls_id] = mem - p->tls_offset; - memcpy(dtv[p->tls_id], p->tls_image, p->tls_len); - } -#endif - dtv[0] = (void *)tls_cnt; - td->dtv = td->dtv_copy = dtv; - return td; } __attribute__((__visibility__("hidden"))) @@ -1321,12 +1281,12 @@ void *__tls_get_new(size_t *v) unsigned char *mem; for (p=head; ; p=p->next) { if (!p->tls_id || self->dtv[p->tls_id]) continue; - mem = p->new_tls + (p->tls_size + p->tls_align) + mem = p->new_tls + (p->tls.size + p->tls.align) * a_fetch_add(&p->new_tls_idx,1); - mem += ((uintptr_t)p->tls_image - (uintptr_t)mem) - & (p->tls_align-1); + mem += ((uintptr_t)p->tls.image - (uintptr_t)mem) + & (p->tls.align-1); self->dtv[p->tls_id] = mem; - memcpy(mem, p->tls_image, p->tls_len); + memcpy(mem, p->tls.image, p->tls.len); if (p->tls_id == v[0]) break; } __restore_sigs(&set); @@ -1335,6 +1295,8 @@ void *__tls_get_new(size_t *v) static void update_tls_size() { + libc.tls_cnt = tls_cnt; + libc.tls_align = tls_align; libc.tls_size = ALIGN( (1+tls_cnt) * sizeof(void *) + tls_offset + @@ -1445,6 +1407,7 @@ _Noreturn void __dls3(size_t *sp) * use during dynamic linking. If possible it will also serve as the * thread pointer at runtime. 
*/ libc.tls_size = sizeof builtin_tls; + libc.tls_align = tls_align; if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) { a_crash(); } @@ -1472,13 +1435,13 @@ _Noreturn void __dls3(size_t *sp) interp_off = (size_t)phdr->p_vaddr; else if (phdr->p_type == PT_TLS) { tls_image = phdr->p_vaddr; - app.tls_len = phdr->p_filesz; - app.tls_size = phdr->p_memsz; - app.tls_align = phdr->p_align; + app.tls.len = phdr->p_filesz; + app.tls.size = phdr->p_memsz; + app.tls.align = phdr->p_align; } } if (DL_FDPIC) app.loadmap = app_loadmap; - if (app.tls_size) app.tls_image = laddr(&app, tls_image); + if (app.tls.size) app.tls.image = laddr(&app, tls_image); if (interp_off) ldso.name = laddr(&app, interp_off); if ((aux[0] & (1UL<tls_id; - info.dlpi_tls_data = current->tls_image; + info.dlpi_tls_data = current->tls.image; ret = (callback)(&info, sizeof (info), data); -- cgit v1.2.1