summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ldso/dynlink.c121
-rw-r--r--src/internal/pthread_impl.h1
-rw-r--r--src/ldso/aarch64/tlsdesc.s59
-rw-r--r--src/ldso/arm/tlsdesc.S19
-rw-r--r--src/ldso/i386/tlsdesc.s8
-rw-r--r--src/ldso/x86_64/tlsdesc.s21
-rw-r--r--src/thread/__tls_get_addr.c7
-rw-r--r--src/thread/i386/tls.s8
-rw-r--r--src/thread/pthread_create.c2
9 files changed, 86 insertions, 160 deletions
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index ec921dfd..9e2adb21 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -17,6 +17,7 @@
#include <pthread.h>
#include <ctype.h>
#include <dlfcn.h>
+#include <semaphore.h>
#include "pthread_impl.h"
#include "libc.h"
#include "dynlink.h"
@@ -1338,48 +1339,6 @@ void __init_tls(size_t *auxv)
{
}
-hidden void *__tls_get_new(tls_mod_off_t *v)
-{
- pthread_t self = __pthread_self();
-
- /* Block signals to make accessing new TLS async-signal-safe */
- sigset_t set;
- __block_all_sigs(&set);
- if (v[0] <= self->dtv[0]) {
- __restore_sigs(&set);
- return (void *)(self->dtv[v[0]] + v[1]);
- }
-
- /* This is safe without any locks held because, if the caller
- * is able to request the Nth entry of the DTV, the DSO list
- * must be valid at least that far out and it was synchronized
- * at program startup or by an already-completed call to dlopen. */
- struct dso *p;
- for (p=head; p->tls_id != v[0]; p=p->next);
-
- /* Get new DTV space from new DSO */
- uintptr_t *newdtv = p->new_dtv +
- (v[0]+1)*a_fetch_add(&p->new_dtv_idx,1);
- memcpy(newdtv, self->dtv, (self->dtv[0]+1) * sizeof(uintptr_t));
- newdtv[0] = v[0];
- self->dtv = self->dtv_copy = newdtv;
-
- /* Get new TLS memory from all new DSOs up to the requested one */
- unsigned char *mem;
- for (p=head; ; p=p->next) {
- if (!p->tls_id || self->dtv[p->tls_id]) continue;
- mem = p->new_tls + (p->tls.size + p->tls.align)
- * a_fetch_add(&p->new_tls_idx,1);
- mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
- & (p->tls.align-1);
- self->dtv[p->tls_id] = (uintptr_t)mem + DTP_OFFSET;
- memcpy(mem, p->tls.image, p->tls.len);
- if (p->tls_id == v[0]) break;
- }
- __restore_sigs(&set);
- return mem + v[1] + DTP_OFFSET;
-}
-
static void update_tls_size()
{
libc.tls_cnt = tls_cnt;
@@ -1392,6 +1351,82 @@ static void update_tls_size()
tls_align);
}
+void __dl_prepare_for_threads(void)
+{
+ /* MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED */
+ __syscall(SYS_membarrier, 1<<4, 0);
+}
+
+static sem_t barrier_sem;
+static void bcast_barrier(int s)
+{
+ sem_post(&barrier_sem);
+}
+
+static void install_new_tls(void)
+{
+ sigset_t set;
+ pthread_t self = __pthread_self(), td;
+ uintptr_t (*newdtv)[tls_cnt+1] = (void *)tail->new_dtv;
+ struct dso *p;
+ size_t i, j;
+ size_t old_cnt = self->dtv[0];
+
+ __block_app_sigs(&set);
+ __tl_lock();
+ /* Copy existing dtv contents from all existing threads. */
+ for (i=0, td=self; !i || td!=self; i++, td=td->next) {
+ memcpy(newdtv+i, td->dtv,
+ (old_cnt+1)*sizeof(uintptr_t));
+ newdtv[i][0] = tls_cnt;
+ }
+ /* Install new dtls into the enlarged, uninstalled dtv copies. */
+ for (p=head; ; p=p->next) {
+ if (!p->tls_id || self->dtv[p->tls_id]) continue;
+ unsigned char *mem = p->new_tls;
+ for (j=0; j<i; j++) {
+ unsigned char *new = mem;
+ new += ((uintptr_t)p->tls.image - (uintptr_t)mem)
+ & (p->tls.align-1);
+ memcpy(new, p->tls.image, p->tls.len);
+ newdtv[j][p->tls_id] =
+ (uintptr_t)new + DTP_OFFSET;
+ mem += p->tls.size + p->tls.align;
+ }
+ if (p->tls_id == tls_cnt) break;
+ }
+
+ /* Broadcast barrier to ensure contents of new dtv is visible
+ * if the new dtv pointer is. Use SYS_membarrier if it works,
+ * otherwise emulate with a signal. */
+
+ /* MEMBARRIER_CMD_PRIVATE_EXPEDITED */
+ if (__syscall(SYS_membarrier, 1<<3, 0)) {
+ sem_init(&barrier_sem, 0, 0);
+ struct sigaction sa = {
+ .sa_flags = SA_RESTART,
+ .sa_handler = bcast_barrier
+ };
+ memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
+ __libc_sigaction(SIGSYNCCALL, &sa, 0);
+ for (td=self->next; td!=self; td=td->next)
+ if (j) __syscall(SYS_tkill, td->tid, SIGSYNCCALL);
+ for (td=self->next; td!=self; td=td->next)
+ sem_wait(&barrier_sem);
+ sa.sa_handler = SIG_IGN;
+ __libc_sigaction(SIGSYNCCALL, &sa, 0);
+ sem_destroy(&barrier_sem);
+ }
+
+ /* Install new dtv for each thread. */
+ for (j=0, td=self; !j || td!=self; j++, td=td->next) {
+ td->dtv = td->dtv_copy = newdtv[j];
+ }
+
+ __tl_unlock();
+ __restore_sigs(&set);
+}
+
/* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
* following stage 2 and stage 3 functions via primitive symbolic lookup
* since it does not have access to their addresses to begin with. */
@@ -1864,6 +1899,8 @@ void *dlopen(const char *file, int mode)
redo_lazy_relocs();
update_tls_size();
+ if (tls_cnt != orig_tls_cnt)
+ install_new_tls();
_dl_debug_state();
orig_tail = tail;
end:
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index d5d969ec..de089967 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -130,6 +130,7 @@ hidden int __init_tp(void *);
hidden void *__copy_tls(unsigned char *);
hidden void __reset_tls();
+hidden void __dl_prepare_for_threads(void);
hidden void __dl_thread_cleanup(void);
hidden void __testcancel();
hidden void __do_cleanup_push(struct __ptcb *);
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
index 8e4004d7..c91baa45 100644
--- a/src/ldso/aarch64/tlsdesc.s
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -29,67 +29,10 @@ __tlsdesc_dynamic:
ldr x0,[x0,#8] // p
ldr x2,[x0] // p->modidx
ldr x3,[x1,#-8] // dtv
- ldr x4,[x3] // dtv[0]
- cmp x2,x4
- b.hi 1f
ldr x2,[x3,x2,lsl #3] // dtv[p->modidx]
ldr x0,[x0,#8] // p->off
add x0,x0,x2
-2: sub x0,x0,x1
+ sub x0,x0,x1
ldp x3,x4,[sp,#16]
ldp x1,x2,[sp],#32
ret
-
- // save all registers __tls_get_new may clobber
- // update sp in two steps because offset must be in [-512,509]
-1: stp x29,x30,[sp,#-160]!
- stp x5,x6,[sp,#16]
- stp x7,x8,[sp,#32]
- stp x9,x10,[sp,#48]
- stp x11,x12,[sp,#64]
- stp x13,x14,[sp,#80]
- stp x15,x16,[sp,#96]
- stp x17,x18,[sp,#112]
- stp q0,q1,[sp,#128]
- stp q2,q3,[sp,#-480]!
- stp q4,q5,[sp,#32]
- stp q6,q7,[sp,#64]
- stp q8,q9,[sp,#96]
- stp q10,q11,[sp,#128]
- stp q12,q13,[sp,#160]
- stp q14,q15,[sp,#192]
- stp q16,q17,[sp,#224]
- stp q18,q19,[sp,#256]
- stp q20,q21,[sp,#288]
- stp q22,q23,[sp,#320]
- stp q24,q25,[sp,#352]
- stp q26,q27,[sp,#384]
- stp q28,q29,[sp,#416]
- stp q30,q31,[sp,#448]
- bl __tls_get_new
- mrs x1,tpidr_el0
- ldp q4,q5,[sp,#32]
- ldp q6,q7,[sp,#64]
- ldp q8,q9,[sp,#96]
- ldp q10,q11,[sp,#128]
- ldp q12,q13,[sp,#160]
- ldp q14,q15,[sp,#192]
- ldp q16,q17,[sp,#224]
- ldp q18,q19,[sp,#256]
- ldp q20,q21,[sp,#288]
- ldp q22,q23,[sp,#320]
- ldp q24,q25,[sp,#352]
- ldp q26,q27,[sp,#384]
- ldp q28,q29,[sp,#416]
- ldp q30,q31,[sp,#448]
- ldp q2,q3,[sp],#480
- ldp x5,x6,[sp,#16]
- ldp x7,x8,[sp,#32]
- ldp x9,x10,[sp,#48]
- ldp x11,x12,[sp,#64]
- ldp x13,x14,[sp,#80]
- ldp x15,x16,[sp,#96]
- ldp x17,x18,[sp,#112]
- ldp q0,q1,[sp,#128]
- ldp x29,x30,[sp],#160
- b 2b
diff --git a/src/ldso/arm/tlsdesc.S b/src/ldso/arm/tlsdesc.S
index 4e67c3e2..455eac1d 100644
--- a/src/ldso/arm/tlsdesc.S
+++ b/src/ldso/arm/tlsdesc.S
@@ -35,13 +35,9 @@ __tlsdesc_dynamic:
#endif
#endif
ldr r3,[r0,#-4] // r3 = dtv
- ldr ip,[r3] // ip = dtv slot count
- cmp r1,ip
- bhi 3f
ldr ip,[r3,r1,LSL #2]
sub r0,ip,r0
add r0,r0,r2 // r0 = r3[r1]-r0+r2
-4:
#if __ARM_ARCH >= 5
pop {r2,r3,ip,pc}
#else
@@ -49,21 +45,6 @@ __tlsdesc_dynamic:
bx lr
#endif
-3:
-#if __ARM_PCS_VFP || !__SOFTFP__
- .fpu vfp
- vpush {d0-d7}
-#endif
- push {r0-r3}
- add r0,sp,#4
- bl __tls_get_new
- pop {r1-r3,ip}
-#if __ARM_PCS_VFP || !__SOFTFP__
- vpop {d0-d7}
-#endif
- sub r0,r0,r1 // r0 = retval-tp
- b 4b
-
#if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
#else
diff --git a/src/ldso/i386/tlsdesc.s b/src/ldso/i386/tlsdesc.s
index 4a553bce..a5c0100c 100644
--- a/src/ldso/i386/tlsdesc.s
+++ b/src/ldso/i386/tlsdesc.s
@@ -17,15 +17,9 @@ __tlsdesc_dynamic:
mov %gs:4,%edx
push %ecx
mov (%eax),%ecx
- cmp %ecx,(%edx)
- jc 1f
mov 4(%eax),%eax
add (%edx,%ecx,4),%eax
-2: pop %ecx
+ pop %ecx
sub %gs:0,%eax
pop %edx
ret
-1: push %eax
- call __tls_get_new
- pop %ecx
- jmp 2b
diff --git a/src/ldso/x86_64/tlsdesc.s b/src/ldso/x86_64/tlsdesc.s
index 8238c3eb..0151d15c 100644
--- a/src/ldso/x86_64/tlsdesc.s
+++ b/src/ldso/x86_64/tlsdesc.s
@@ -17,28 +17,9 @@ __tlsdesc_dynamic:
mov %fs:8,%rdx
push %rcx
mov (%rax),%rcx
- cmp %rcx,(%rdx)
- jc 1f
mov 8(%rax),%rax
add (%rdx,%rcx,8),%rax
-2: pop %rcx
+ pop %rcx
sub %fs:0,%rax
pop %rdx
ret
-1: push %rdi
- push %rdi
- push %rsi
- push %r8
- push %r9
- push %r10
- push %r11
- mov %rax,%rdi
- call __tls_get_new
- pop %r11
- pop %r10
- pop %r9
- pop %r8
- pop %rsi
- pop %rdi
- pop %rdi
- jmp 2b
diff --git a/src/thread/__tls_get_addr.c b/src/thread/__tls_get_addr.c
index d7afdabd..19524fe0 100644
--- a/src/thread/__tls_get_addr.c
+++ b/src/thread/__tls_get_addr.c
@@ -1,12 +1,7 @@
-#include <stddef.h>
#include "pthread_impl.h"
void *__tls_get_addr(tls_mod_off_t *v)
{
pthread_t self = __pthread_self();
- if (v[0] <= self->dtv[0])
- return (void *)(self->dtv[v[0]] + v[1]);
- return __tls_get_new(v);
+ return (void *)(self->dtv[v[0]] + v[1]);
}
-
-weak_alias(__tls_get_addr, __tls_get_new);
diff --git a/src/thread/i386/tls.s b/src/thread/i386/tls.s
index 76d5d462..6e4c4cb9 100644
--- a/src/thread/i386/tls.s
+++ b/src/thread/i386/tls.s
@@ -4,14 +4,6 @@
___tls_get_addr:
mov %gs:4,%edx
mov (%eax),%ecx
- cmp %ecx,(%edx)
- jc 1f
mov 4(%eax),%eax
add (%edx,%ecx,4),%eax
ret
-1: push %eax
-.weak __tls_get_new
-.hidden __tls_get_new
- call __tls_get_new
- pop %edx
- ret
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index cec82157..0142b347 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -15,6 +15,7 @@ weak_alias(dummy_0, __release_ptc);
weak_alias(dummy_0, __pthread_tsd_run_dtors);
weak_alias(dummy_0, __do_orphaned_stdio_locks);
weak_alias(dummy_0, __dl_thread_cleanup);
+weak_alias(dummy_0, __dl_prepare_for_threads);
void __tl_lock(void)
{
@@ -235,6 +236,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
init_file_lock(__stderr_used);
__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
self->tsd = (void **)__pthread_tsd_main;
+ __dl_prepare_for_threads();
libc.threaded = 1;
}
if (attrp && !c11) attr = *attrp;