summaryrefslogtreecommitdiff
path: root/arch/arm/src
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2014-11-19 00:40:32 -0500
committer Rich Felker <dalias@aerifal.cx> 2014-11-19 01:02:01 -0500
commit 4a241f14a6bea81b9b50edda09f8184e35a75860 (patch)
tree cafc783295719edfa98d5e654e337acfe19ee83e /arch/arm/src
parent d8bdc97d148088bdaa672f56d4b8e0a15b03e70e (diff)
download musl-4a241f14a6bea81b9b50edda09f8184e35a75860.tar.gz
overhaul ARM atomics/tls for performance and compatibility
previously, builds for pre-armv6 targets hard-coded use of the "kuser helper" system for atomics and thread-pointer access, resulting in binaries that fail to run (crash) on systems where this functionality has been disabled (as a security/hardening measure) in the kernel. additionally, builds for armv6 hard-coded an outdated/deprecated memory barrier instruction which may require emulation (extremely slow) on future models. this overhaul replaces the behavior for all pre-armv7 builds (both of the above cases) to perform runtime detection of the appropriate mechanisms for barrier, atomic compare-and-swap, and thread pointer access. detection is based on information provided by the kernel in auxv: presence of the HWCAP_TLS bit for AT_HWCAP and the architecture version encoded in AT_PLATFORM. direct use of the instructions is preferred when possible, since probing for the existence of the kuser helper page would be difficult and would incur runtime cost. for builds targeting armv7 or later, the runtime detection code is not compiled at all, and much more efficient versions of the non-cas atomic operations are provided by using ldrex/strex directly rather than wrapping cas.
Diffstat (limited to 'arch/arm/src')
-rw-r--r-- arch/arm/src/__set_thread_area.c 50
-rw-r--r-- arch/arm/src/arm/atomics.s 116
-rw-r--r-- arch/arm/src/atomics.c 0
3 files changed, 166 insertions, 0 deletions
diff --git a/arch/arm/src/__set_thread_area.c b/arch/arm/src/__set_thread_area.c
new file mode 100644
index 00000000..680510ea
--- /dev/null
+++ b/arch/arm/src/__set_thread_area.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <elf.h>
+#include "pthread_impl.h"
+#include "libc.h"
+
+#define HWCAP_TLS (1 << 15) /* bit tested in __hwcap by __set_thread_area; per the commit message, the AT_HWCAP TLS capability flag */
+
+extern const unsigned char __attribute__((__visibility__("hidden"))) /* code labels from arm/atomics.s; only their addresses are used (see SET) */
+ __a_barrier_dummy[], __a_barrier_oldkuser[],
+ __a_barrier_v6[], __a_barrier_v7[],
+ __a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
+ __a_gettp_dummy[], __a_gettp_native[];
+
+#define __a_barrier_kuser 0xffff0fa0 /* fixed addresses standing in for the "kuser" implementations; presumably kernel kuser-helper entry points -- confirm */
+#define __a_cas_kuser 0xffff0fc0
+#define __a_gettp_kuser 0xffff0fe0
+
+extern uintptr_t __attribute__((__visibility__("hidden"))) /* dispatch words defined in the .data section of arm/atomics.s, initially 0 */
+ __a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
+
+#define SET(op,ver) (__a_##op##_ptr = \
+ (uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy) /* store the chosen implementation as an offset from the op's dummy label; the asm dispatcher adds it to pc */
+
+int __set_thread_area(void *p) /* On pre-armv7 builds, selects the barrier/cas/gettp implementations at run time; then issues syscall 0xf0005 with p and returns its result. */
+{
+#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7 /* detection code is compiled only when not targeting armv7 or later */
+ if (__hwcap & HWCAP_TLS) { /* TLS bit present: use the instruction-based implementations rather than the kuser addresses */
+ size_t *aux;
+ SET(gettp, native);
+ SET(cas, v7); /* start with the v7 variants; replaced below only if AT_PLATFORM reports v6 */
+ SET(barrier, v7);
+ for (aux=libc.auxv; *aux; aux+=2) { /* auxv is walked two words at a time (type, value) until a zero type */
+ if (*aux != AT_PLATFORM) continue;
+ const char *s = (void *)aux[1]; /* the AT_PLATFORM value is treated as a pointer to a string */
+ if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break; /* keep v7 unless the string starts with "v6" and the next character is not a digit */
+ SET(cas, v6);
+ SET(barrier, v6);
+ break; /* only the first AT_PLATFORM entry is examined */
+ }
+ } else {
+ int ver = *(int *)0xffff0ffc; /* integer read from a fixed address; presumably the kuser helper version word -- confirm against kernel documentation */
+ SET(gettp, kuser);
+ SET(cas, kuser);
+ SET(barrier, kuser);
+ if (ver < 2) a_crash(); /* a version below 2 is treated as unusable */
+ if (ver < 3) SET(barrier, oldkuser); /* below 3: replace the kuser barrier with the variant that calls the 0xffff0fc0 (cas) entry */
+ }
+#endif
+ return __syscall(0xf0005, p); /* executed on all builds, whether or not the detection above was compiled in */
+}
diff --git a/arch/arm/src/arm/atomics.s b/arch/arm/src/arm/atomics.s
new file mode 100644
index 00000000..9fcc7bd7
--- /dev/null
+++ b/arch/arm/src/arm/atomics.s
@@ -0,0 +1,116 @@
+.text
+
+.global __a_barrier
+.hidden __a_barrier
+.type __a_barrier,%function
+__a_barrier: /* dispatcher: jumps to the barrier implementation recorded in __a_barrier_ptr */
+ ldr ip,1f /* ip = the word at label 1, i.e. the distance from label 1 to __a_barrier_ptr */
+ ldr ip,[pc,ip] /* ip = contents of __a_barrier_ptr (assumes ARM-mode pc reads as this instruction + 8, which is label 1) */
+ add pc,pc,ip /* jump to __a_barrier_dummy + ip under the same pc assumption; ip == 0 lands on the dummy */
+1: .word __a_barrier_ptr-1b
+.global __a_barrier_dummy
+.hidden __a_barrier_dummy
+__a_barrier_dummy: /* target reached while __a_barrier_ptr is still 0; executes no barrier instruction, only returns */
+ tst lr,#1 /* test bit 0 of the return address */
+ moveq pc,lr /* bit clear: return by moving lr into pc */
+ bx lr /* bit set: return with bx (presumably for a Thumb caller -- confirm) */
+.global __a_barrier_oldkuser
+.hidden __a_barrier_oldkuser
+__a_barrier_oldkuser: /* barrier variant that calls the routine at 0xffff0fc0 on a stack word; selected by __set_thread_area when the version read is below 3 */
+ push {r0,r1,r2,r3,ip,lr} /* save the registers restored by the pop below */
+ mov r1,r0 /* r1 = r0, so the second argument equals the first */
+ mov r2,sp /* r2 = current stack pointer, which points into the block just pushed */
+ ldr ip,=0xffff0fc0 /* same constant as __a_cas_kuser in __set_thread_area.c */
+ mov lr,pc /* return address for the jump below (assumes ARM-mode pc reads as instruction + 8) */
+ mov pc,ip /* call the routine at 0xffff0fc0 */
+ pop {r0,r1,r2,r3,ip,lr} /* restore everything, discarding the callee's result */
+ tst lr,#1 /* same return sequence as __a_barrier_dummy */
+ moveq pc,lr
+ bx lr
+.global __a_barrier_v6
+.hidden __a_barrier_v6
+__a_barrier_v6: /* barrier selected by __set_thread_area when AT_PLATFORM reports v6 */
+ mcr p15,0,r0,c7,c10,5 /* coprocessor 15 write; presumably the CP15 data memory barrier operation -- confirm against the ARM architecture manual */
+ bx lr
+.global __a_barrier_v7
+.hidden __a_barrier_v7
+__a_barrier_v7: /* barrier selected by default when the HWCAP_TLS bit is set */
+ .word 0xf57ff05b /* dmb ish, emitted as a raw instruction word (presumably so it assembles for pre-v7 targets -- confirm) */
+ bx lr
+
+.global __a_cas
+.hidden __a_cas
+.type __a_cas,%function
+__a_cas: /* dispatcher: jumps to the compare-and-swap implementation recorded in __a_cas_ptr; uses only ip, so r0-r3 reach the target unchanged */
+ ldr ip,1f /* ip = the word at label 1, i.e. the distance from label 1 to __a_cas_ptr */
+ ldr ip,[pc,ip] /* ip = contents of __a_cas_ptr (assumes ARM-mode pc reads as this instruction + 8, which is label 1) */
+ add pc,pc,ip /* jump to __a_cas_dummy + ip under the same pc assumption; ip == 0 lands on the dummy */
+1: .word __a_cas_ptr-1b
+.global __a_cas_dummy
+.hidden __a_cas_dummy
+__a_cas_dummy: /* target reached while __a_cas_ptr is still 0: compare-and-store with a plain load and store, no exclusive access or barrier */
+ mov r3,r0 /* keep the comparison value in r3 */
+ ldr r0,[r2] /* r0 = current word at [r2] */
+ subs r0,r3,r0 /* r0 = comparison value - current value; zero exactly when they match */
+ streq r1,[r2] /* on a match, store r1 to [r2] */
+ tst lr,#1 /* return sequence: mov to pc when bit 0 of lr is clear, bx otherwise */
+ moveq pc,lr
+ bx lr
+.global __a_cas_v6
+.hidden __a_cas_v6
+__a_cas_v6: /* compare-and-swap using ldrex/strex, with the mcr-based barrier before and after; r0 = comparison value, r1 = new value, r2 = address */
+ mov r3,r0 /* keep the comparison value in r3; r0 is reused for results */
+ mcr p15,0,r0,c7,c10,5 /* same coprocessor write as __a_barrier_v6 */
+1: .word 0xe1920f9f /* ldrex r0,[r2] */
+ subs r0,r3,r0 /* r0 = comparison value - loaded value; a nonzero result skips the conditional store */
+ .word 0x01820f91 /* strexeq r0,r1,[r2] */
+ teqeq r0,#1 /* executed only if the store was attempted: compare the strex status in r0 with 1 */
+ beq 1b /* status was 1: loop back and retry from the ldrex */
+ mcr p15,0,r0,c7,c10,5
+ bx lr /* r0 holds the strex status if the store went through, otherwise the nonzero difference */
+.global __a_cas_v7
+.hidden __a_cas_v7
+__a_cas_v7: /* compare-and-swap using ldrex/strex, with dmb ish before and after; r0 = comparison value, r1 = new value, r2 = address */
+ mov r3,r0 /* keep the comparison value in r3; r0 is reused for results */
+ .word 0xf57ff05b /* dmb ish */
+1: .word 0xe1920f9f /* ldrex r0,[r2] */
+ subs r0,r3,r0 /* r0 = comparison value - loaded value; a nonzero result skips the conditional store */
+ .word 0x01820f91 /* strexeq r0,r1,[r2] */
+ teqeq r0,#1 /* executed only if the store was attempted: compare the strex status in r0 with 1 */
+ beq 1b /* status was 1: loop back and retry from the ldrex */
+ .word 0xf57ff05b /* dmb ish */
+ bx lr /* r0 holds the strex status if the store went through, otherwise the nonzero difference */
+
+.global __a_gettp
+.hidden __a_gettp
+.type __a_gettp,%function
+__a_gettp: /* dispatcher: jumps to the thread-pointer getter recorded in __a_gettp_ptr; uses r0 as scratch where the other dispatchers use ip */
+ ldr r0,1f /* r0 = the word at label 1, i.e. the distance from label 1 to __a_gettp_ptr */
+ ldr r0,[pc,r0] /* r0 = contents of __a_gettp_ptr (assumes ARM-mode pc reads as this instruction + 8, which is label 1) */
+ add pc,pc,r0 /* jump to __a_gettp_dummy + r0 under the same pc assumption; r0 == 0 lands on the dummy */
+1: .word __a_gettp_ptr-1b
+.global __a_gettp_dummy
+.hidden __a_gettp_dummy
+__a_gettp_dummy: /* target reached while __a_gettp_ptr is still 0; unlike the other dummies it has no return sequence */
+ .word 0xe7fddef1 /* raw instruction word; presumably an undefined-instruction encoding that traps -- confirm. If it did not trap, execution would continue into __a_gettp_native */
+.global __a_gettp_native
+.hidden __a_gettp_native
+__a_gettp_native: /* getter selected by __set_thread_area when the HWCAP_TLS bit is set */
+ mrc p15,0,r0,c13,c0,3 /* read a coprocessor 15 register into r0; presumably the user read-only thread ID register -- confirm against the ARM architecture manual */
+ bx lr /* result is returned in r0 */
+
+.data
+.global __a_barrier_ptr
+.hidden __a_barrier_ptr
+__a_barrier_ptr: /* offset from __a_barrier_dummy to the active barrier implementation; written by SET() in __set_thread_area.c */
+ .word 0 /* 0 makes the __a_barrier dispatcher land on __a_barrier_dummy */
+
+.global __a_cas_ptr
+.hidden __a_cas_ptr
+__a_cas_ptr: /* offset from __a_cas_dummy to the active compare-and-swap implementation; written by SET() in __set_thread_area.c */
+ .word 0 /* 0 makes the __a_cas dispatcher land on __a_cas_dummy */
+
+.global __a_gettp_ptr
+.hidden __a_gettp_ptr
+__a_gettp_ptr: /* offset from __a_gettp_dummy to the active thread-pointer getter; written by SET() in __set_thread_area.c */
+ .word 0 /* 0 makes the __a_gettp dispatcher land on __a_gettp_dummy */
diff --git a/arch/arm/src/atomics.c b/arch/arm/src/atomics.c
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/arch/arm/src/atomics.c