diff --git a/ChangeLog b/ChangeLog index b49e1c42af880e575c48c151b27bf082aa5a0aec..5d6bf41cad93737677076feec62de47145e7c1f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2015-01-23 Niels Möller <nisse@lysator.liu.se> + + * fat-setup.h (DEFINE_FAT_FUNC): Check value of function pointer, + before calling fat_init. Should be correct even without memory + barrier. + * fat-x86_64.c (fat_init): Deleted static variable initialized. + The checks of the relevant pointer in DEFINE_FAT_FUNC is more + robust. + * fat-arm.c (fat_init): Likewise. + 2015-01-21 Niels Möller <nisse@lysator.liu.se> * fat-arm.c (fat_init): Setup for use of neon assembly functions. diff --git a/fat-arm.c b/fat-arm.c index e351fdcba8f336e7fc213353d37801f2c3f2025e..6d6accfafc53791b341477a105cee29761b68b45 100644 --- a/fat-arm.c +++ b/fat-arm.c @@ -164,13 +164,9 @@ DECLARE_FAT_FUNC_VAR(umac_nh_n, umac_nh_n_func, neon); static void CONSTRUCTOR fat_init (void) { - static volatile int initialized = 0; struct arm_features features; int verbose; - if (initialized) - return; - get_arm_features (&features); verbose = getenv (ENV_VERBOSE) != NULL; @@ -213,8 +209,6 @@ fat_init (void) _nettle_umac_nh_vec = _nettle_umac_nh_c; _nettle_umac_nh_n_vec = _nettle_umac_nh_n_c; } - /* FIXME: Needs memory barrier, to enforce store ordering. */ - initialized = 1; } DEFINE_FAT_FUNC(_nettle_aes_encrypt, void, diff --git a/fat-setup.h b/fat-setup.h index ea78a188b69f836c675aa10c37b0b495b7733e88..7c0bbf6654281f9d640c284ba528717c5a0e3d13 100644 --- a/fat-setup.h +++ b/fat-setup.h @@ -30,10 +30,11 @@ */ /* Fat library initialization works as follows. The main function is - fat_init. It tries to do initialization only once, but since it is - idempotent and pointer updates are atomic on x86_64, there's no - harm if it is in some cases called multiple times from several - threads. + fat_init. We try to do initialization only once, but since it is + idempotent, there's no harm if it is in some cases called multiple + times from several threads. For correctness, we rely on atomic + writes, but not on memory barriers or any other synchronization + mechanism. The fat_init function checks the cpuid flags, and sets function pointers, e.g, _nettle_aes_encrypt_vec, to point to the appropriate @@ -47,18 +48,24 @@ For the actual indirection, there are two cases. - If ifunc support is available, function pointers are statically - initialized to NULL, and we register resolver functions, e.g., - _nettle_aes_encrypt_resolve, which call fat_init, and then return - the function pointer, e.g., the value of _nettle_aes_encrypt_vec. - - If ifunc is not available, we have to define a wrapper function to - jump via the function pointer. (FIXME: For internal calls, we could - do this as a macro). We statically initialize each function pointer - to point to a special initialization function, e.g., - _nettle_aes_encrypt_init, which calls fat_init, and then invokes - the right function. This way, all pointers are setup correctly at - the first call to any fat function. + * If ifunc support is available, function pointers are statically + initialized to NULL, and we register resolver functions, e.g., + _nettle_aes_encrypt_resolve, which call fat_init, and then return + the function pointer, e.g., the value of _nettle_aes_encrypt_vec. + + * If ifunc is not available, we have to define a wrapper function + to jump via the function pointer. (FIXME: For internal calls, we + could do this as a macro). + + We statically initialize each function pointer to point to a + special initialization function, e.g., _nettle_aes_encrypt_init, + which calls fat_init, and then invokes the right function. This + way, all pointers are setup correctly at the first call to any + fat function. + + And atomic writes are required for correctness in the case that + several threads do "first call to any fat function" at the same + time. */ #if HAVE_GCC_ATTRIBUTE @@ -107,7 +114,8 @@ { \ if (getenv (ENV_VERBOSE)) \ fprintf (stderr, "libnettle: "#name"_resolve\n"); \ - fat_init(); \ + if (!name##_vec) \ + fat_init(); \ return (void_func *) name##_vec; \ } @@ -125,7 +133,8 @@ static rtype name##_init prototype { \ if (getenv (ENV_VERBOSE)) \ fprintf (stderr, "libnettle: "#name"_init\n"); \ - fat_init(); \ + if (name##_vec == name##_init) \ + fat_init(); \ assert (name##_vec != name##_init); \ return name##_vec args; \ } diff --git a/fat-x86_64.c b/fat-x86_64.c index 20be1c57f6a944c0baa7e0aaafde40c487681f38..2e97d1e5d185d00a2bc9cff93239164a7ce7b9f4 100644 --- a/fat-x86_64.c +++ b/fat-x86_64.c @@ -122,11 +122,8 @@ DECLARE_FAT_FUNC_VAR(memxor, memxor_func, sse2) static void CONSTRUCTOR fat_init (void) { - static volatile int initialized = 0; struct x86_features features; int verbose; - if (initialized) - return; /* FIXME: Replace all getenv calls by getenv_secure? */ verbose = getenv (ENV_VERBOSE) != NULL; @@ -169,11 +166,6 @@ fat_init (void) fprintf (stderr, "libnettle: intel SSE2 will not be used for memxor.\n"); nettle_memxor_vec = _nettle_memxor_x86_64; } - - /* The x86_64 architecture should always make stores visible in the - right order to other processors (except for non-temporal stores - and the like). So we don't need any memory barrier. */ - initialized = 1; } DEFINE_FAT_FUNC(_nettle_aes_encrypt, void,