diff --git a/CMakeLists.txt b/CMakeLists.txt index 446ec6e..1043c9a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,8 @@ project (jansson C) # Options OPTION (BUILD_SHARED_LIBS "Build shared libraries." OFF) +OPTION (USE_URANDOM "Use /dev/urandom to seed the hash function." ON) +OPTION (USE_WINDOWS_CRYPTOAPI "Use CryptGenRandom to seed the hash function." ON) if (MSVC) # This option must match the settings used in your program, in particular if you @@ -85,12 +87,12 @@ set (JANSSON_SOVERSION 4) # for CheckFunctionKeywords set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +INCLUDE (CheckCSourceCompiles) include (CheckFunctionExists) include (CheckFunctionKeywords) include (CheckIncludeFiles) include (CheckTypeSize) - if (MSVC) # Turn off Microsofts "security" warnings. add_definitions( "/W3 /D_CRT_SECURE_NO_WARNINGS /wd4005 /wd4996 /nologo" ) @@ -106,14 +108,25 @@ if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) set(CMAKE_C_FLAGS "-fPIC") endif() - +check_include_files (endian.h HAVE_ENDIAN_H) +check_include_files (fcntl.h HAVE_FCNTL_H) +check_include_files (sched.h HAVE_SCHED_H) +check_include_files (unistd.h HAVE_UNISTD_H) +check_include_files (sys/param.h HAVE_SYS_PARAM_H) +check_include_files (sys/stat.h HAVE_SYS_STAT_H) +check_include_files (sys/time.h HAVE_SYS_TIME_H) +check_include_files (sys/time.h HAVE_SYS_TYPES_H) + +check_function_exists (close HAVE_CLOSE) +check_function_exists (getpid HAVE_GETPID) +check_function_exists (gettimeofday HAVE_GETTIMEOFDAY) +check_function_exists (open HAVE_OPEN) +check_function_exists (read HAVE_READ) +check_function_exists (sched_yield HAVE_SCHED_YIELD) # Check for the int-type includes -check_include_files (sys/types.h HAVE_SYS_TYPES_H) -check_include_files (inttypes.h HAVE_INTTYPES_H) check_include_files (stdint.h HAVE_STDINT_H) - # Check our 64 bit integer sizes check_type_size (__int64 __INT64) check_type_size (int64_t INT64_T) @@ -124,17 +137,32 @@ check_type_size (int32_t INT32_T) check_type_size (__int32 __INT32) check_type_size ("long" LONG_INT) check_type_size ("int" INT) - if (HAVE_INT32_T) set (JSON_INT32 int32_t) elseif (HAVE___INT32) set (JSON_INT32 __int32) -elseif (HAVE_LONG AND (${LONG_INT} EQUAL 4)) +elseif (HAVE_LONG_INT AND (${LONG_INT} EQUAL 4)) set (JSON_INT32 long) elseif (HAVE_INT AND (${INT} EQUAL 4)) set (JSON_INT32 int) else () - message (FATAL_ERROR "Could not detect a valid 32 bit integer type") + message (FATAL_ERROR "Could not detect a valid 32-bit integer type") +endif () + +check_type_size (uint32_t UINT32_T) +check_type_size (__uint32 __UINT32) +check_type_size ("unsigned long" UNSIGNED_LONG_INT) +check_type_size ("unsigned int" UNSIGNED_INT) +if (HAVE_UINT32_T) + set (JSON_UINT32 uint32_t) +elseif (HAVE___UINT32) + set (JSON_UINT32 __uint32) +elseif (HAVE_UNSIGNED_LONG_INT AND (${UNSIGNED_LONG_INT} EQUAL 4)) + set (JSON_UINT32 "unsigned long") +elseif (HAVE_UNSIGNED_INT AND (${UNSIGNED_INT} EQUAL 4)) + set (JSON_UINT32 "unsigned int") +else () + message (FATAL_ERROR "Could not detect a valid unsigned 32-bit integer type") endif () # Check for ssize_t and SSIZE_T existance. @@ -206,11 +234,9 @@ else () set (JSON_HAVE_LOCALECONV 0) endif () - # check if we have setlocale check_function_exists (setlocale HAVE_SETLOCALE) - # Check what the inline keyword is. # Note that the original JSON_INLINE was always set to just 'inline', so this goes further. check_function_keywords("inline") @@ -238,6 +264,9 @@ elseif (HAVE__SNPRINTF) set (JSON_SNPRINTF _snprintf) endif () +check_c_source_compiles ("int main() { unsigned long val; __sync_bool_compare_and_swap(&val, 0, 1); return 0; } " HAVE_SYNC_BUILTINS) +check_c_source_compiles ("int main() { char l; unsigned long v; __atomic_test_and_set(&l, __ATOMIC_RELAXED); __atomic_store_n(&v, 1, __ATOMIC_ACQ_REL); __atomic_load_n(&v, __ATOMIC_ACQUIRE); return 0; }" HAVE_ATOMIC_BUILTINS) + # Create pkg-conf file. # (We use the same files as ./configure does, so we # have to defined the same variables used there). diff --git a/cmake/config.h.cmake b/cmake/config.h.cmake index bc81178..b27b9a3 100644 --- a/cmake/config.h.cmake +++ b/cmake/config.h.cmake @@ -1,35 +1,36 @@ -/* Reduced down to the defines that are actually used in the code */ - -/* Define to 1 if you have the (and friends) header file. */ -#cmakedefine HAVE_INTTYPES_H 1 -#cmakedefine HAVE_STDINT_H 1 +#cmakedefine HAVE_ENDIAN_H 1 +#cmakedefine HAVE_FCNTL_H 1 +#cmakedefine HAVE_SCHED_H 1 +#cmakedefine HAVE_UNISTD_H 1 +#cmakedefine HAVE_SYS_PARAM_H 1 +#cmakedefine HAVE_SYS_STAT_H 1 +#cmakedefine HAVE_SYS_TIME_H 1 #cmakedefine HAVE_SYS_TYPES_H 1 +#cmakedefine HAVE_STDINT_H 1 -/* We must include this here, as in (eg) utf.h it will want to use - the integer type, which in MSVC2010 will be in stdint.h - (there is no inttypes.h in MSVC2010) */ -#if defined(HAVE_STDINT_H) -# include -#elif defined(HAVE_INTTYPES_H) -# include -#elif defined(HAVE_SYS_TYPES_H) -# include -#endif +#cmakedefine HAVE_CLOSE 1 +#cmakedefine HAVE_GETPID 1 +#cmakedefine HAVE_GETTIMEOFDAY 1 +#cmakedefine HAVE_OPEN 1 +#cmakedefine HAVE_READ 1 +#cmakedefine HAVE_SCHED_YIELD 1 + +#cmakedefine HAVE_SYNC_BUILTINS 1 +#cmakedefine HAVE_ATOMIC_BUILTINS 1 -/* Define to 1 if you have the header file. */ #cmakedefine HAVE_LOCALE_H 1 - -/* Define to 1 if you have the 'setlocale' function. */ #cmakedefine HAVE_SETLOCALE 1 -/* Define to the type of a signed integer type of width exactly 32 bits if - such a type exists and the standard includes do not define it. */ #cmakedefine HAVE_INT32_T 1 - #ifndef HAVE_INT32_T # define int32_t @JSON_INT32@ #endif +#cmakedefine HAVE_UINT32_T 1 +#ifndef HAVE_UINT32_T +# define uint32_t @JSON_UINT32@ +#endif + #cmakedefine HAVE_SSIZE_T 1 #ifndef HAVE_SSIZE_T @@ -43,3 +44,6 @@ #endif #cmakedefine HAVE_VSNPRINTF + +#cmakedefine USE_URANDOM 1 +#cmakedefine USE_WINDOWS_CRYPTOAPI 1 diff --git a/configure.ac b/configure.ac index 24ba37d..113cb85 100644 --- a/configure.ac +++ b/configure.ac @@ -14,10 +14,11 @@ AM_CONDITIONAL([GCC], [test x$GCC = xyes]) # Checks for libraries. # Checks for header files. -AC_CHECK_HEADERS([locale.h]) +AC_CHECK_HEADERS([endian.h fcntl.h locale.h sched.h unistd.h sys/param.h sys/stat.h sys/time.h sys/types.h]) # Checks for typedefs, structures, and compiler characteristics. AC_TYPE_INT32_T +AC_TYPE_UINT32_T AC_TYPE_LONG_LONG_INT AC_C_INLINE @@ -29,7 +30,31 @@ esac AC_SUBST([json_inline]) # Checks for library functions. -AC_CHECK_FUNCS([strtoll localeconv]) +AC_CHECK_FUNCS([close getpid gettimeofday localeconv open read sched_yield strtoll]) + +AC_MSG_CHECKING([for gcc __sync builtins]) +have_sync_builtins=no +AC_TRY_LINK( + [], [unsigned long val; __sync_bool_compare_and_swap(&val, 0, 1);], + [have_sync_builtins=yes], +) +if test "x$have_sync_builtins" = "xyes"; then + AC_DEFINE([HAVE_SYNC_BUILTINS], [1], + [Define to 1 if gcc's __sync builtins are available]) +fi +AC_MSG_RESULT([$have_sync_builtins]) + +AC_MSG_CHECKING([for gcc __atomic builtins]) +have_atomic_builtins=no +AC_TRY_LINK( + [], [char l; unsigned long v; __atomic_test_and_set(&l, __ATOMIC_RELAXED); __atomic_store_n(&v, 1, __ATOMIC_ACQ_REL); __atomic_load_n(&v, __ATOMIC_ACQUIRE);], + [have_atomic_builtins=yes], +) +if test "x$have_atomic_builtins" = "xyes"; then + AC_DEFINE([HAVE_ATOMIC_BUILTINS], [1], + [Define to 1 if gcc's __atomic builtins are available]) +fi +AC_MSG_RESULT([$have_atomic_builtins]) case "$ac_cv_type_long_long_int$ac_cv_func_strtoll" in yesyes) json_have_long_long=1;; @@ -43,6 +68,27 @@ case "$ac_cv_header_locale_h$ac_cv_func_localeconv" in esac AC_SUBST([json_have_localeconv]) +# Features +AC_ARG_ENABLE([urandom], + [AS_HELP_STRING([--disable-urandom], + [Don't use /dev/urandom to seed the hash function])], + [use_urandom=$enableval], [use_urandom=yes]) + +if test "x$use_urandom" = xyes; then +AC_DEFINE([USE_URANDOM], [1], + [Define to 1 if /dev/urandom should be used for seeding the hash function]) +fi + +AC_ARG_ENABLE([windows-cryptoapi], + [AS_HELP_STRING([--disable-windows-cryptoapi], + [Don't use CryptGenRandom to seed the hash function])], + [use_windows_cryptoapi=$enableval], [use_windows_cryptoapi=yes]) + +if test "x$use_windows_cryptoapi" = xyes; then +AC_DEFINE([USE_WINDOWS_CRYPTOAPI], [1], + [Define to 1 if CryptGenRandom should be used for seeding the hash function]) +fi + AC_CONFIG_FILES([ jansson.pc Makefile diff --git a/src/Makefile.am b/src/Makefile.am index e1a5493..b4af562 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -8,6 +8,7 @@ libjansson_la_SOURCES = \ error.c \ hashtable.c \ hashtable.h \ + hashtable_seed.c \ jansson_private.h \ load.c \ memory.c \ diff --git a/src/hashtable.c b/src/hashtable.c index 5fb0467..abd4bf1 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -5,8 +5,17 @@ * it under the terms of the MIT license. See LICENSE for details. */ +#if HAVE_CONFIG_H +#include +#endif + #include #include + +#if HAVE_STDINT_H +#include +#endif + #include /* for JSON_INLINE */ #include "jansson_private.h" /* for container_of() */ #include "hashtable.h" @@ -15,24 +24,13 @@ typedef struct hashtable_list list_t; typedef struct hashtable_pair pair_t; typedef struct hashtable_bucket bucket_t; +extern volatile uint32_t hashtable_seed; + +/* Implementation of the hash function */ +#include "lookup3.h" + #define list_to_pair(list_) container_of(list_, pair_t, list) - -/* From http://www.cse.yorku.ca/~oz/hash.html */ -static size_t hash_str(const void *ptr) -{ - const char *str = (const char *)ptr; - - size_t hash = 5381; - size_t c; - - while((c = (size_t)*str)) - { - hash = ((hash << 5) + hash) + c; - str++; - } - - return hash; -} +#define hash_str(key) ((size_t)hashlittle((key), strlen(key), hashtable_seed)) static JSON_INLINE void list_init(list_t *list) { @@ -74,19 +72,6 @@ static void insert_to_bucket(hashtable_t *hashtable, bucket_t *bucket, } } -static const size_t primes[] = { - 5, 13, 23, 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, - 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, - 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, - 805306457, 1610612741 -}; - -static JSON_INLINE size_t num_buckets(hashtable_t *hashtable) -{ - return primes[hashtable->num_buckets]; -} - - static pair_t *hashtable_find_pair(hashtable_t *hashtable, bucket_t *bucket, const char *key, size_t hash) { @@ -120,7 +105,7 @@ static int hashtable_do_del(hashtable_t *hashtable, bucket_t *bucket; size_t index; - index = hash % num_buckets(hashtable); + index = hash & hashmask(hashtable->order); bucket = &hashtable->buckets[index]; pair = hashtable_find_pair(hashtable, bucket, key, hash); @@ -167,14 +152,14 @@ static int hashtable_do_rehash(hashtable_t *hashtable) jsonp_free(hashtable->buckets); - hashtable->num_buckets++; - new_size = num_buckets(hashtable); + hashtable->order++; + new_size = hashsize(hashtable->order); hashtable->buckets = jsonp_malloc(new_size * sizeof(bucket_t)); if(!hashtable->buckets) return -1; - for(i = 0; i < num_buckets(hashtable); i++) + for(i = 0; i < hashsize(hashtable->order); i++) { hashtable->buckets[i].first = hashtable->buckets[i].last = &hashtable->list; @@ -199,14 +184,14 @@ int hashtable_init(hashtable_t *hashtable) size_t i; hashtable->size = 0; - hashtable->num_buckets = 0; /* index to primes[] */ - hashtable->buckets = jsonp_malloc(num_buckets(hashtable) * sizeof(bucket_t)); + hashtable->order = 3; + hashtable->buckets = jsonp_malloc(hashsize(hashtable->order) * sizeof(bucket_t)); if(!hashtable->buckets) return -1; list_init(&hashtable->list); - for(i = 0; i < num_buckets(hashtable); i++) + for(i = 0; i < hashsize(hashtable->order); i++) { hashtable->buckets[i].first = hashtable->buckets[i].last = &hashtable->list; @@ -230,12 +215,12 @@ int hashtable_set(hashtable_t *hashtable, size_t hash, index; /* rehash if the load ratio exceeds 1 */ - if(hashtable->size >= num_buckets(hashtable)) + if(hashtable->size >= hashsize(hashtable->order)) if(hashtable_do_rehash(hashtable)) return -1; hash = hash_str(key); - index = hash % num_buckets(hashtable); + index = hash & hashmask(hashtable->order); bucket = &hashtable->buckets[index]; pair = hashtable_find_pair(hashtable, bucket, key, hash); @@ -273,7 +258,7 @@ void *hashtable_get(hashtable_t *hashtable, const char *key) bucket_t *bucket; hash = hash_str(key); - bucket = &hashtable->buckets[hash % num_buckets(hashtable)]; + bucket = &hashtable->buckets[hash & hashmask(hashtable->order)]; pair = hashtable_find_pair(hashtable, bucket, key, hash); if(!pair) @@ -294,7 +279,7 @@ void hashtable_clear(hashtable_t *hashtable) hashtable_do_clear(hashtable); - for(i = 0; i < num_buckets(hashtable); i++) + for(i = 0; i < hashsize(hashtable->order); i++) { hashtable->buckets[i].first = hashtable->buckets[i].last = &hashtable->list; @@ -316,7 +301,7 @@ void *hashtable_iter_at(hashtable_t *hashtable, const char *key) bucket_t *bucket; hash = hash_str(key); - bucket = &hashtable->buckets[hash % num_buckets(hashtable)]; + bucket = &hashtable->buckets[hash & hashmask(hashtable->order)]; pair = hashtable_find_pair(hashtable, bucket, key, hash); if(!pair) diff --git a/src/hashtable.h b/src/hashtable.h index 4a7ce6f..469c6ec 100644 --- a/src/hashtable.h +++ b/src/hashtable.h @@ -32,7 +32,7 @@ struct hashtable_bucket { typedef struct hashtable { size_t size; struct hashtable_bucket *buckets; - size_t num_buckets; /* index to primes[] */ + size_t order; /* hashtable has pow(2, order) buckets */ struct hashtable_list list; } hashtable_t; @@ -40,6 +40,7 @@ typedef struct hashtable { #define hashtable_key_to_iter(key_) \ (&(container_of(key_, struct hashtable_pair, key)->list)) + /** * hashtable_init - Initialize a hashtable object * diff --git a/src/hashtable_seed.c b/src/hashtable_seed.c new file mode 100644 index 0000000..a07d145 --- /dev/null +++ b/src/hashtable_seed.c @@ -0,0 +1,278 @@ +/* Generate sizeof(uint32_t) bytes of as random data as possible to seed + the hash function. +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#ifdef HAVE_STDINT_H +#include +#endif + +#ifdef HAVE_FCNTL_H +#include +#endif + +#ifdef HAVE_SCHED_H +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_SYS_STAT_H +#include +#endif + +#ifdef HAVE_SYS_TIME_H +#include +#endif + +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#if defined(_WIN32) +/* For _getpid() */ +#include +#endif + +#include "jansson.h" + + +static uint32_t buf_to_uint32(char *data) { + size_t i; + uint32_t result = 0; + + for (i = 0; i < sizeof(uint32_t); i++) + result = (result << 8) | (unsigned char)data[i]; + + return result; +} + + + +/* /dev/urandom */ +#if !defined(_WIN32) && defined(USE_URANDOM) +static int seed_from_urandom(uint32_t *seed) { + /* Use unbuffered I/O if we have open(), close() and read(). Otherwise + fall back to fopen() */ + + char data[sizeof(uint32_t)]; + int ok; + +#if defined(HAVE_OPEN) && defined(HAVE_CLOSE) && defined(HAVE_READ) + int urandom; + urandom = open("/dev/urandom", O_RDONLY); + if (urandom == -1) + return 1; + + ok = read(urandom, data, sizeof(uint32_t)) == sizeof(uint32_t); + close(urandom); +#else + FILE *urandom; + + urandom = fopen("/dev/urandom", "rb"); + if (!urandom) + return 1; + + ok = fread(data, 1, sizeof(uint32_t), urandom) == sizeof(uint32_t); + fclose(urandom); +#endif + + if (!ok) + return 1; + + *seed = buf_to_uint32(data); + return 0; +} +#endif + +/* Windows Crypto API */ +#if defined(_WIN32) && defined(USE_WINDOWS_CRYPTOAPI) +#include +#include + +typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv, LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType, DWORD dwFlags); +typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen, BYTE *pbBuffer); +typedef BOOL (WINAPI *CRYPTRELEASECONTEXT)(HCRYPTPROV hProv, DWORD dwFlags); + +static int seed_from_windows_cryptoapi(uint32_t *seed) +{ + HINSTANCE hAdvAPI32 = NULL; + CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; + CRYPTGENRANDOM pCryptGenRandom = NULL; + CRYPTRELEASECONTEXT pCryptReleaseContext = NULL; + HCRYPTPROV hCryptProv = 0; + BYTE data[sizeof(uint32_t)]; + int ok; + + hAdvAPI32 = GetModuleHandle("advapi32.dll"); + if(hAdvAPI32 == NULL) + return 1; + + pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(hAdvAPI32, "CryptAcquireContextA"); + if (!pCryptAcquireContext) + return 1; + + pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, "CryptGenRandom"); + if (!pCryptGenRandom) + return 1; + + pCryptReleaseContext = (CRYPTRELEASECONTEXT)GetProcAddress(hAdvAPI32, "CryptReleaseContext"); + if (!pCryptReleaseContext) + return 1; + + if (!pCryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) + return 1; + + ok = CryptGenRandom(hCryptProv, sizeof(uint32_t), data); + pCryptReleaseContext(hCryptProv, 0); + + if (!ok) + return 1; + + *seed = buf_to_uint32((char *)data); + return 0; +} +#endif + +/* gettimeofday() and getpid() */ +static int seed_from_timestamp_and_pid(uint32_t *seed) { +#ifdef HAVE_GETTIMEOFDAY + /* XOR of seconds and microseconds */ + struct timeval tv; + gettimeofday(&tv, NULL); + *seed = (uint32_t)tv.tv_sec ^ (uint32_t)tv.tv_usec; +#else + /* Seconds only */ + *seed = (uint32_t)time(NULL); +#endif + + /* XOR with PID for more randomness */ +#if defined(_WIN32) + *seed ^= (uint32_t)_getpid(); +#elif defined(HAVE_GETPID) + *seed ^= (uint32_t)getpid(); +#endif + + return 0; +} + +static uint32_t generate_seed() { + uint32_t seed; + int done = 0; + +#if !defined(_WIN32) && defined(USE_URANDOM) + if (!done && seed_from_urandom(&seed) == 0) + done = 1; +#endif + +#if defined(_WIN32) && defined(USE_WINDOWS_CRYPTOAPI) + if (!done && seed_from_windows_cryptoapi(&seed) == 0) + done = 1; +#endif + + if (!done) { + /* Fall back to timestamp and PID if no better randomness is + available */ + seed_from_timestamp_and_pid(&seed); + } + + /* Make sure the seed is never zero */ + if (seed == 0) + seed = 1; + + return seed; +} + + +volatile uint32_t hashtable_seed = 0; + +#if defined(HAVE_ATOMIC_BUILTINS) && (defined(HAVE_SCHED_YIELD) || !defined(_WIN32)) +static volatile char seed_initialized = 0; + +void json_object_seed(size_t seed) { + uint32_t new_seed = (uint32_t)seed; + + if (hashtable_seed == 0) { + if (__atomic_test_and_set(&seed_initialized, __ATOMIC_RELAXED) == 0) { + /* Do the seeding ourselves */ + if (new_seed == 0) + new_seed = generate_seed(); + + __atomic_store_n(&hashtable_seed, new_seed, __ATOMIC_ACQ_REL); + } else { + /* Wait for another thread to do the seeding */ + do { +#ifdef HAVE_SCHED_YIELD + sched_yield(); +#endif + } while(__atomic_load_n(&hashtable_seed, __ATOMIC_ACQUIRE) == 0); + } + } +} +#elif defined(HAVE_SYNC_BUILTINS) && (defined(HAVE_SCHED_YIELD) || !defined(_WIN32)) +void json_object_seed(size_t seed) { + uint32_t new_seed = (uint32_t)seed; + + if (hashtable_seed == 0) { + if (new_seed == 0) { + /* Explicit synchronization fences are not supported by the + __sync builtins, so every thread getting here has to + generate the seed value. + */ + new_seed = generate_seed(); + } + + do { + if (__sync_bool_compare_and_swap(&hashtable_seed, 0, new_seed)) { + /* We were the first to seed */ + break; + } else { + /* Wait for another thread to do the seeding */ +#ifdef HAVE_SCHED_YIELD + sched_yield(); +#endif + } + } while(hashtable_seed == 0); + } +} +#elif defined(_WIN32) +static long seed_initialized = 0; +void json_object_seed(size_t seed) { + uint32_t new_seed = (uint32_t)seed; + + if (hashtable_seed == 0) { + if (InterlockedIncrement(&seed_initialized) == 1) { + /* Do the seeding ourselves */ + if (new_seed == 0) + new_seed = generate_seed(); + + hashtable_seed = new_seed; + } else { + /* Wait for another thread to do the seeding */ + do { + SwitchToThread(); + } while (hashtable_seed == 0); + } + } +} +#else +/* Fall back to a thread-unsafe version */ +void json_object_seed(size_t seed) { + uint32_t new_seed = (uint32_t)seed; + + if (hashtable_seed == 0) { + if (new_seed == 0) + new_seed = generate_seed(); + + hashtable_seed = new_seed; + } +} +#endif diff --git a/src/jansson.def b/src/jansson.def index 8cc2e9c..19096d4 100644 --- a/src/jansson.def +++ b/src/jansson.def @@ -41,6 +41,7 @@ EXPORTS json_object_iter_value json_object_iter_set_new json_object_key_to_iter + json_object_seed json_dumps json_dumpf json_dump_file diff --git a/src/jansson.h b/src/jansson.h index 52c8077..ea23085 100644 --- a/src/jansson.h +++ b/src/jansson.h @@ -126,6 +126,7 @@ typedef struct { /* getters, setters, manipulation */ +void json_object_seed(size_t seed); size_t json_object_size(const json_t *object); json_t *json_object_get(const json_t *object, const char *key); int json_object_set_new(json_t *object, const char *key, json_t *value); diff --git a/src/lookup3.h b/src/lookup3.h new file mode 100644 index 0000000..dc76138 --- /dev/null +++ b/src/lookup3.h @@ -0,0 +1,366 @@ +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. +You could implement hashbig2() if you wanted but I haven't bothered here. + +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hashword(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. +------------------------------------------------------------------------------- +*/ + +#include + +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef HAVE_STDINT_H +#include /* defines uint32_t etc */ +#endif + +#ifdef HAVE_SYS_PARAM_H +#include /* attempt to define endianness */ +#endif + +#ifdef HAVE_ENDIAN_H +# include /* attempt to define endianness */ +#endif + +/* + * My best guess at if you are big-endian or little-endian. This may + * need adjustment. + */ +#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ + __BYTE_ORDER == __LITTLE_ENDIAN) || \ + (defined(i386) || defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ + __BYTE_ORDER == __BIG_ENDIAN) || \ + (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 +#endif + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} diff --git a/src/utf.h b/src/utf.h index cb10c24..b4f1091 100644 --- a/src/utf.h +++ b/src/utf.h @@ -10,23 +10,11 @@ #ifdef HAVE_CONFIG_H #include +#endif -#ifdef HAVE_INTTYPES_H -/* inttypes.h includes stdint.h in a standard environment, so there's -no need to include stdint.h separately. If inttypes.h doesn't define -int32_t, it's defined in config.h. */ -#include -#endif /* HAVE_INTTYPES_H */ - -#else /* !HAVE_CONFIG_H */ -#ifdef _WIN32 -typedef int int32_t; -#else /* !_WIN32 */ -/* Assume a standard environment */ -#include -#endif /* _WIN32 */ - -#endif /* HAVE_CONFIG_H */ +#ifdef HAVE_STDINT_H +#include +#endif int utf8_encode(int codepoint, char *buffer, int *size); diff --git a/src/value.c b/src/value.c index 582849b..1b02d90 100644 --- a/src/value.c +++ b/src/value.c @@ -9,11 +9,19 @@ #define _GNU_SOURCE #endif +#ifdef HAVE_CONFIG_H +#include +#endif + #include #include #include #include +#ifdef HAVE_STDINT_H +#include +#endif + #include "jansson.h" #include "hashtable.h" #include "jansson_private.h" @@ -36,11 +44,19 @@ static JSON_INLINE void json_init(json_t *json, json_type type) /*** object ***/ +extern volatile uint32_t hashtable_seed; + json_t *json_object(void) { json_object_t *object = jsonp_malloc(sizeof(json_object_t)); if(!object) return NULL; + + if (!hashtable_seed) { + /* Autoseed */ + json_object_seed(0); + } + json_init(&object->json, JSON_OBJECT); if(hashtable_init(&object->hashtable)) diff --git a/test/bin/json_process.c b/test/bin/json_process.c index 23afefe..e2c54bd 100644 --- a/test/bin/json_process.c +++ b/test/bin/json_process.c @@ -37,6 +37,8 @@ struct config { int sort_keys; int strip; int use_env; + int have_hashseed; + int hashseed; } conf; #define l_isspace(c) ((c) == ' ' || (c) == '\n' || (c) == '\r' || (c) == '\t') @@ -108,6 +110,12 @@ static void read_conf(FILE *conffile) conf.sort_keys = atoi(val); if (!strcmp(line, "STRIP")) conf.strip = atoi(val); + if (!strcmp(line, "HASHSEED")) { + conf.have_hashseed = 1; + conf.hashseed = atoi(val); + } else { + conf.have_hashseed = 0; + } } free(buffer); @@ -188,6 +196,9 @@ int use_conf(char *test_path) if (conf.sort_keys) flags |= JSON_SORT_KEYS; + if (conf.have_hashseed) + json_object_seed(conf.hashseed); + if (conf.strip) { /* Load to memory, strip leading and trailing whitespace */ buffer = loadfile(infile); @@ -265,7 +276,10 @@ int use_env() flags |= JSON_PRESERVE_ORDER; if(getenv_int("JSON_SORT_KEYS")) - flags |= JSON_SORT_KEYS; + flags |= JSON_SORT_KEYS; + + if(getenv("HASHSEED")) + json_object_seed(getenv_int("HASHSEED")); if(getenv_int("STRIP")) { /* Load to memory, strip leading and trailing whitespace */ diff --git a/test/suites/api/test_memory_funcs.c b/test/suites/api/test_memory_funcs.c index 8737389..84b1746 100644 --- a/test/suites/api/test_memory_funcs.c +++ b/test/suites/api/test_memory_funcs.c @@ -24,13 +24,13 @@ static void create_and_free_complex_object() static void *my_malloc(size_t size) { - malloc_called += 1; + malloc_called = 1; return malloc(size); } static void my_free(void *ptr) { - free_called += 1; + free_called = 1; free(ptr); } @@ -39,7 +39,7 @@ static void test_simple() json_set_alloc_funcs(my_malloc, my_free); create_and_free_complex_object(); - if(malloc_called != 20 || free_called != 20) + if(malloc_called != 1 || free_called != 1) fail("Custom allocation failed"); } diff --git a/test/suites/api/test_object.c b/test/suites/api/test_object.c index ba428e1..92e5208 100644 --- a/test/suites/api/test_object.c +++ b/test/suites/api/test_object.c @@ -249,7 +249,11 @@ static void test_set_nocheck() static void test_iterators() { + int i; json_t *object, *foo, *bar, *baz; + const char *iter_keys[3]; + int have_key[3] = { 0, 0, 0 }; + json_t *iter_values[3]; void *iter; if(json_object_iter(NULL)) @@ -276,30 +280,50 @@ static void test_iterators() iter = json_object_iter(object); if(!iter) fail("unable to get iterator"); - if(strcmp(json_object_iter_key(iter), "a")) - fail("iterating failed: wrong key"); - if(json_object_iter_value(iter) != foo) - fail("iterating failed: wrong value"); + iter_keys[0] = json_object_iter_key(iter); + iter_values[0] = json_object_iter_value(iter); iter = json_object_iter_next(object, iter); if(!iter) fail("unable to increment iterator"); - if(strcmp(json_object_iter_key(iter), "b")) - fail("iterating failed: wrong key"); - if(json_object_iter_value(iter) != bar) - fail("iterating failed: wrong value"); + iter_keys[1] = json_object_iter_key(iter); + iter_values[1] = json_object_iter_value(iter); iter = json_object_iter_next(object, iter); if(!iter) fail("unable to increment iterator"); - if(strcmp(json_object_iter_key(iter), "c")) - fail("iterating failed: wrong key"); - if(json_object_iter_value(iter) != baz) - fail("iterating failed: wrong value"); + iter_keys[2] = json_object_iter_key(iter); + iter_values[2] = json_object_iter_value(iter); if(json_object_iter_next(object, iter) != NULL) fail("able to iterate over the end"); + /* Check that keys have correct values */ + for (i = 0; i < 3; i++) { + if (strcmp(iter_keys[i], "a") == 0) { + if (iter_values[i] != foo) + fail("wrong value for iter key a"); + else + have_key[0] = 1; + } else if (strcmp(iter_keys[i], "b") == 0) { + if (iter_values[i] != bar) + fail("wrong value for iter key b"); + else + have_key[1] = 1; + } else if (strcmp(iter_keys[i], "c") == 0) { + if (iter_values[i] != baz) + fail("wrong value for iter key c"); + else + have_key[2] = 1; + } + } + + /* Check that we got all keys */ + for(i = 0; i < 3; i++) { + if(!have_key[i]) + fail("a key wasn't iterated over"); + } + if(json_object_iter_at(object, "foo")) fail("json_object_iter_at() succeeds for non-existent key"); @@ -312,22 +336,14 @@ static void test_iterators() if(json_object_iter_value(iter) != bar) fail("iterating failed: wrong value"); - iter = json_object_iter_next(object, iter); - if(!iter) - fail("unable to increment iterator"); - if(strcmp(json_object_iter_key(iter), "c")) - fail("iterating failed: wrong key"); - if(json_object_iter_value(iter) != baz) - fail("iterating failed: wrong value"); - - if(json_object_iter_set(object, iter, bar)) + if(json_object_iter_set(object, iter, baz)) fail("unable to set value at iterator"); - if(strcmp(json_object_iter_key(iter), "c")) + if(strcmp(json_object_iter_key(iter), "b")) fail("json_object_iter_key() fails after json_object_iter_set()"); - if(json_object_iter_value(iter) != bar) + if(json_object_iter_value(iter) != baz) fail("json_object_iter_value() fails after json_object_iter_set()"); - if(json_object_get(object, "c") != bar) + if(json_object_get(object, "b") != baz) fail("json_object_get() fails after json_object_iter_set()"); json_decref(object); diff --git a/test/suites/encoding-flags/compact-object/env b/test/suites/encoding-flags/compact-object/env index 4474aaf..93cb33d 100644 --- a/test/suites/encoding-flags/compact-object/env +++ b/test/suites/encoding-flags/compact-object/env @@ -1,2 +1,3 @@ JSON_COMPACT=1 -export JSON_COMPACT +HASHSEED=1 +export JSON_COMPACT HASHSEED diff --git a/test/suites/encoding-flags/indent-compact-object/env b/test/suites/encoding-flags/indent-compact-object/env index 78fbfcc..c73acc1 100644 --- a/test/suites/encoding-flags/indent-compact-object/env +++ b/test/suites/encoding-flags/indent-compact-object/env @@ -1,3 +1,4 @@ JSON_INDENT=4 JSON_COMPACT=1 -export JSON_INDENT JSON_COMPACT +HASHSEED=1 +export JSON_INDENT JSON_COMPACT HASHSEED diff --git a/test/suites/encoding-flags/indent-object/env b/test/suites/encoding-flags/indent-object/env index d220f83..961558c 100644 --- a/test/suites/encoding-flags/indent-object/env +++ b/test/suites/encoding-flags/indent-object/env @@ -1,2 +1,3 @@ JSON_INDENT=4 -export JSON_INDENT +HASHSEED=1 +export JSON_INDENT HASHSEED diff --git a/test/suites/encoding-flags/object/env b/test/suites/encoding-flags/object/env new file mode 100644 index 0000000..9120b03 --- /dev/null +++ b/test/suites/encoding-flags/object/env @@ -0,0 +1,2 @@ +HASHSEED=1 +export HASHSEED