CVE-2013-6401: Change hash function, randomize hashes

Thanks to Florian Weimer and Eric Sesterhenn for reporting, reviewing
and testing.
This commit is contained in:
Petri Lehtinen 2014-01-14 11:16:39 +02:00
parent b9c588de3d
commit 8f80c2d838
19 changed files with 874 additions and 123 deletions

View File

@ -52,6 +52,8 @@ project (jansson C)
# Options
OPTION (BUILD_SHARED_LIBS "Build shared libraries." OFF)
OPTION (USE_URANDOM "Use /dev/urandom to seed the hash function." ON)
OPTION (USE_WINDOWS_CRYPTOAPI "Use CryptGenRandom to seed the hash function." ON)
if (MSVC)
# This option must match the settings used in your program, in particular if you
@ -85,12 +87,12 @@ set (JANSSON_SOVERSION 4)
# for CheckFunctionKeywords
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
INCLUDE (CheckCSourceCompiles)
include (CheckFunctionExists)
include (CheckFunctionKeywords)
include (CheckIncludeFiles)
include (CheckTypeSize)
if (MSVC)
# Turn off Microsoft's "security" warnings.
add_definitions( "/W3 /D_CRT_SECURE_NO_WARNINGS /wd4005 /wd4996 /nologo" )
@ -106,14 +108,25 @@ if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_C_FLAGS "-fPIC")
endif()
# Probe for the optional headers used by the hash seeding code and by
# lookup3.h. Each result becomes a HAVE_<HEADER>_H define in the generated
# config header, where it guards the matching #include.
check_include_files (endian.h HAVE_ENDIAN_H)
check_include_files (fcntl.h HAVE_FCNTL_H)
check_include_files (sched.h HAVE_SCHED_H)
check_include_files (unistd.h HAVE_UNISTD_H)
check_include_files (sys/param.h HAVE_SYS_PARAM_H)
check_include_files (sys/stat.h HAVE_SYS_STAT_H)
check_include_files (sys/time.h HAVE_SYS_TIME_H)
# HAVE_SYS_TYPES_H must be derived from <sys/types.h> itself. Probing a
# different header under this name would cache a wrong answer and make any
# later check that reuses the variable a no-op.
check_include_files (sys/types.h HAVE_SYS_TYPES_H)
# Probe for the functions the seeding code calls: open/read/close for
# /dev/urandom, gettimeofday/getpid for the fallback seed, and sched_yield
# for the wait loop in json_object_seed().
check_function_exists (close HAVE_CLOSE)
check_function_exists (getpid HAVE_GETPID)
check_function_exists (gettimeofday HAVE_GETTIMEOFDAY)
check_function_exists (open HAVE_OPEN)
check_function_exists (read HAVE_READ)
check_function_exists (sched_yield HAVE_SCHED_YIELD)
# Check for the int-type includes
check_include_files (sys/types.h HAVE_SYS_TYPES_H)
check_include_files (inttypes.h HAVE_INTTYPES_H)
check_include_files (stdint.h HAVE_STDINT_H)
# Check our 64 bit integer sizes
check_type_size (__int64 __INT64)
check_type_size (int64_t INT64_T)
@ -124,17 +137,32 @@ check_type_size (int32_t INT32_T)
check_type_size (__int32 __INT32)
check_type_size ("long" LONG_INT)
check_type_size ("int" INT)
if (HAVE_INT32_T)
set (JSON_INT32 int32_t)
elseif (HAVE___INT32)
set (JSON_INT32 __int32)
elseif (HAVE_LONG AND (${LONG_INT} EQUAL 4))
elseif (HAVE_LONG_INT AND (${LONG_INT} EQUAL 4))
set (JSON_INT32 long)
elseif (HAVE_INT AND (${INT} EQUAL 4))
set (JSON_INT32 int)
else ()
message (FATAL_ERROR "Could not detect a valid 32 bit integer type")
message (FATAL_ERROR "Could not detect a valid 32-bit integer type")
endif ()
# Detect an unsigned 32-bit integer type and store its spelling in
# JSON_UINT32. The generated config header uses JSON_UINT32 to define
# uint32_t when HAVE_UINT32_T is not set.
# check_type_size() stores the size of each candidate in the given variable
# and sets HAVE_<variable> when the type exists.
check_type_size (uint32_t UINT32_T)
check_type_size (__uint32 __UINT32)
check_type_size ("unsigned long" UNSIGNED_LONG_INT)
check_type_size ("unsigned int" UNSIGNED_INT)
# Candidates are tried in order of preference: the fixed-width names first,
# then the plain C types, which are accepted only when exactly 4 bytes wide.
if (HAVE_UINT32_T)
set (JSON_UINT32 uint32_t)
elseif (HAVE___UINT32)
set (JSON_UINT32 __uint32)
elseif (HAVE_UNSIGNED_LONG_INT AND (${UNSIGNED_LONG_INT} EQUAL 4))
set (JSON_UINT32 "unsigned long")
elseif (HAVE_UNSIGNED_INT AND (${UNSIGNED_INT} EQUAL 4))
set (JSON_UINT32 "unsigned int")
else ()
# Without a 32-bit unsigned type the configuration cannot continue.
message (FATAL_ERROR "Could not detect a valid unsigned 32-bit integer type")
endif ()
# Check for ssize_t and SSIZE_T existence.
@ -206,11 +234,9 @@ else ()
set (JSON_HAVE_LOCALECONV 0)
endif ()
# check if we have setlocale
check_function_exists (setlocale HAVE_SETLOCALE)
# Check what the inline keyword is.
# Note that the original JSON_INLINE was always set to just 'inline', so this goes further.
check_function_keywords("inline")
@ -238,6 +264,9 @@ elseif (HAVE__SNPRINTF)
set (JSON_SNPRINTF _snprintf)
endif ()
# Probe for the GCC-style __sync and __atomic builtins. The results select
# which thread-safe implementation of json_object_seed() gets compiled.
check_c_source_compiles ("int main() { unsigned long val; __sync_bool_compare_and_swap(&val, 0, 1); return 0; } " HAVE_SYNC_BUILTINS)
# The store uses __ATOMIC_RELEASE: acquire-release is not a valid memory
# order for __atomic_store_n (only relaxed, release and seq_cst are).
check_c_source_compiles ("int main() { char l; unsigned long v; __atomic_test_and_set(&l, __ATOMIC_RELAXED); __atomic_store_n(&v, 1, __ATOMIC_RELEASE); __atomic_load_n(&v, __ATOMIC_ACQUIRE); return 0; }" HAVE_ATOMIC_BUILTINS)
# Create pkg-conf file.
# (We use the same files as ./configure does, so we
# have to define the same variables used there).

View File

@ -1,35 +1,36 @@
/* Reduced down to the defines that are actually used in the code */
/* Define to 1 if you have the <inttypes.h> (and friends) header file. */
#cmakedefine HAVE_INTTYPES_H 1
#cmakedefine HAVE_STDINT_H 1
#cmakedefine HAVE_ENDIAN_H 1
#cmakedefine HAVE_FCNTL_H 1
#cmakedefine HAVE_SCHED_H 1
#cmakedefine HAVE_UNISTD_H 1
#cmakedefine HAVE_SYS_PARAM_H 1
#cmakedefine HAVE_SYS_STAT_H 1
#cmakedefine HAVE_SYS_TIME_H 1
#cmakedefine HAVE_SYS_TYPES_H 1
#cmakedefine HAVE_STDINT_H 1
/* We must include this here, as in (eg) utf.h it will want to use
the integer type, which in MSVC2010 will be in stdint.h
(there is no inttypes.h in MSVC2010) */
#if defined(HAVE_STDINT_H)
# include <stdint.h>
#elif defined(HAVE_INTTYPES_H)
# include <inttypes.h>
#elif defined(HAVE_SYS_TYPES_H)
# include <sys/types.h>
#endif
#cmakedefine HAVE_CLOSE 1
#cmakedefine HAVE_GETPID 1
#cmakedefine HAVE_GETTIMEOFDAY 1
#cmakedefine HAVE_OPEN 1
#cmakedefine HAVE_READ 1
#cmakedefine HAVE_SCHED_YIELD 1
#cmakedefine HAVE_SYNC_BUILTINS 1
#cmakedefine HAVE_ATOMIC_BUILTINS 1
/* Define to 1 if you have the <locale.h> header file. */
#cmakedefine HAVE_LOCALE_H 1
/* Define to 1 if you have the 'setlocale' function. */
#cmakedefine HAVE_SETLOCALE 1
/* Define to the type of a signed integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
#cmakedefine HAVE_INT32_T 1
#ifndef HAVE_INT32_T
# define int32_t @JSON_INT32@
#endif
#cmakedefine HAVE_UINT32_T 1
#ifndef HAVE_UINT32_T
# define uint32_t @JSON_UINT32@
#endif
#cmakedefine HAVE_SSIZE_T 1
#ifndef HAVE_SSIZE_T
@ -43,3 +44,6 @@
#endif
#cmakedefine HAVE_VSNPRINTF
#cmakedefine USE_URANDOM 1
#cmakedefine USE_WINDOWS_CRYPTOAPI 1

View File

@ -14,10 +14,11 @@ AM_CONDITIONAL([GCC], [test x$GCC = xyes])
# Checks for libraries.
# Checks for header files.
AC_CHECK_HEADERS([locale.h])
AC_CHECK_HEADERS([endian.h fcntl.h locale.h sched.h unistd.h sys/param.h sys/stat.h sys/time.h sys/types.h])
# Checks for typedefs, structures, and compiler characteristics.
AC_TYPE_INT32_T
AC_TYPE_UINT32_T
AC_TYPE_LONG_LONG_INT
AC_C_INLINE
@ -29,7 +30,31 @@ esac
AC_SUBST([json_inline])
# Checks for library functions.
AC_CHECK_FUNCS([strtoll localeconv])
AC_CHECK_FUNCS([close getpid gettimeofday localeconv open read sched_yield strtoll])
AC_MSG_CHECKING([for gcc __sync builtins])
have_sync_builtins=no
AC_TRY_LINK(
[], [unsigned long val; __sync_bool_compare_and_swap(&val, 0, 1);],
[have_sync_builtins=yes],
)
if test "x$have_sync_builtins" = "xyes"; then
AC_DEFINE([HAVE_SYNC_BUILTINS], [1],
[Define to 1 if gcc's __sync builtins are available])
fi
AC_MSG_RESULT([$have_sync_builtins])
# Probe for the gcc __atomic builtins used by the thread-safe seeding code.
# The store is probed with a release ordering because acquire-release is not
# a valid memory order for an atomic store.
AC_MSG_CHECKING([for gcc __atomic builtins])
have_atomic_builtins=no
AC_TRY_LINK(
  [], [char l; unsigned long v; __atomic_test_and_set(&l, __ATOMIC_RELAXED); __atomic_store_n(&v, 1, __ATOMIC_RELEASE); __atomic_load_n(&v, __ATOMIC_ACQUIRE);],
  [have_atomic_builtins=yes],
)
if test "x$have_atomic_builtins" = "xyes"; then
  AC_DEFINE([HAVE_ATOMIC_BUILTINS], [1],
            [Define to 1 if gcc's __atomic builtins are available])
fi
AC_MSG_RESULT([$have_atomic_builtins])
case "$ac_cv_type_long_long_int$ac_cv_func_strtoll" in
yesyes) json_have_long_long=1;;
@ -43,6 +68,27 @@ case "$ac_cv_header_locale_h$ac_cv_func_localeconv" in
esac
AC_SUBST([json_have_localeconv])
# Features
AC_ARG_ENABLE([urandom],
[AS_HELP_STRING([--disable-urandom],
[Don't use /dev/urandom to seed the hash function])],
[use_urandom=$enableval], [use_urandom=yes])
if test "x$use_urandom" = xyes; then
AC_DEFINE([USE_URANDOM], [1],
[Define to 1 if /dev/urandom should be used for seeding the hash function])
fi
AC_ARG_ENABLE([windows-cryptoapi],
[AS_HELP_STRING([--disable-windows-cryptoapi],
[Don't use CryptGenRandom to seed the hash function])],
[use_windows_cryptoapi=$enableval], [use_windows_cryptoapi=yes])
if test "x$use_windows_cryptoapi" = xyes; then
AC_DEFINE([USE_WINDOWS_CRYPTOAPI], [1],
[Define to 1 if CryptGenRandom should be used for seeding the hash function])
fi
AC_CONFIG_FILES([
jansson.pc
Makefile

View File

@ -8,6 +8,7 @@ libjansson_la_SOURCES = \
error.c \
hashtable.c \
hashtable.h \
hashtable_seed.c \
jansson_private.h \
load.c \
memory.c \

View File

@ -5,8 +5,17 @@
* it under the terms of the MIT license. See LICENSE for details.
*/
#if HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include <string.h>
#if HAVE_STDINT_H
#include <stdint.h>
#endif
#include <jansson_config.h> /* for JSON_INLINE */
#include "jansson_private.h" /* for container_of() */
#include "hashtable.h"
@ -15,24 +24,13 @@ typedef struct hashtable_list list_t;
typedef struct hashtable_pair pair_t;
typedef struct hashtable_bucket bucket_t;
extern volatile uint32_t hashtable_seed;
/* Implementation of the hash function */
#include "lookup3.h"
#define list_to_pair(list_) container_of(list_, pair_t, list)
/* From http://www.cse.yorku.ca/~oz/hash.html */
static size_t hash_str(const void *ptr)
{
const char *str = (const char *)ptr;
size_t hash = 5381;
size_t c;
while((c = (size_t)*str))
{
hash = ((hash << 5) + hash) + c;
str++;
}
return hash;
}
#define hash_str(key) ((size_t)hashlittle((key), strlen(key), hashtable_seed))
static JSON_INLINE void list_init(list_t *list)
{
@ -74,19 +72,6 @@ static void insert_to_bucket(hashtable_t *hashtable, bucket_t *bucket,
}
}
static const size_t primes[] = {
5, 13, 23, 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593,
49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469,
12582917, 25165843, 50331653, 100663319, 201326611, 402653189,
805306457, 1610612741
};
static JSON_INLINE size_t num_buckets(hashtable_t *hashtable)
{
return primes[hashtable->num_buckets];
}
static pair_t *hashtable_find_pair(hashtable_t *hashtable, bucket_t *bucket,
const char *key, size_t hash)
{
@ -120,7 +105,7 @@ static int hashtable_do_del(hashtable_t *hashtable,
bucket_t *bucket;
size_t index;
index = hash % num_buckets(hashtable);
index = hash & hashmask(hashtable->order);
bucket = &hashtable->buckets[index];
pair = hashtable_find_pair(hashtable, bucket, key, hash);
@ -167,14 +152,14 @@ static int hashtable_do_rehash(hashtable_t *hashtable)
jsonp_free(hashtable->buckets);
hashtable->num_buckets++;
new_size = num_buckets(hashtable);
hashtable->order++;
new_size = hashsize(hashtable->order);
hashtable->buckets = jsonp_malloc(new_size * sizeof(bucket_t));
if(!hashtable->buckets)
return -1;
for(i = 0; i < num_buckets(hashtable); i++)
for(i = 0; i < hashsize(hashtable->order); i++)
{
hashtable->buckets[i].first = hashtable->buckets[i].last =
&hashtable->list;
@ -199,14 +184,14 @@ int hashtable_init(hashtable_t *hashtable)
size_t i;
hashtable->size = 0;
hashtable->num_buckets = 0; /* index to primes[] */
hashtable->buckets = jsonp_malloc(num_buckets(hashtable) * sizeof(bucket_t));
hashtable->order = 3;
hashtable->buckets = jsonp_malloc(hashsize(hashtable->order) * sizeof(bucket_t));
if(!hashtable->buckets)
return -1;
list_init(&hashtable->list);
for(i = 0; i < num_buckets(hashtable); i++)
for(i = 0; i < hashsize(hashtable->order); i++)
{
hashtable->buckets[i].first = hashtable->buckets[i].last =
&hashtable->list;
@ -230,12 +215,12 @@ int hashtable_set(hashtable_t *hashtable,
size_t hash, index;
/* rehash if the load ratio exceeds 1 */
if(hashtable->size >= num_buckets(hashtable))
if(hashtable->size >= hashsize(hashtable->order))
if(hashtable_do_rehash(hashtable))
return -1;
hash = hash_str(key);
index = hash % num_buckets(hashtable);
index = hash & hashmask(hashtable->order);
bucket = &hashtable->buckets[index];
pair = hashtable_find_pair(hashtable, bucket, key, hash);
@ -273,7 +258,7 @@ void *hashtable_get(hashtable_t *hashtable, const char *key)
bucket_t *bucket;
hash = hash_str(key);
bucket = &hashtable->buckets[hash % num_buckets(hashtable)];
bucket = &hashtable->buckets[hash & hashmask(hashtable->order)];
pair = hashtable_find_pair(hashtable, bucket, key, hash);
if(!pair)
@ -294,7 +279,7 @@ void hashtable_clear(hashtable_t *hashtable)
hashtable_do_clear(hashtable);
for(i = 0; i < num_buckets(hashtable); i++)
for(i = 0; i < hashsize(hashtable->order); i++)
{
hashtable->buckets[i].first = hashtable->buckets[i].last =
&hashtable->list;
@ -316,7 +301,7 @@ void *hashtable_iter_at(hashtable_t *hashtable, const char *key)
bucket_t *bucket;
hash = hash_str(key);
bucket = &hashtable->buckets[hash % num_buckets(hashtable)];
bucket = &hashtable->buckets[hash & hashmask(hashtable->order)];
pair = hashtable_find_pair(hashtable, bucket, key, hash);
if(!pair)

View File

@ -32,7 +32,7 @@ struct hashtable_bucket {
typedef struct hashtable {
size_t size;
struct hashtable_bucket *buckets;
size_t num_buckets; /* index to primes[] */
size_t order; /* hashtable has pow(2, order) buckets */
struct hashtable_list list;
} hashtable_t;
@ -40,6 +40,7 @@ typedef struct hashtable {
#define hashtable_key_to_iter(key_) \
(&(container_of(key_, struct hashtable_pair, key)->list))
/**
* hashtable_init - Initialize a hashtable object
*

278
src/hashtable_seed.c Normal file
View File

@ -0,0 +1,278 @@
/* Generate sizeof(uint32_t) bytes of as random data as possible to seed
the hash function.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <time.h>
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_SCHED_H
#include <sched.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if defined(_WIN32)
/* For _getpid() */
#include <process.h>
#endif
#include "jansson.h"
/* Pack the first sizeof(uint32_t) bytes of `data` into one 32-bit value.
   The first byte ends up in the most significant position. */
static uint32_t buf_to_uint32(char *data) {
    const unsigned char *cursor = (const unsigned char *)data;
    const unsigned char *end = cursor + sizeof(uint32_t);
    uint32_t value = 0;

    while (cursor != end) {
        /* Make room for the next byte, then append it at the low end */
        value <<= 8;
        value |= *cursor++;
    }

    return value;
}
/* /dev/urandom */
#if !defined(_WIN32) && defined(USE_URANDOM)
/* Read sizeof(uint32_t) bytes from /dev/urandom and store them in *seed.
   Returns 0 on success. Returns 1, leaving *seed untouched, when the device
   cannot be opened or fewer bytes than requested were read. */
static int seed_from_urandom(uint32_t *seed) {
/* Use unbuffered I/O if we have open(), close() and read(). Otherwise
fall back to fopen() */
char data[sizeof(uint32_t)];
int ok;
#if defined(HAVE_OPEN) && defined(HAVE_CLOSE) && defined(HAVE_READ)
int urandom;
urandom = open("/dev/urandom", O_RDONLY);
if (urandom == -1)
return 1;
/* A short read counts as failure; the descriptor is closed either way */
ok = read(urandom, data, sizeof(uint32_t)) == sizeof(uint32_t);
close(urandom);
#else
FILE *urandom;
urandom = fopen("/dev/urandom", "rb");
if (!urandom)
return 1;
/* A short read counts as failure; the stream is closed either way */
ok = fread(data, 1, sizeof(uint32_t), urandom) == sizeof(uint32_t);
fclose(urandom);
#endif
if (!ok)
return 1;
/* Convert the raw bytes to an integer only after a complete read */
*seed = buf_to_uint32(data);
return 0;
}
#endif
/* Windows Crypto API */
#if defined(_WIN32) && defined(USE_WINDOWS_CRYPTOAPI)
#include <windows.h>
#include <wincrypt.h>

/* Signatures of the advapi32 entry points that are resolved at run time */
typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv, LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType, DWORD dwFlags);
typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen, BYTE *pbBuffer);
typedef BOOL (WINAPI *CRYPTRELEASECONTEXT)(HCRYPTPROV hProv, DWORD dwFlags);

/* Fill *seed with sizeof(uint32_t) random bytes from the Windows Crypto API.

   All three Crypto API functions are looked up with GetProcAddress() in the
   advapi32.dll module and called only through the resolved pointers, so this
   file never references the advapi32 import library directly.

   Returns 0 on success. Returns 1, leaving *seed untouched, when the module
   handle or any entry point cannot be obtained, or when a Crypto API call
   fails. */
static int seed_from_windows_cryptoapi(uint32_t *seed)
{
    HINSTANCE hAdvAPI32 = NULL;
    CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
    CRYPTGENRANDOM pCryptGenRandom = NULL;
    CRYPTRELEASECONTEXT pCryptReleaseContext = NULL;
    HCRYPTPROV hCryptProv = 0;
    BYTE data[sizeof(uint32_t)];
    int ok;

    /* Use the explicit ANSI variant: the module name is a narrow string
       literal, which would be the wrong type for the generic macro in a
       UNICODE build. */
    hAdvAPI32 = GetModuleHandleA("advapi32.dll");
    if (hAdvAPI32 == NULL)
        return 1;

    pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(hAdvAPI32, "CryptAcquireContextA");
    if (!pCryptAcquireContext)
        return 1;

    pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, "CryptGenRandom");
    if (!pCryptGenRandom)
        return 1;

    pCryptReleaseContext = (CRYPTRELEASECONTEXT)GetProcAddress(hAdvAPI32, "CryptReleaseContext");
    if (!pCryptReleaseContext)
        return 1;

    if (!pCryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
        return 1;

    /* Call through the resolved pointer, like the other two entry points.
       Calling CryptGenRandom by name would defeat the dynamic lookup. */
    ok = pCryptGenRandom(hCryptProv, sizeof(uint32_t), data);

    /* The provider handle is released whether or not generation succeeded */
    pCryptReleaseContext(hCryptProv, 0);

    if (!ok)
        return 1;

    *seed = buf_to_uint32((char *)data);
    return 0;
}
#endif
/* Last-resort seed built from the wall clock and the process id.
   Always succeeds: stores the result in *seed and returns 0. */
static int seed_from_timestamp_and_pid(uint32_t *seed) {
    uint32_t clock_bits;
    uint32_t pid_bits = 0;

#ifdef HAVE_GETTIMEOFDAY
    /* Mix the seconds with the microseconds */
    struct timeval now;
    gettimeofday(&now, NULL);
    clock_bits = (uint32_t)now.tv_sec ^ (uint32_t)now.tv_usec;
#else
    /* Only whole seconds are available */
    clock_bits = (uint32_t)time(NULL);
#endif

    /* Fold in the process id where one can be obtained; otherwise the
       clock value is used on its own */
#if defined(_WIN32)
    pid_bits = (uint32_t)_getpid();
#elif defined(HAVE_GETPID)
    pid_bits = (uint32_t)getpid();
#endif

    *seed = clock_bits ^ pid_bits;
    return 0;
}
/* Produce a non-zero seed for the hash function.
   Tries the platform's random source first (when one is compiled in) and
   falls back to the timestamp/PID mix if that fails or is unavailable. */
static uint32_t generate_seed() {
    uint32_t seed;
    int have_seed = 0;

#if !defined(_WIN32) && defined(USE_URANDOM)
    have_seed = (seed_from_urandom(&seed) == 0);
#endif

#if defined(_WIN32) && defined(USE_WINDOWS_CRYPTOAPI)
    if (!have_seed)
        have_seed = (seed_from_windows_cryptoapi(&seed) == 0);
#endif

    /* No better randomness was available: use timestamp and PID */
    if (!have_seed)
        seed_from_timestamp_and_pid(&seed);

    /* Zero is reserved to mean "not seeded", so never hand it out */
    return seed ? seed : 1;
}
/* Seed mixed into every hash computation. Zero means "not seeded yet";
   generate_seed() never returns zero, and an explicit seed of zero passed
   to json_object_seed() requests a generated one. */
volatile uint32_t hashtable_seed = 0;

/* json_object_seed(seed): set the hash seed once.

   If the seed is still unset, it is set to `seed` truncated to 32 bits, or
   to a generated value when that truncated value is zero. Calls made after
   the seed is set have no effect. One of four implementations is selected
   below depending on the synchronization primitives found at configure
   time. */
#if defined(HAVE_ATOMIC_BUILTINS) && (defined(HAVE_SCHED_YIELD) || !defined(_WIN32))
/* Set by the first thread that enters the seeding branch */
static volatile char seed_initialized = 0;

void json_object_seed(size_t seed) {
    uint32_t new_seed = (uint32_t)seed;

    if (hashtable_seed == 0) {
        if (__atomic_test_and_set(&seed_initialized, __ATOMIC_RELAXED) == 0) {
            /* Do the seeding ourselves */
            if (new_seed == 0)
                new_seed = generate_seed();

            /* Release ordering pairs with the acquire load in the wait loop
               below. Acquire-release is not a valid memory order for
               __atomic_store_n. */
            __atomic_store_n(&hashtable_seed, new_seed, __ATOMIC_RELEASE);
        } else {
            /* Wait for another thread to do the seeding */
            do {
#ifdef HAVE_SCHED_YIELD
                sched_yield();
#endif
            } while(__atomic_load_n(&hashtable_seed, __ATOMIC_ACQUIRE) == 0);
        }
    }
}
#elif defined(HAVE_SYNC_BUILTINS) && (defined(HAVE_SCHED_YIELD) || !defined(_WIN32))
void json_object_seed(size_t seed) {
    uint32_t new_seed = (uint32_t)seed;

    if (hashtable_seed == 0) {
        if (new_seed == 0) {
            /* Explicit synchronization fences are not supported by the
               __sync builtins, so every thread getting here has to
               generate the seed value.
            */
            new_seed = generate_seed();
        }

        do {
            /* Only the thread that swaps 0 -> new_seed wins; the others
               keep the winner's value */
            if (__sync_bool_compare_and_swap(&hashtable_seed, 0, new_seed)) {
                /* We were the first to seed */
                break;
            } else {
                /* Wait for another thread to do the seeding */
#ifdef HAVE_SCHED_YIELD
                sched_yield();
#endif
            }
        } while(hashtable_seed == 0);
    }
}
#elif defined(_WIN32)
/* Incremented by every thread that enters the seeding branch; the thread
   that observes the value 1 performs the seeding */
static long seed_initialized = 0;

void json_object_seed(size_t seed) {
    uint32_t new_seed = (uint32_t)seed;

    if (hashtable_seed == 0) {
        if (InterlockedIncrement(&seed_initialized) == 1) {
            /* Do the seeding ourselves */
            if (new_seed == 0)
                new_seed = generate_seed();

            hashtable_seed = new_seed;
        } else {
            /* Wait for another thread to do the seeding */
            do {
                SwitchToThread();
            } while (hashtable_seed == 0);
        }
    }
}
#else
/* Fall back to a thread-unsafe version */
void json_object_seed(size_t seed) {
    uint32_t new_seed = (uint32_t)seed;

    if (hashtable_seed == 0) {
        if (new_seed == 0)
            new_seed = generate_seed();

        hashtable_seed = new_seed;
    }
}
#endif

View File

@ -41,6 +41,7 @@ EXPORTS
json_object_iter_value
json_object_iter_set_new
json_object_key_to_iter
json_object_seed
json_dumps
json_dumpf
json_dump_file

View File

@ -126,6 +126,7 @@ typedef struct {
/* getters, setters, manipulation */
void json_object_seed(size_t seed);
size_t json_object_size(const json_t *object);
json_t *json_object_get(const json_t *object, const char *key);
int json_object_set_new(json_t *object, const char *key, json_t *value);

366
src/lookup3.h Normal file
View File

@ -0,0 +1,366 @@
/*
-------------------------------------------------------------------------------
lookup3.c, by Bob Jenkins, May 2006, Public Domain.
These are functions for producing 32-bit hashes for hash table lookup.
hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
are externally useful functions. Routines to test the hash are included
if SELF_TEST is defined. You can use this free for any purpose. It's in
the public domain. It has no warranty.
You probably want to use hashlittle(). hashlittle() and hashbig()
hash byte arrays. hashlittle() is faster than hashbig() on
little-endian machines. Intel and AMD are little-endian machines.
On second thought, you probably want hashlittle2(), which is identical to
hashlittle() except it returns two 32-bit hashes for the price of one.
You could implement hashbig2() if you wanted but I haven't bothered here.
If you want to find a hash of, say, exactly 7 integers, do
a = i1; b = i2; c = i3;
mix(a,b,c);
a += i4; b += i5; c += i6;
mix(a,b,c);
a += i7;
final(a,b,c);
then use c as the hash value. If you have a variable length array of
4-byte integers to hash, use hashword(). If you have a byte array (like
a character string), use hashlittle(). If you have several byte arrays, or
a mix of things, see the comments above hashlittle().
Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
then mix those integers. This is fast (you can do a lot more thorough
mixing with 12*3 instructions on 3 integers than you can with 3 instructions
on 1 byte), but shoehorning those bytes into integers efficiently is messy.
-------------------------------------------------------------------------------
*/
#include <stdlib.h>
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_STDINT_H
#include <stdint.h> /* defines uint32_t etc */
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h> /* attempt to define endianness */
#endif
#ifdef HAVE_ENDIAN_H
# include <endian.h> /* attempt to define endianness */
#endif
/*
* My best guess at if you are big-endian or little-endian. This may
* need adjustment.
*/
#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
__BYTE_ORDER == __LITTLE_ENDIAN) || \
(defined(i386) || defined(__i386__) || defined(__i486__) || \
defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
__BYTE_ORDER == __BIG_ENDIAN) || \
(defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif
/* Number of slots in a table of order n, i.e. 2 to the power n, computed as
   a uint32_t. */
#define hashsize(n) ((uint32_t)1<<(n))
/* Bit mask that keeps the low n bits of a hash value: hashsize(n) - 1. */
#define hashmask(n) (hashsize(n)-1)
/* Rotate x left by k bits, treating x as 32 bits wide. mix() and final()
   below call it with constant k values between 4 and 25. */
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.
This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().
If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
of top bits of (a,b,c), or in any combination of bottom bits of
(a,b,c).
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
is commonly produced by subtraction) look like a single 1-bit
difference.
* the base values were pseudorandom, all zero but one bit set, or
all zero plus a counter that starts at zero.
Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
satisfy this are
4 6 8 16 19 4
9 15 3 18 27 15
14 9 3 7 17 3
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
for "differ" defined as + with a one-bit base and a two-bit delta. I
used http://burtleburtle.net/bob/hash/avalanche.html to choose
the operations, constants, and arrangements of the variables.
This does not achieve avalanche. There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a. The
most thoroughly mixed value is c, but it doesn't really even achieve
avalanche in c.
This allows some parallelism. Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction as the goal of parallelism. I did what I could. Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
a -= c; a ^= rot(c, 4); c += b; \
b -= a; b ^= rot(a, 6); a += c; \
c -= b; c ^= rot(b, 8); b += a; \
a -= c; a ^= rot(c,16); c += b; \
b -= a; b ^= rot(a,19); a += c; \
c -= b; c ^= rot(b, 4); b += a; \
}
/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c
Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different. This was tested for
* pairs that differed by one bit, by two bits, in any combination
of top bits of (a,b,c), or in any combination of bottom bits of
(a,b,c).
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
is commonly produced by subtraction) look like a single 1-bit
difference.
* the base values were pseudorandom, all zero but one bit set, or
all zero plus a counter that starts at zero.
These constants passed:
14 11 25 16 4 14 24
12 14 25 16 4 14 24
and these came close:
4 8 15 26 3 22 24
10 8 15 26 3 22 24
11 8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
c ^= b; c -= rot(b,14); \
a ^= c; a -= rot(c,11); \
b ^= a; b -= rot(a,25); \
c ^= b; c -= rot(b,16); \
a ^= c; a -= rot(c,4); \
b ^= a; b -= rot(a,14); \
c ^= b; c -= rot(b,24); \
}
/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
k : the key (the unaligned variable-length array of bytes)
length : the length of the key, counting by bytes
initval : can be any 4-byte value
Returns a 32-bit value. Every bit of the key affects every bit of
the return value. Two keys differing by one or two bits will have
totally different hash values.
The best hash table sizes are powers of 2. There is no need to do
mod a prime (mod is sooo slow!). If you need less than 32 bits,
use a bitmask. For example, if you need only 10 bits, do
h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.
If you are hashing n strings (uint8_t **)k, do it like this:
for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
code any way you wish, private, educational, or commercial. It's free.
Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable. Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/
/* Hash `length` bytes starting at `key` into a 32-bit value, with `initval`
   folded into the initial state. The key is consumed 12 bytes at a time
   through mix(), and the last 1 to 12 bytes go through final(). One of three
   read strategies is chosen from HASH_LITTLE_ENDIAN and the alignment of
   `key`: 32-bit reads, 16-bit reads, or byte-at-a-time reads. A zero-length
   key returns the initial state without any mixing. */
static uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
{
uint32_t a,b,c; /* internal state */
union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
/* Set up the internal state */
a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
u.ptr = key;
/* u.i exposes the pointer as an integer so its low bits can be tested for
   alignment */
if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
#ifdef VALGRIND
const uint8_t *k8;
#endif
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
while (length > 12)
{
a += k[0];
b += k[1];
c += k[2];
mix(a,b,c);
length -= 12;
k += 3;
}
/*----------------------------- handle the last (probably partial) block */
/*
* "k[2]&0xffffff" actually reads beyond the end of the string, but
* then masks off the part it's not allowed to read. Because the
* string is aligned, the masked-off tail is in the same word as the
* rest of the string. Every machine with memory protection I've seen
* does it on word boundaries, so is OK with this. But VALGRIND will
* still catch it and complain. The masking trick does make the hash
* noticeably faster for short strings (like English words).
*/
#ifndef VALGRIND
switch(length)
{
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
case 8 : b+=k[1]; a+=k[0]; break;
case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
case 6 : b+=k[1]&0xffff; a+=k[0]; break;
case 5 : b+=k[1]&0xff; a+=k[0]; break;
case 4 : a+=k[0]; break;
case 3 : a+=k[0]&0xffffff; break;
case 2 : a+=k[0]&0xffff; break;
case 1 : a+=k[0]&0xff; break;
case 0 : return c; /* zero length strings require no mixing */
}
#else /* make valgrind happy */
/* Same result as the masked reads above, but the tail is read byte by
   byte so nothing past the end of the key is touched */
k8 = (const uint8_t *)k;
switch(length)
{
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
case 9 : c+=k8[8]; /* fall through */
case 8 : b+=k[1]; a+=k[0]; break;
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
case 5 : b+=k8[4]; /* fall through */
case 4 : a+=k[0]; break;
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
case 1 : a+=k8[0]; break;
case 0 : return c;
}
#endif /* !valgrind */
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
const uint8_t *k8;
/*--------------- all but last block: aligned reads and different mixing */
while (length > 12)
{
a += k[0] + (((uint32_t)k[1])<<16);
b += k[2] + (((uint32_t)k[3])<<16);
c += k[4] + (((uint32_t)k[5])<<16);
mix(a,b,c);
length -= 12;
k += 6;
}
/*----------------------------- handle the last (probably partial) block */
/* Odd tail lengths take their final byte through k8 */
k8 = (const uint8_t *)k;
switch(length)
{
case 12: c+=k[4]+(((uint32_t)k[5])<<16);
b+=k[2]+(((uint32_t)k[3])<<16);
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
case 10: c+=k[4];
b+=k[2]+(((uint32_t)k[3])<<16);
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 9 : c+=k8[8]; /* fall through */
case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
case 6 : b+=k[2];
a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 5 : b+=k8[4]; /* fall through */
case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
break;
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
case 2 : a+=k[0];
break;
case 1 : a+=k8[0];
break;
case 0 : return c; /* zero length requires no mixing */
}
} else { /* need to read the key one byte at a time */
const uint8_t *k = (const uint8_t *)key;
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
while (length > 12)
{
a += k[0];
a += ((uint32_t)k[1])<<8;
a += ((uint32_t)k[2])<<16;
a += ((uint32_t)k[3])<<24;
b += k[4];
b += ((uint32_t)k[5])<<8;
b += ((uint32_t)k[6])<<16;
b += ((uint32_t)k[7])<<24;
c += k[8];
c += ((uint32_t)k[9])<<8;
c += ((uint32_t)k[10])<<16;
c += ((uint32_t)k[11])<<24;
mix(a,b,c);
length -= 12;
k += 12;
}
/*-------------------------------- last block: affect all 32 bits of (c) */
switch(length) /* all the case statements fall through */
{
case 12: c+=((uint32_t)k[11])<<24;
case 11: c+=((uint32_t)k[10])<<16;
case 10: c+=((uint32_t)k[9])<<8;
case 9 : c+=k[8];
case 8 : b+=((uint32_t)k[7])<<24;
case 7 : b+=((uint32_t)k[6])<<16;
case 6 : b+=((uint32_t)k[5])<<8;
case 5 : b+=k[4];
case 4 : a+=((uint32_t)k[3])<<24;
case 3 : a+=((uint32_t)k[2])<<16;
case 2 : a+=((uint32_t)k[1])<<8;
case 1 : a+=k[0];
break;
case 0 : return c;
}
}
/* Every non-empty key ends here: scramble the state and return c */
final(a,b,c);
return c;
}

View File

@ -10,23 +10,11 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_INTTYPES_H
/* inttypes.h includes stdint.h in a standard environment, so there's
no need to include stdint.h separately. If inttypes.h doesn't define
int32_t, it's defined in config.h. */
#include <inttypes.h>
#endif /* HAVE_INTTYPES_H */
#else /* !HAVE_CONFIG_H */
#ifdef _WIN32
typedef int int32_t;
#else /* !_WIN32 */
/* Assume a standard environment */
#include <inttypes.h>
#endif /* _WIN32 */
#endif /* HAVE_CONFIG_H */
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
int utf8_encode(int codepoint, char *buffer, int *size);

View File

@ -9,11 +9,19 @@
#define _GNU_SOURCE
#endif
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
#include "jansson.h"
#include "hashtable.h"
#include "jansson_private.h"
@ -36,11 +44,19 @@ static JSON_INLINE void json_init(json_t *json, json_type type)
/*** object ***/
extern volatile uint32_t hashtable_seed;
json_t *json_object(void)
{
json_object_t *object = jsonp_malloc(sizeof(json_object_t));
if(!object)
return NULL;
if (!hashtable_seed) {
/* Autoseed */
json_object_seed(0);
}
json_init(&object->json, JSON_OBJECT);
if(hashtable_init(&object->hashtable))

View File

@ -37,6 +37,8 @@ struct config {
int sort_keys;
int strip;
int use_env;
int have_hashseed;
int hashseed;
} conf;
/*
 * Evaluates to non-zero when c is a space, newline, carriage return or
 * horizontal tab. The argument is expanded once per comparison, so it
 * may be evaluated up to four times; pass an expression without side effects.
 */
#define l_isspace(c) \
    ((c) == ' '  || \
     (c) == '\n' || \
     (c) == '\r' || \
     (c) == '\t')
@ -108,6 +110,12 @@ static void read_conf(FILE *conffile)
conf.sort_keys = atoi(val);
if (!strcmp(line, "STRIP"))
conf.strip = atoi(val);
if (!strcmp(line, "HASHSEED")) {
conf.have_hashseed = 1;
conf.hashseed = atoi(val);
} else {
conf.have_hashseed = 0;
}
}
free(buffer);
@ -188,6 +196,9 @@ int use_conf(char *test_path)
if (conf.sort_keys)
flags |= JSON_SORT_KEYS;
if (conf.have_hashseed)
json_object_seed(conf.hashseed);
if (conf.strip) {
/* Load to memory, strip leading and trailing whitespace */
buffer = loadfile(infile);
@ -265,7 +276,10 @@ int use_env()
flags |= JSON_PRESERVE_ORDER;
if(getenv_int("JSON_SORT_KEYS"))
flags |= JSON_SORT_KEYS;
flags |= JSON_SORT_KEYS;
if(getenv("HASHSEED"))
json_object_seed(getenv_int("HASHSEED"));
if(getenv_int("STRIP")) {
/* Load to memory, strip leading and trailing whitespace */

View File

@ -24,13 +24,13 @@ static void create_and_free_complex_object()
/*
 * Allocation hook handed to json_set_alloc_funcs() by the test.
 *
 * Records that the hook was invoked and forwards the request to the
 * standard malloc().
 *
 * size: number of bytes requested.
 * Returns whatever malloc(size) returns.
 */
static void *my_malloc(size_t size)
{
    /* Mark the hook as used. The earlier increment was a dead store:
       its result was overwritten by this assignment on the next line. */
    malloc_called = 1;
    return malloc(size);
}
/*
 * Deallocation hook handed to json_set_alloc_funcs() by the test.
 *
 * Records that the hook was invoked and forwards the pointer to the
 * standard free().
 *
 * ptr: pointer to release; passed to free() unchanged.
 */
static void my_free(void *ptr)
{
    /* Mark the hook as used. The earlier increment was a dead store:
       its result was overwritten by this assignment on the next line. */
    free_called = 1;
    free(ptr);
}
@ -39,7 +39,7 @@ static void test_simple()
json_set_alloc_funcs(my_malloc, my_free);
create_and_free_complex_object();
if(malloc_called != 20 || free_called != 20)
if(malloc_called != 1 || free_called != 1)
fail("Custom allocation failed");
}

View File

@ -249,7 +249,11 @@ static void test_set_nocheck()
static void test_iterators()
{
int i;
json_t *object, *foo, *bar, *baz;
const char *iter_keys[3];
int have_key[3] = { 0, 0, 0 };
json_t *iter_values[3];
void *iter;
if(json_object_iter(NULL))
@ -276,30 +280,50 @@ static void test_iterators()
iter = json_object_iter(object);
if(!iter)
fail("unable to get iterator");
if(strcmp(json_object_iter_key(iter), "a"))
fail("iterating failed: wrong key");
if(json_object_iter_value(iter) != foo)
fail("iterating failed: wrong value");
iter_keys[0] = json_object_iter_key(iter);
iter_values[0] = json_object_iter_value(iter);
iter = json_object_iter_next(object, iter);
if(!iter)
fail("unable to increment iterator");
if(strcmp(json_object_iter_key(iter), "b"))
fail("iterating failed: wrong key");
if(json_object_iter_value(iter) != bar)
fail("iterating failed: wrong value");
iter_keys[1] = json_object_iter_key(iter);
iter_values[1] = json_object_iter_value(iter);
iter = json_object_iter_next(object, iter);
if(!iter)
fail("unable to increment iterator");
if(strcmp(json_object_iter_key(iter), "c"))
fail("iterating failed: wrong key");
if(json_object_iter_value(iter) != baz)
fail("iterating failed: wrong value");
iter_keys[2] = json_object_iter_key(iter);
iter_values[2] = json_object_iter_value(iter);
if(json_object_iter_next(object, iter) != NULL)
fail("able to iterate over the end");
/* Check that keys have correct values */
for (i = 0; i < 3; i++) {
if (strcmp(iter_keys[i], "a") == 0) {
if (iter_values[i] != foo)
fail("wrong value for iter key a");
else
have_key[0] = 1;
} else if (strcmp(iter_keys[i], "b") == 0) {
if (iter_values[i] != bar)
fail("wrong value for iter key b");
else
have_key[1] = 1;
} else if (strcmp(iter_keys[i], "c") == 0) {
if (iter_values[i] != baz)
fail("wrong value for iter key c");
else
have_key[2] = 1;
}
}
/* Check that we got all keys */
for(i = 0; i < 3; i++) {
if(!have_key[i])
fail("a key wasn't iterated over");
}
if(json_object_iter_at(object, "foo"))
fail("json_object_iter_at() succeeds for non-existent key");
@ -312,22 +336,14 @@ static void test_iterators()
if(json_object_iter_value(iter) != bar)
fail("iterating failed: wrong value");
iter = json_object_iter_next(object, iter);
if(!iter)
fail("unable to increment iterator");
if(strcmp(json_object_iter_key(iter), "c"))
fail("iterating failed: wrong key");
if(json_object_iter_value(iter) != baz)
fail("iterating failed: wrong value");
if(json_object_iter_set(object, iter, bar))
if(json_object_iter_set(object, iter, baz))
fail("unable to set value at iterator");
if(strcmp(json_object_iter_key(iter), "c"))
if(strcmp(json_object_iter_key(iter), "b"))
fail("json_object_iter_key() fails after json_object_iter_set()");
if(json_object_iter_value(iter) != bar)
if(json_object_iter_value(iter) != baz)
fail("json_object_iter_value() fails after json_object_iter_set()");
if(json_object_get(object, "c") != bar)
if(json_object_get(object, "b") != baz)
fail("json_object_get() fails after json_object_iter_set()");
json_decref(object);

View File

@ -1,2 +1,3 @@
JSON_COMPACT=1
export JSON_COMPACT
HASHSEED=1
export JSON_COMPACT HASHSEED

View File

@ -1,3 +1,4 @@
JSON_INDENT=4
JSON_COMPACT=1
export JSON_INDENT JSON_COMPACT
HASHSEED=1
export JSON_INDENT JSON_COMPACT HASHSEED

View File

@ -1,2 +1,3 @@
JSON_INDENT=4
export JSON_INDENT
HASHSEED=1
export JSON_INDENT HASHSEED

View File

@ -0,0 +1,2 @@
HASHSEED=1
export HASHSEED