From e341600ffb46733e588dcbda8a1600f583248acd Mon Sep 17 00:00:00 2001
From: Davis King
Date: Mon, 21 Jan 2013 17:06:27 -0500
Subject: [PATCH] Added count_bits() and hamming_distance()

---
 dlib/general_hash/count_bits.h          | 62 +++++++++++++++++++++++++
 dlib/general_hash/count_bits_abstract.h | 48 +++++++++++++++++++
 dlib/hash.h                             |  1 +
 dlib/test/hash.cpp                      | 51 ++++++++++++++++++++
 4 files changed, 162 insertions(+)
 create mode 100644 dlib/general_hash/count_bits.h
 create mode 100644 dlib/general_hash/count_bits_abstract.h

diff --git a/dlib/general_hash/count_bits.h b/dlib/general_hash/count_bits.h
new file mode 100644
index 000000000..4de771ae0
--- /dev/null
+++ b/dlib/general_hash/count_bits.h
@@ -0,0 +1,62 @@
+// Copyright (C) 2013 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_COUNT_BiTS_H__
+#define DLIB_COUNT_BiTS_H__
+
+#include "../algs.h"
+#include <climits>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    T count_bits (
+        T v
+    )
+    /*!
+        requires
+            - T is an unsigned integral type
+        ensures
+            - returns the number of bits in v which are set to 1.
+    !*/
+    {
+        COMPILE_TIME_ASSERT(is_unsigned_type<T>::value && sizeof(T) <= 8);
+
+        // This bit of bit trickery is from:
+        // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64
+
+        v = v - ((v >> 1) & (T)~(T)0/3);
+        v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);
+        v = (v + (v >> 4)) & (T)~(T)0/255*15;
+        return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    T hamming_distance (
+        const T& a,
+        const T& b
+    )
+    /*!
+        requires
+            - T is an unsigned integral type
+        ensures
+            - returns the number of bits which differ between a and b.
+    !*/
+    {
+        return count_bits(a^b);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_COUNT_BiTS_H__
+
diff --git a/dlib/general_hash/count_bits_abstract.h b/dlib/general_hash/count_bits_abstract.h
new file mode 100644
index 000000000..ba3dae583
--- /dev/null
+++ b/dlib/general_hash/count_bits_abstract.h
@@ -0,0 +1,48 @@
+// Copyright (C) 2013 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_COUNT_BiTS_ABSTRACT_H__
+#ifdef DLIB_COUNT_BiTS_ABSTRACT_H__
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    T count_bits (
+        T v
+    );
+    /*!
+        requires
+            - T is an unsigned integral type
+        ensures
+            - returns the number of bits in v which are set to 1.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    T hamming_distance (
+        const T& a,
+        const T& b
+    );
+    /*!
+        requires
+            - T is an unsigned integral type
+        ensures
+            - returns the number of bits which differ between a and b.  (I.e. returns
+              count_bits(a^b).)
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_COUNT_BiTS_ABSTRACT_H__
+
+
diff --git a/dlib/hash.h b/dlib/hash.h
index 82115c698..5a018b438 100644
--- a/dlib/hash.h
+++ b/dlib/hash.h
@@ -6,6 +6,7 @@
 
 #include "general_hash/hash.h"
 #include "general_hash/random_hashing.h"
+#include "general_hash/count_bits.h"
 
 #endif // DLIB_HASh_
 
diff --git a/dlib/test/hash.cpp b/dlib/test/hash.cpp
index 10fb50606..4db326f23 100644
--- a/dlib/test/hash.cpp
+++ b/dlib/test/hash.cpp
@@ -205,6 +205,55 @@ namespace
         }
     }
 
+// ----------------------------------------------------------------------------------------
+
+    uint64 slow_count_bits ( uint64 v)
+    {
+        uint64 count = 0;
+        for (int i = 0; i < 64; ++i)
+        {
+            if (v&1)
+                ++count;
+            v >>= 1;
+        }
+        return count;
+    }
+
+
+    uint32 slow_count_bits ( uint32 v)
+    {
+        uint32 count = 0;
+        for (int i = 0; i < 32; ++i)
+        {
+            if (v&1)
+                ++count;
+            v >>= 1;
+        }
+        return count;
+    }
+
+
+// ----------------------------------------------------------------------------------------
+
+    void test_hamming_stuff()
+    {
+        dlib::rand rnd;
+        for (int i = 0; i < 10000; ++i)
+        {
+            uint32 v = rnd.get_random_32bit_number();
+            uint64 v2 = rnd.get_random_64bit_number();
+            DLIB_TEST(slow_count_bits(v) == count_bits(v));
+            DLIB_TEST(slow_count_bits(v2) == count_bits(v2));
+        }
+
+        DLIB_TEST(hamming_distance((uint32)0x1F, (uint32)0x0F) == 1);
+        DLIB_TEST(hamming_distance((uint32)0x1F, (uint32)0x1F) == 0);
+        DLIB_TEST(hamming_distance((uint32)0x1F, (uint32)0x19) == 2);
+        DLIB_TEST(hamming_distance((uint32)0x2F, (uint32)0x19) == 4);
+    }
+
+// ----------------------------------------------------------------------------------------
+
     class test_hash : public tester
     {
     public:
@@ -219,6 +268,8 @@ namespace
         {
             print_spinner();
 
+            test_hamming_stuff();
+
             murmur_hash_test();
             murmur_hash_128_test();
 