mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Added a set of new LSH based hashing functions meant for use
with larger vectors and higher bit sizes than the current LSH tool. These are the new hash_similar_angles_xxx objects.
This commit is contained in:
parent
6f5ef4c089
commit
4e96485601
@ -6,6 +6,7 @@
|
||||
|
||||
#include "lsh/projection_hash.h"
|
||||
#include "lsh/create_random_projection_hash.h"
|
||||
#include "lsh/hashes.h"
|
||||
|
||||
|
||||
#endif // DLIB_LSh_
|
||||
|
218
dlib/lsh/hashes.h
Normal file
218
dlib/lsh/hashes.h
Normal file
@ -0,0 +1,218 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_LSH_HAShES_H__
|
||||
#define DLIB_LSH_HAShES_H__
|
||||
|
||||
#include "hashes_abstract.h"
|
||||
#include "../hash.h"
|
||||
#include "../matrix.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class hash_similar_angles_64
|
||||
{
|
||||
public:
|
||||
hash_similar_angles_64 (
|
||||
) : seed(0) {}
|
||||
|
||||
hash_similar_angles_64 (
|
||||
const uint64 seed_
|
||||
) : seed(seed_) {}
|
||||
|
||||
uint64 get_seed (
|
||||
) const { return seed; }
|
||||
|
||||
|
||||
typedef uint64 result_type;
|
||||
|
||||
template <
|
||||
typename sparse_vector_type
|
||||
>
|
||||
typename disable_if<is_matrix<sparse_vector_type>,uint64>::type operator() (
|
||||
const sparse_vector_type& v
|
||||
) const
|
||||
{
|
||||
typedef typename sparse_vector_type::value_type::second_type scalar_type;
|
||||
|
||||
uint64 temp = 0;
|
||||
for (int i = 0; i < 64; ++i)
|
||||
{
|
||||
// compute the dot product between v and a Gaussian random vector.
|
||||
scalar_type val = 0;
|
||||
for (typename sparse_vector_type::const_iterator j = v.begin(); j != v.end(); ++j)
|
||||
val += j->second*gaussian_random_hash(j->first, i, seed);
|
||||
|
||||
if (val > 0)
|
||||
temp |= 1;
|
||||
temp <<= 1;
|
||||
}
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <typename EXP>
|
||||
uint64 operator() (
|
||||
const matrix_exp<EXP>& v
|
||||
) const
|
||||
{
|
||||
uint64 temp = 0;
|
||||
for (unsigned long i = 0; i < 64; ++i)
|
||||
{
|
||||
if (dot(gaussian_randm(v.size(),1,i+seed*64), v) > 0)
|
||||
temp |= 1;
|
||||
temp <<= 1;
|
||||
}
|
||||
return temp;
|
||||
}
|
||||
|
||||
unsigned int distance (
|
||||
const result_type& a,
|
||||
const result_type& b
|
||||
) const
|
||||
{
|
||||
return hamming_distance(a,b);
|
||||
}
|
||||
|
||||
private:
|
||||
const uint64 seed;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class hash_similar_angles_128
|
||||
{
|
||||
public:
|
||||
hash_similar_angles_128 (
|
||||
) : seed(0),hasher1(0), hasher2(1) {}
|
||||
|
||||
hash_similar_angles_128 (
|
||||
const uint64 seed_
|
||||
) : seed(seed_),hasher1(2*seed),hasher2(2*seed+1) {}
|
||||
|
||||
uint64 get_seed (
|
||||
) const { return seed; }
|
||||
|
||||
typedef std::pair<uint64,uint64> result_type;
|
||||
|
||||
template <
|
||||
typename vector_type
|
||||
>
|
||||
result_type operator() (
|
||||
const vector_type& v
|
||||
) const
|
||||
{
|
||||
return std::make_pair(hasher1(v), hasher2(v));
|
||||
}
|
||||
|
||||
unsigned int distance (
|
||||
const result_type& a,
|
||||
const result_type& b
|
||||
) const
|
||||
{
|
||||
return hamming_distance(a.first,b.first) +
|
||||
hamming_distance(a.second,b.second);
|
||||
}
|
||||
|
||||
private:
|
||||
const uint64 seed;
|
||||
hash_similar_angles_64 hasher1;
|
||||
hash_similar_angles_64 hasher2;
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class hash_similar_angles_256
|
||||
{
|
||||
public:
|
||||
hash_similar_angles_256 (
|
||||
) : seed(0), hasher1(0), hasher2(1) {}
|
||||
|
||||
hash_similar_angles_256 (
|
||||
const uint64 seed_
|
||||
) : seed(seed_),hasher1(2*seed),hasher2(2*seed+1) {}
|
||||
|
||||
uint64 get_seed (
|
||||
) const { return seed; }
|
||||
|
||||
typedef std::pair<uint64,uint64> hash128_type;
|
||||
typedef std::pair<hash128_type,hash128_type> result_type;
|
||||
|
||||
template <
|
||||
typename vector_type
|
||||
>
|
||||
result_type operator() (
|
||||
const vector_type& v
|
||||
) const
|
||||
{
|
||||
return std::make_pair(hasher1(v), hasher2(v));
|
||||
}
|
||||
|
||||
unsigned int distance (
|
||||
const result_type& a,
|
||||
const result_type& b
|
||||
) const
|
||||
{
|
||||
return hasher1.distance(a.first,b.first) +
|
||||
hasher1.distance(a.second,b.second);
|
||||
}
|
||||
|
||||
private:
|
||||
const uint64 seed;
|
||||
hash_similar_angles_128 hasher1;
|
||||
hash_similar_angles_128 hasher2;
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class hash_similar_angles_512
|
||||
{
|
||||
public:
|
||||
hash_similar_angles_512 (
|
||||
) : seed(0), hasher1(0), hasher2(1) {}
|
||||
|
||||
hash_similar_angles_512 (
|
||||
const uint64 seed_
|
||||
) : seed(seed_),hasher1(2*seed),hasher2(2*seed+1) {}
|
||||
|
||||
uint64 get_seed (
|
||||
) const { return seed; }
|
||||
|
||||
|
||||
typedef hash_similar_angles_256::result_type hash256_type;
|
||||
typedef std::pair<hash256_type,hash256_type> result_type;
|
||||
|
||||
template <
|
||||
typename vector_type
|
||||
>
|
||||
result_type operator() (
|
||||
const vector_type& v
|
||||
) const
|
||||
{
|
||||
return std::make_pair(hasher1(v), hasher2(v));
|
||||
}
|
||||
|
||||
unsigned int distance (
|
||||
const result_type& a,
|
||||
const result_type& b
|
||||
) const
|
||||
{
|
||||
return hasher1.distance(a.first,b.first) +
|
||||
hasher1.distance(a.second,b.second);
|
||||
}
|
||||
|
||||
private:
|
||||
const uint64 seed;
|
||||
hash_similar_angles_256 hasher1;
|
||||
hash_similar_angles_256 hasher2;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_LSH_HAShES_H__
|
||||
|
286
dlib/lsh/hashes_abstract.h
Normal file
286
dlib/lsh/hashes_abstract.h
Normal file
@ -0,0 +1,286 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_LSH_HAShES_ABSTRACT_H__
|
||||
#ifdef DLIB_LSH_HAShES_ABSTRACT_H__
|
||||
|
||||
#include "../matrix.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class hash_similar_angles_64
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for computing locality sensitive hashes that give
|
||||
vectors with similar angles between each other similar hash values. In
|
||||
particular, this object creates 64 random planes which pass though the
|
||||
origin and uses them to create a 64bit hash. To compute the hash for a new
|
||||
vector, this object checks which side of each plane the vector falls on and
|
||||
records this information into a 64bit integer.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
hash_similar_angles_64 (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_seed() == 0
|
||||
!*/
|
||||
|
||||
hash_similar_angles_64 (
|
||||
const uint64 seed
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_seed() == seed
|
||||
!*/
|
||||
|
||||
uint64 get_seed (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the random seed used to generate the random planes used for
|
||||
hashing.
|
||||
!*/
|
||||
|
||||
typedef uint64 result_type;
|
||||
|
||||
template <typename vector_type>
|
||||
result_type perator() (
|
||||
const vector_type& v
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- v is an unsorted sparse vector or a dlib matrix representing either a
|
||||
column or row vector.
|
||||
ensures
|
||||
- returns a 64 bit hash of the input vector v. The bits in the hash record
|
||||
which side of each random plane v falls on.
|
||||
|
||||
!*/
|
||||
|
||||
unsigned int distance (
|
||||
const result_type& a,
|
||||
const result_type& b
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the Hamming distance between the two hashes given to this
|
||||
function. That is, we return the number of bits in a and b which differ.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
struct hash_similar_angles_128
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for computing locality sensitive hashes that give
|
||||
vectors with similar angles between each other similar hash values. In
|
||||
particular, this object creates 128 random planes which pass though the
|
||||
origin and uses them to create a 128bit hash. To compute the hash for a new
|
||||
vector, this object checks which side of each plane the vector falls on and
|
||||
records this information into a 128bit integer.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
hash_similar_angles_128 (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_seed() == 0
|
||||
!*/
|
||||
|
||||
hash_similar_angles_128 (
|
||||
const uint64 seed
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_seed() == seed
|
||||
!*/
|
||||
|
||||
uint64 get_seed (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the random seed used to generate the random planes used for
|
||||
hashing.
|
||||
!*/
|
||||
|
||||
typedef std::pair<uint64,uint64> result_type;
|
||||
|
||||
template <typename vector_type>
|
||||
result_type perator() (
|
||||
const vector_type& v
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- v is an unsorted sparse vector or a dlib matrix representing either a
|
||||
column or row vector.
|
||||
ensures
|
||||
- returns a 128 bit hash of the input vector v. The bits in the hash record
|
||||
which side of each random plane v falls on.
|
||||
|
||||
!*/
|
||||
|
||||
unsigned int distance (
|
||||
const result_type& a,
|
||||
const result_type& b
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the Hamming distance between the two hashes given to this
|
||||
function. That is, we return the number of bits in a and b which differ.
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
struct hash_similar_angles_256
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for computing locality sensitive hashes that give
|
||||
vectors with similar angles between each other similar hash values. In
|
||||
particular, this object creates 256 random planes which pass though the
|
||||
origin and uses them to create a 256bit hash. To compute the hash for a new
|
||||
vector, this object checks which side of each plane the vector falls on and
|
||||
records this information into a 256bit integer.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
hash_similar_angles_256 (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_seed() == 0
|
||||
!*/
|
||||
|
||||
hash_similar_angles_256 (
|
||||
const uint64 seed
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_seed() == seed
|
||||
!*/
|
||||
|
||||
uint64 get_seed (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the random seed used to generate the random planes used for
|
||||
hashing.
|
||||
!*/
|
||||
|
||||
typedef std::pair<uint64,uint64> hash128_type;
|
||||
typedef std::pair<hash128_type,hash128_type> result_type;
|
||||
|
||||
template <typename vector_type>
|
||||
result_type perator() (
|
||||
const vector_type& v
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- v is an unsorted sparse vector or a dlib matrix representing either a
|
||||
column or row vector.
|
||||
ensures
|
||||
- returns a 256 bit hash of the input vector v. The bits in the hash record
|
||||
which side of each random plane v falls on.
|
||||
|
||||
!*/
|
||||
|
||||
unsigned int distance (
|
||||
const result_type& a,
|
||||
const result_type& b
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the Hamming distance between the two hashes given to this
|
||||
function. That is, we return the number of bits in a and b which differ.
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
struct hash_similar_angles_512
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for computing locality sensitive hashes that give
|
||||
vectors with similar angles between each other similar hash values. In
|
||||
particular, this object creates 512 random planes which pass though the
|
||||
origin and uses them to create a 512bit hash. To compute the hash for a new
|
||||
vector, this object checks which side of each plane the vector falls on and
|
||||
records this information into a 512bit integer.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
hash_similar_angles_512 (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_seed() == 0
|
||||
!*/
|
||||
|
||||
hash_similar_angles_512 (
|
||||
const uint64 seed
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_seed() == seed
|
||||
!*/
|
||||
|
||||
uint64 get_seed (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the random seed used to generate the random planes used for
|
||||
hashing.
|
||||
!*/
|
||||
|
||||
typedef hash_similar_angles_256::result_type hash256_type;
|
||||
typedef std::pair<hash256_type,hash256_type> result_type;
|
||||
|
||||
template <typename vector_type>
|
||||
result_type perator() (
|
||||
const vector_type& v
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- v is an unsorted sparse vector or a dlib matrix representing either a
|
||||
column or row vector.
|
||||
ensures
|
||||
- returns a 512 bit hash of the input vector v. The bits in the hash record
|
||||
which side of each random plane v falls on.
|
||||
|
||||
!*/
|
||||
|
||||
unsigned int distance (
|
||||
const result_type& a,
|
||||
const result_type& b
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the Hamming distance between the two hashes given to this
|
||||
function. That is, we return the number of bits in a and b which differ.
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_LSH_HAShES_ABSTRACT_H__
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user