Added cca()

This commit is contained in:
Davis King 2013-01-13 23:06:52 -05:00
parent 3dfa76a339
commit fd9ed7397f
3 changed files with 259 additions and 0 deletions

View File

@ -8,6 +8,7 @@
#include "statistics/random_subset_selector.h"
#include "statistics/image_feature_sampling.h"
#include "statistics/sammon.h"
#include "statistics/cca.h"
#endif // DLIB_STATISTICs_H_

122
dlib/statistics/cca.h Normal file
View File

@ -0,0 +1,122 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_CCA_h__
#define DLIB_CCA_h__
#include "cca_abstract.h"
#include "../algs.h"
#include "../matrix.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename T
>
matrix<T,0,1> compute_correlations (
const matrix<T>& L,
const matrix<T>& R
)
{
matrix<T> A, B, C;
A = diag(trans(R)*L);
B = sqrt(diag(trans(L)*L));
C = sqrt(diag(trans(R)*R));
A = pointwise_multiply(A , reciprocal(pointwise_multiply(B,C)));
return A;
}
// ----------------------------------------------------------------------------------------
template <
typename matrix_type,
typename T
>
matrix<T,0,1> impl_cca (
const matrix_type& L,
const matrix_type& R,
matrix<T>& Ltrans,
matrix<T>& Rtrans,
unsigned long num_correlations,
unsigned long extra_rank,
unsigned long q,
unsigned long num_output_correlations
)
{
matrix<T> Ul, Vl;
matrix<T> Ur, Vr;
matrix<T> U, V;
matrix<T,0,1> Dr, Dl, D;
// Note that we add a few more singular vectors in because it helps improve the
// final results if we run this part with a little higher rank than the final SVD.
svd_fast(L, Ul, Dl, Vl, num_correlations+extra_rank, q);
svd_fast(R, Ur, Dr, Vr, num_correlations+extra_rank, q);
// This matrix is really small so we can do a normal full SVD on it.
svd3(trans(Ul)*Ur, U, D, V);
// now throw away extra columns of the transformations. We do this in a way
// that keeps the directions that have the highest correlations.
matrix<T,0,1> temp = D;
rsort_columns(U, temp);
rsort_columns(V, D);
U = colm(U, range(0, num_output_correlations-1));
V = colm(V, range(0, num_output_correlations-1));
D = rowm(D, range(0, num_output_correlations-1));
Ltrans = Vl*inv(diagm(Dl))*U;
Rtrans = Vr*inv(diagm(Dr))*V;
// Note that the D matrix contains the correlation values for the transformed
// vectors. However, when the L and R matrices have rank higher than
// num_correlations+extra_rank then the values in D become only approximate.
return D;
}
// ----------------------------------------------------------------------------------------
template <typename T>
matrix<T,0,1> cca (
const matrix<T>& L,
const matrix<T>& R,
matrix<T>& Ltrans,
matrix<T>& Rtrans,
unsigned long num_correlations,
unsigned long extra_rank = 5,
unsigned long q = 2
)
{
using std::min;
const unsigned long n = min(num_correlations, (unsigned long)min(R.nr(),min(L.nc(), R.nc())));
return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, n);
}
// ----------------------------------------------------------------------------------------
template <typename sparse_vector_type, typename T>
matrix<T,0,1> cca (
const std::vector<sparse_vector_type>& L,
const std::vector<sparse_vector_type>& R,
matrix<T>& Ltrans,
matrix<T>& Rtrans,
unsigned long num_correlations,
unsigned long extra_rank = 5,
unsigned long q = 2
)
{
using std::min;
const unsigned long n = min(max_index_plus_one(L), max_index_plus_one(R));
const unsigned long num_output_correlations = min(num_correlations, min(R.size(),n));
return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, num_output_correlations);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_CCA_h__

View File

@ -0,0 +1,136 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_CCA_AbSTRACT_H__
#ifdef DLIB_CCA_AbSTRACT_H__
#include "../matrix/matrix_la_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename T
>
matrix<T,0,1> compute_correlations (
const matrix<T>& L,
const matrix<T>& R
);
/*!
requires
- L.size() > 0
- R.size() > 0
- L.nr() == R.nr()
ensures
- This function treats L and R as sequences of paired row vectors. It
then computes the correlation values between the elements of these
row vectors. In particular, we return a matrix COR such that:
- COR.size() == L.nc()
- for all valid i:
- COR(i) == the correlation coefficient between the following
sequence of paired numbers: (L(k,i), R(k,i)) for k: 0 <= k < L.nr().
Therefore, COR(i) is a value between -1 and 1 inclusive where 1
indicates perfect correlation and -1 perfect anti-correlation.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
matrix<T,0,1> cca (
const matrix<T>& L,
const matrix<T>& R,
matrix<T>& Ltrans,
matrix<T>& Rtrans,
unsigned long num_correlations,
unsigned long extra_rank = 5,
unsigned long q = 2
);
/*!
requires
- num_correlations > 0
- L.size() > 0
- R.size() > 0
- L.nr() == R.nr()
ensures
- This function performs a canonical correlation analysis between the row
vectors in L and R. That is, it finds two transformation matrices, Ltrans
and Rtrans, such that row vectors in the transformed matrices L*Ltrans and
R*Rtrans are as correlated as possible. That is, we try to find two transforms
such that the correlation values returned by compute_correlations(L*Ltrans, R*Rtrans)
would be maximized.
- Let N == min(num_correlations, min(R.nr(),min(L.nc(),R.nc())))
(This is the actual number of elements in the transformed vectors.
Therefore, note that you can't get more outputs than there are rows or
columns in the input matrices.)
- #Ltrans.nr() == L.nc()
- #Ltrans.nc() == N
- #Rtrans.nr() == R.nc()
- #Rtrans.nc() == N
- No centering is applied to the L and R matrices. Therefore, if you want a
CCA relative to the centered vectors then you must apply centering yourself
before calling cca().
- This function works with reduced rank approximations of the L and R matrices.
This makes it fast when working with large matrices. In particular, we use
the svd_fast() routine to find reduced rank representations of the input
matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q)
and similarly for R. This means that you can use the extra_rank and q
arguments to cca() to influence the accuracy of the reduced rank
approximation. However, the default values should work fine for most
problems.
- returns an estimate of compute_correlations(L*#Ltrans, R*#Rtrans). The
returned vector should exactly match the output of compute_correlations()
when the reduced rank approximation to L and R is accurate. However, when L
and/or R are higher rank than num_correlations+extra_rank the return value of
this function will deviate from compute_correlations(L*#Ltrans, R*#Rtrans).
This deviation can be used to check if the reduced rank approximation is
working or you need to increase extra_rank.
- A good discussion of CCA can be found in the paper "Canonical Correlation
Analysis" by David Weenink. In particular, this function is implemented
using equations 29 and 30 from his paper. We also use the idea of doing CCA
on a reduced rank approximation of L and R as suggested by Paramveer S.
Dhillon in his paper "Two Step CCA: A new spectral method for estimating
vector models of words".
!*/
// ----------------------------------------------------------------------------------------
template <
typename sparse_vector_type,
typename T
>
matrix<T,0,1> cca (
const std::vector<sparse_vector_type>& L,
const std::vector<sparse_vector_type>& R,
matrix<T>& Ltrans,
matrix<T>& Rtrans,
unsigned long num_correlations,
unsigned long extra_rank = 5,
unsigned long q = 2
);
/*!
requires
- num_correlations > 0
- L.size() == R.size()
- max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0
(i.e. L and R can't be a empty matrices)
ensures
- This is just an overload of the cca() function defined above. Except in this
case we take a sparse representation of the input L and R matrices rather than
dense matrices. Therefore, in this case, we interpret L and R as matrices
with L.size() rows, where each row is defined by a sparse vector. So this
function does exactly the same thing as the above cca().
- Note that you can apply the output transforms to a sparse vector with the
following code:
sparse_matrix_vector_multiply(trans(Ltrans), your_sparse_vector)
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_CCA_AbSTRACT_H__