Added cca()

2024-11-01 10:14:53 +08:00 · 2013-01-13 23:06:52 -05:00 · 2013-01-13 23:06:52 -05:00 · fd9ed7397f
commit fd9ed7397f
parent 3dfa76a339
3 changed files with 259 additions and 0 deletions
--- a/dlib/statistics.h
+++ b/dlib/statistics.h
@ -8,6 +8,7 @@
 #include "statistics/random_subset_selector.h"
 #include "statistics/image_feature_sampling.h"
 #include "statistics/sammon.h"
 #include "statistics/cca.h"
 #endif // DLIB_STATISTICs_H_ 
--- a/dlib/statistics/cca.h
+++ b/dlib/statistics/cca.h
@ -0,0 +1,122 @@
 // Copyright (C) 2013  Davis E. King (davis@dlib.net)
 // License: Boost Software License   See LICENSE.txt for the full license.
 #ifndef DLIB_CCA_h__
 #define DLIB_CCA_h__
 #include "cca_abstract.h"
 #include "../algs.h"
 #include "../matrix.h"
 namespace dlib
 {
 // ----------------------------------------------------------------------------------------
    template <
        typename T
        >
    matrix<T,0,1> compute_correlations (
        const matrix<T>& L,
        const matrix<T>& R
    )
    {
        matrix<T> A, B, C;
        A = diag(trans(R)*L);
        B = sqrt(diag(trans(L)*L));
        C = sqrt(diag(trans(R)*R));
        A = pointwise_multiply(A , reciprocal(pointwise_multiply(B,C)));
        return A;
    }
 // ----------------------------------------------------------------------------------------
    template <
        typename matrix_type, 
        typename T
        >
    matrix<T,0,1> impl_cca (
        const matrix_type& L,
        const matrix_type& R,
        matrix<T>& Ltrans,
        matrix<T>& Rtrans,
        unsigned long num_correlations,
        unsigned long extra_rank,
        unsigned long q,
        unsigned long num_output_correlations
    )
    {
        matrix<T> Ul, Vl;
        matrix<T> Ur, Vr;
        matrix<T> U, V;
        matrix<T,0,1> Dr, Dl, D;
        // Note that we add a few more singular vectors in because it helps improve the
        // final results if we run this part with a little higher rank than the final SVD.
        svd_fast(L, Ul, Dl, Vl, num_correlations+extra_rank, q);
        svd_fast(R, Ur, Dr, Vr, num_correlations+extra_rank, q);
        // This matrix is really small so we can do a normal full SVD on it.
        svd3(trans(Ul)*Ur, U, D, V);
        // now throw away extra columns of the transformations.  We do this in a way
        // that keeps the directions that have the highest correlations.
        matrix<T,0,1> temp = D;
        rsort_columns(U, temp);
        rsort_columns(V, D);
        U = colm(U, range(0, num_output_correlations-1));
        V = colm(V, range(0, num_output_correlations-1));
        D = rowm(D, range(0, num_output_correlations-1));
        Ltrans = Vl*inv(diagm(Dl))*U;
        Rtrans = Vr*inv(diagm(Dr))*V;
        // Note that the D matrix contains the correlation values for the transformed
        // vectors.  However, when the L and R matrices have rank higher than
        // num_correlations+extra_rank then the values in D become only approximate.
        return D; 
    }
 // ----------------------------------------------------------------------------------------
    template <typename T>
    matrix<T,0,1> cca (
        const matrix<T>& L,
        const matrix<T>& R,
        matrix<T>& Ltrans,
        matrix<T>& Rtrans,
        unsigned long num_correlations,
        unsigned long extra_rank = 5,
        unsigned long q = 2
    )
    {
        using std::min;
        const unsigned long n = min(num_correlations, (unsigned long)min(R.nr(),min(L.nc(), R.nc())));
        return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, n); 
    }
 // ----------------------------------------------------------------------------------------
    template <typename sparse_vector_type, typename T>
    matrix<T,0,1> cca (
        const std::vector<sparse_vector_type>& L,
        const std::vector<sparse_vector_type>& R,
        matrix<T>& Ltrans,
        matrix<T>& Rtrans,
        unsigned long num_correlations,
        unsigned long extra_rank = 5,
        unsigned long q = 2
    )
    {
        using std::min;
        const unsigned long n = min(max_index_plus_one(L), max_index_plus_one(R));
        const unsigned long num_output_correlations = min(num_correlations, min(R.size(),n));
        return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, num_output_correlations); 
    }
 // ----------------------------------------------------------------------------------------
 }
 #endif // DLIB_CCA_h__
--- a/dlib/statistics/cca_abstract.h
+++ b/dlib/statistics/cca_abstract.h
@ -0,0 +1,136 @@
 // Copyright (C) 2013  Davis E. King (davis@dlib.net)
 // License: Boost Software License   See LICENSE.txt for the full license.
 #undef DLIB_CCA_AbSTRACT_H__
 #ifdef DLIB_CCA_AbSTRACT_H__
 #include "../matrix/matrix_la_abstract.h"
 namespace dlib
 {
 // ----------------------------------------------------------------------------------------
    template <
        typename T
        >
    matrix<T,0,1> compute_correlations (
        const matrix<T>& L,
        const matrix<T>& R
    );
    /*!
        requires
            - L.size() > 0 
            - R.size() > 0 
            - L.nr() == R.nr()
        ensures
            - This function treats L and R as sequences of paired row vectors.  It
              then computes the correlation values between the elements of these 
              row vectors.  In particular, we return a matrix COR such that:
                - COR.size() == L.nc()
                - for all valid i:
                    - COR(i) == the correlation coefficient between the following 
                      sequence of paired numbers: (L(k,i), R(k,i)) for k: 0 <= k < L.nr().
                      Therefore, COR(i) is a value between -1 and 1 inclusive where 1
                      indicates perfect correlation and -1 perfect anti-correlation.
    !*/
 // ----------------------------------------------------------------------------------------
    template <
        typename T
        >
    matrix<T,0,1> cca (
        const matrix<T>& L,
        const matrix<T>& R,
        matrix<T>& Ltrans,
        matrix<T>& Rtrans,
        unsigned long num_correlations,
        unsigned long extra_rank = 5,
        unsigned long q = 2
    );
    /*!
        requires
            - num_correlations > 0
            - L.size() > 0 
            - R.size() > 0 
            - L.nr() == R.nr()
        ensures
            - This function performs a canonical correlation analysis between the row
              vectors in L and R.  That is, it finds two transformation matrices, Ltrans
              and Rtrans, such that row vectors in the transformed matrices L*Ltrans and
              R*Rtrans are as correlated as possible.  That is, we try to find two transforms
              such that the correlation values returned by compute_correlations(L*Ltrans, R*Rtrans)
              would be maximized.
            - Let N == min(num_correlations, min(R.nr(),min(L.nc(),R.nc())))
              (This is the actual number of elements in the transformed vectors.
              Therefore, note that you can't get more outputs than there are rows or
              columns in the input matrices.)
            - #Ltrans.nr() == L.nc()
            - #Ltrans.nc() == N 
            - #Rtrans.nr() == R.nc()
            - #Rtrans.nc() == N 
            - No centering is applied to the L and R matrices.  Therefore, if you want a
              CCA relative to the centered vectors then you must apply centering yourself
              before calling cca().
            - This function works with reduced rank approximations of the L and R matrices.
              This makes it fast when working with large matrices.  In particular, we use
              the svd_fast() routine to find reduced rank representations of the input
              matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q) 
              and similarly for R.  This means that you can use the extra_rank and q
              arguments to cca() to influence the accuracy of the reduced rank
              approximation.  However, the default values should work fine for most
              problems.
            - returns an estimate of compute_correlations(L*#Ltrans, R*#Rtrans).  The
              returned vector should exactly match the output of compute_correlations()
              when the reduced rank approximation to L and R is accurate.  However, when L
              and/or R are higher rank than num_correlations+extra_rank the return value of
              this function will deviate from compute_correlations(L*#Ltrans, R*#Rtrans).
              This deviation can be used to check if the reduced rank approximation is
              working or you need to increase extra_rank.
            - A good discussion of CCA can be found in the paper "Canonical Correlation
              Analysis" by David Weenink.  In particular, this function is implemented
              using equations 29 and 30 from his paper.  We also use the idea of doing CCA
              on a reduced rank approximation of L and R as suggested by Paramveer S.
              Dhillon in his paper "Two Step CCA: A new spectral method for estimating
              vector models of words".
    !*/
 // ----------------------------------------------------------------------------------------
    template <
        typename sparse_vector_type, 
        typename T
        >
    matrix<T,0,1> cca (
        const std::vector<sparse_vector_type>& L,
        const std::vector<sparse_vector_type>& R,
        matrix<T>& Ltrans,
        matrix<T>& Rtrans,
        unsigned long num_correlations,
        unsigned long extra_rank = 5,
        unsigned long q = 2
    );
    /*!
        requires
            - num_correlations > 0
            - L.size() == R.size()
            - max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0
              (i.e. L and R can't be a empty matrices)
        ensures
            - This is just an overload of the cca() function defined above.  Except in this
              case we take a sparse representation of the input L and R matrices rather than
              dense matrices.  Therefore, in this case, we interpret L and R as matrices
              with L.size() rows, where each row is defined by a sparse vector.  So this 
              function does exactly the same thing as the above cca().
            - Note that you can apply the output transforms to a sparse vector with the
              following code:
                sparse_matrix_vector_multiply(trans(Ltrans), your_sparse_vector)
    !*/
 // ----------------------------------------------------------------------------------------
 }
 #endif // DLIB_CCA_AbSTRACT_H__