mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Added comments about using randomly_subsample() when using compute_mean_squared_distance()
with large datasets. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403815
This commit is contained in:
parent
07093165ce
commit
3eb5d81644
@ -51,8 +51,9 @@ int main()
|
||||
// Here we set the kernel we want to use for training. The radial_basis_kernel
|
||||
// has a parameter called gamma that we need to determine. As a rule of thumb, a good
|
||||
// gamma to try is 1.0/(mean squared distance between your sample points). So
|
||||
// below we are using a similar value.
|
||||
const double gamma = 3.0/compute_mean_squared_distance(samples);
|
||||
// below we are using a similar value computed from at most 2000 randomly selected
|
||||
// samples.
|
||||
const double gamma = 3.0/compute_mean_squared_distance(randomly_subsample(samples, 2000));
|
||||
cout << "using gamma of " << gamma << endl;
|
||||
trainer.set_kernel(kernel_type(gamma));
|
||||
|
||||
|
@ -101,8 +101,9 @@ int main()
|
||||
// you should try the same gamma that you are using for training. But if you don't
|
||||
// have a particular gamma in mind then you can use the following function to
|
||||
// find a reasonable default gamma for your data. Another reasonable way to pick a gamma
|
||||
// is often to use 1.0/compute_mean_squared_distance(samples). This second way has the
|
||||
// bonus of being quite fast.
|
||||
// is often to use 1.0/compute_mean_squared_distance(randomly_subsample(samples, 2000)).
|
||||
// It computes the mean squared distance between at most 2000 randomly selected samples and often
|
||||
// works quite well.
|
||||
const double gamma = verbose_find_gamma_with_big_centroid_gap(samples, labels);
|
||||
|
||||
// Next we declare an instance of the kcentroid object. It is used by rank_features()
|
||||
|
Loading…
Reference in New Issue
Block a user