mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
I improved the way calls to the separation oracle get dispatched to the
thread_pool. Previously, each separation oracle call was dispatched to a thread individually. This is inefficient when there are very many samples (and thus very many separation oracle calls that need to be made). So now entire batches of separation oracle calls are dispatched to each thread. This minimizes the thread-switching and synchronization overhead.
This commit is contained in:
parent
1b05c23a3b
commit
84daff6399
@ -206,10 +206,14 @@ namespace dlib
|
||||
data.loss = 0;
|
||||
|
||||
data.num = problem.get_num_samples();
|
||||
|
||||
// how many samples to process in a single task (aim for 100 jobs per thread)
|
||||
const long block_size = std::max<long>(1, data.num / (1+tp.num_threads_in_pool()*100));
|
||||
|
||||
binder b(*this, req, data);
|
||||
for (long i = 0; i < data.num; ++i)
|
||||
for (long i = 0; i < data.num; i+=block_size)
|
||||
{
|
||||
tp.add_task(b, &binder::call_oracle, i);
|
||||
tp.add_task(b, &binder::call_oracle, i, std::min(i + block_size, data.num));
|
||||
}
|
||||
tp.wait_for_all_tasks();
|
||||
|
||||
@ -227,20 +231,50 @@ namespace dlib
|
||||
) : self(self_), req(req_), data(data_) {}
|
||||
|
||||
void call_oracle (
|
||||
long i
|
||||
long begin,
|
||||
long end
|
||||
)
|
||||
{
|
||||
scalar_type loss;
|
||||
feature_vector_type ftemp;
|
||||
self.cache[i].separation_oracle_cached(req.skip_cache,
|
||||
req.cur_risk_lower_bound,
|
||||
req.current_solution,
|
||||
loss,
|
||||
ftemp);
|
||||
// If we are only going to call the separation oracle once then
|
||||
// don't run the slightly more complex for loop version of this code.
|
||||
if (end-begin <= 1)
|
||||
{
|
||||
scalar_type loss;
|
||||
feature_vector_type ftemp;
|
||||
self.cache[begin].separation_oracle_cached(req.skip_cache,
|
||||
req.cur_risk_lower_bound,
|
||||
req.current_solution,
|
||||
loss,
|
||||
ftemp);
|
||||
|
||||
auto_mutex lock(self.accum_mutex);
|
||||
data.loss += loss;
|
||||
sparse_vector::add_to(data.subgradient, ftemp);
|
||||
auto_mutex lock(self.accum_mutex);
|
||||
data.loss += loss;
|
||||
sparse_vector::add_to(data.subgradient, ftemp);
|
||||
}
|
||||
else
|
||||
{
|
||||
scalar_type loss = 0;
|
||||
matrix_type faccum(data.subgradient.size(),1);
|
||||
faccum = 0;
|
||||
|
||||
feature_vector_type ftemp;
|
||||
|
||||
for (long i = begin; i < end; ++i)
|
||||
{
|
||||
scalar_type loss_temp;
|
||||
self.cache[i].separation_oracle_cached(req.skip_cache,
|
||||
req.cur_risk_lower_bound,
|
||||
req.current_solution,
|
||||
loss_temp,
|
||||
ftemp);
|
||||
loss += loss_temp;
|
||||
sparse_vector::add_to(faccum, ftemp);
|
||||
}
|
||||
|
||||
auto_mutex lock(self.accum_mutex);
|
||||
data.loss += loss;
|
||||
sparse_vector::add_to(data.subgradient, faccum);
|
||||
}
|
||||
}
|
||||
|
||||
const node_type& self;
|
||||
|
@ -50,16 +50,42 @@ namespace dlib
|
||||
) : self(self_), w(w_), subgradient(subgradient_), total_loss(total_loss_) {}
|
||||
|
||||
void call_oracle (
|
||||
long i
|
||||
long begin,
|
||||
long end
|
||||
)
|
||||
{
|
||||
scalar_type loss;
|
||||
feature_vector_type ftemp;
|
||||
self.separation_oracle_cached(i, w, loss, ftemp);
|
||||
// If we are only going to call the separation oracle once then
|
||||
// don't run the slightly more complex for loop version of this code.
|
||||
if (end-begin <= 1)
|
||||
{
|
||||
scalar_type loss;
|
||||
feature_vector_type ftemp;
|
||||
self.separation_oracle_cached(begin, w, loss, ftemp);
|
||||
|
||||
auto_mutex lock(self.accum_mutex);
|
||||
total_loss += loss;
|
||||
sparse_vector::add_to(subgradient, ftemp);
|
||||
auto_mutex lock(self.accum_mutex);
|
||||
total_loss += loss;
|
||||
sparse_vector::add_to(subgradient, ftemp);
|
||||
}
|
||||
else
|
||||
{
|
||||
scalar_type loss = 0;
|
||||
matrix_type faccum(subgradient.size(),1);
|
||||
faccum = 0;
|
||||
|
||||
feature_vector_type ftemp;
|
||||
|
||||
for (long i = begin; i < end; ++i)
|
||||
{
|
||||
scalar_type loss_temp;
|
||||
self.separation_oracle_cached(i, w, loss_temp, ftemp);
|
||||
loss += loss_temp;
|
||||
sparse_vector::add_to(faccum, ftemp);
|
||||
}
|
||||
|
||||
auto_mutex lock(self.accum_mutex);
|
||||
total_loss += loss;
|
||||
sparse_vector::add_to(subgradient, faccum);
|
||||
}
|
||||
}
|
||||
|
||||
const structural_svm_problem_threaded& self;
|
||||
@ -76,10 +102,14 @@ namespace dlib
|
||||
) const
|
||||
{
|
||||
const long num = this->get_num_samples();
|
||||
|
||||
// how many samples to process in a single task (aim for 100 jobs per thread)
|
||||
const long block_size = std::max<long>(1, num / (1+tp.num_threads_in_pool()*100));
|
||||
|
||||
binder b(*this, w, subgradient, total_loss);
|
||||
for (long i = 0; i < num; ++i)
|
||||
for (long i = 0; i < num; i+=block_size)
|
||||
{
|
||||
tp.add_task(b, &binder::call_oracle, i);
|
||||
tp.add_task(b, &binder::call_oracle, i, std::min(i+block_size, num));
|
||||
}
|
||||
tp.wait_for_all_tasks();
|
||||
}
|
||||
|
@ -486,15 +486,16 @@ namespace
|
||||
|
||||
void make_dataset (
|
||||
std::vector<sample_type>& samples,
|
||||
std::vector<scalar_type>& labels
|
||||
std::vector<scalar_type>& labels,
|
||||
int num
|
||||
)
|
||||
{
|
||||
samples.clear();
|
||||
labels.clear();
|
||||
dlib::rand rnd;
|
||||
static dlib::rand rnd;
|
||||
for (int i = 0; i < 10; ++i)
|
||||
{
|
||||
for (int j = 0; j < 100; ++j)
|
||||
for (int j = 0; j < num; ++j)
|
||||
{
|
||||
sample_type samp;
|
||||
samp = 0;
|
||||
@ -517,7 +518,10 @@ namespace
|
||||
"Runs tests on the structural svm components.")
|
||||
{}
|
||||
|
||||
void perform_test (
|
||||
void run_test (
|
||||
const std::vector<sample_type>& samples,
|
||||
const std::vector<scalar_type>& labels,
|
||||
const double true_obj
|
||||
)
|
||||
{
|
||||
typedef linear_kernel<sample_type> kernel_type;
|
||||
@ -530,10 +534,6 @@ namespace
|
||||
trainer1.set_epsilon(1e-4);
|
||||
trainer1.set_c(10);
|
||||
|
||||
std::vector<sample_type> samples;
|
||||
std::vector<scalar_type> labels;
|
||||
make_dataset(samples, labels);
|
||||
|
||||
|
||||
multiclass_linear_decision_function<kernel_type,double> df1, df2, df3, df4, df5;
|
||||
double obj1, obj2, obj3, obj4, obj5;
|
||||
@ -561,11 +561,11 @@ namespace
|
||||
DLIB_TEST(std::abs(obj1 - obj3) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj1 - obj4) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj1 - obj5) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj1 - 1.155) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj2 - 1.155) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj3 - 1.155) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj4 - 1.155) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj5 - 1.155) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj1 - true_obj) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj2 - true_obj) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj3 - true_obj) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj4 - true_obj) < 1e-2);
|
||||
DLIB_TEST(std::abs(obj5 - true_obj) < 1e-2);
|
||||
|
||||
dlog << LINFO << "weight error: "<< max(abs(df1.weights - df2.weights));
|
||||
dlog << LINFO << "weight error: "<< max(abs(df1.weights - df3.weights));
|
||||
@ -589,27 +589,46 @@ namespace
|
||||
matrix<double> res = test_multiclass_decision_function(df1, samples, labels);
|
||||
dlog << LINFO << res;
|
||||
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
|
||||
DLIB_TEST(sum(diag(res)) == 1000);
|
||||
DLIB_TEST(sum(diag(res)) == samples.size());
|
||||
|
||||
res = test_multiclass_decision_function(df2, samples, labels);
|
||||
dlog << LINFO << res;
|
||||
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
|
||||
DLIB_TEST(sum(diag(res)) == 1000);
|
||||
DLIB_TEST(sum(diag(res)) == samples.size());
|
||||
|
||||
res = test_multiclass_decision_function(df3, samples, labels);
|
||||
dlog << LINFO << res;
|
||||
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
|
||||
DLIB_TEST(sum(diag(res)) == 1000);
|
||||
DLIB_TEST(sum(diag(res)) == samples.size());
|
||||
|
||||
res = test_multiclass_decision_function(df4, samples, labels);
|
||||
dlog << LINFO << res;
|
||||
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
|
||||
DLIB_TEST(sum(diag(res)) == 1000);
|
||||
DLIB_TEST(sum(diag(res)) == samples.size());
|
||||
|
||||
res = test_multiclass_decision_function(df5, samples, labels);
|
||||
dlog << LINFO << res;
|
||||
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
|
||||
DLIB_TEST(sum(diag(res)) == 1000);
|
||||
DLIB_TEST(sum(diag(res)) == samples.size());
|
||||
}
|
||||
|
||||
void perform_test (
|
||||
)
|
||||
{
|
||||
std::vector<sample_type> samples;
|
||||
std::vector<scalar_type> labels;
|
||||
|
||||
dlog << LINFO << "test with 100 samples per class";
|
||||
make_dataset(samples, labels, 100);
|
||||
run_test(samples, labels, 1.155);
|
||||
|
||||
dlog << LINFO << "test with 1 sample per class";
|
||||
make_dataset(samples, labels, 1);
|
||||
run_test(samples, labels, 0.251);
|
||||
|
||||
dlog << LINFO << "test with 2 sample per class";
|
||||
make_dataset(samples, labels, 2);
|
||||
run_test(samples, labels, 0.444);
|
||||
}
|
||||
} a;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user