mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
depth_group replaced with concat layer
This commit is contained in:
parent
93e786db6c
commit
1f0318e222
495
dlib/dnn/core.h
@ -648,6 +648,7 @@ namespace dlib
|
||||
friend class add_skip_layer;
|
||||
template <size_t N, template<typename> class L, typename S>
|
||||
friend class repeat;
|
||||
friend class dnn_tester;
|
||||
|
||||
// Allow copying networks from one to another as long as their corresponding
|
||||
// layers can be constructed from each other.
|
||||
@ -1520,6 +1521,7 @@ namespace dlib
|
||||
friend class add_skip_layer;
|
||||
template <size_t N, template<typename> class L, typename S>
|
||||
friend class repeat;
|
||||
friend class dnn_tester;
|
||||
|
||||
// You wouldn't put a tag on a layer if you didn't want to access its forward
|
||||
// outputs. So this is always true.
|
||||
@ -3191,499 +3193,6 @@ namespace dlib
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace impl
|
||||
{
|
||||
template <typename T>
|
||||
struct group_helper;
|
||||
template<typename... R>
|
||||
struct group_count_helper;
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// this class is used to reference group layer input
|
||||
class group_input
|
||||
{
|
||||
public:
|
||||
typedef tensor input_type;
|
||||
const static unsigned int sample_expansion_factor = 1;
|
||||
friend void serialize(const group_input& item, std::ostream& out)
|
||||
{
|
||||
serialize("group_input", out);
|
||||
}
|
||||
|
||||
friend void deserialize(group_input& item, std::istream& in)
|
||||
{
|
||||
std::string version;
|
||||
deserialize(version, in);
|
||||
if (version != "group_input")
|
||||
throw serialization_error("Unexpected version found while deserializing dlib::group_input.");
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const group_input& item)
|
||||
{
|
||||
out << "group_input";
|
||||
return out;
|
||||
}
|
||||
};
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
||||
template <typename GRP, typename SUBNET>
|
||||
class depth_group;
|
||||
|
||||
|
||||
template <typename T, typename U>
|
||||
struct is_nonloss_layer_type<depth_group<T,U>> : std::true_type {};
|
||||
|
||||
template <typename GRP, typename SUBNET>
|
||||
class depth_group
|
||||
{
|
||||
public:
|
||||
typedef GRP grp_type;
|
||||
typedef SUBNET subnet_type;
|
||||
typedef typename subnet_type::input_type input_type;
|
||||
const static size_t group_size = std::tuple_size<grp_type>::value;
|
||||
const static size_t num_layers_in_group = impl::group_count_helper<GRP>::num_layers;
|
||||
const static size_t num_layers = subnet_type::num_layers + num_layers_in_group;
|
||||
const static size_t num_computational_layers_in_group = impl::group_count_helper<GRP>::num_computational_layers;
|
||||
const static size_t num_computational_layers = subnet_type::num_computational_layers + num_computational_layers_in_group;
|
||||
const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
|
||||
|
||||
using group_helper = impl::group_helper<grp_type>;
|
||||
|
||||
depth_group(
|
||||
):
|
||||
subnetwork(new subnet_type()),
|
||||
grp(new grp_type()),
|
||||
gradient_input_is_stale(true),
|
||||
get_output_and_gradient_input_disabled(false)
|
||||
{
|
||||
}
|
||||
|
||||
depth_group(const depth_group& item)
|
||||
{
|
||||
grp.reset(new grp_type(*item.grp));
|
||||
subnetwork.reset(new subnet_type(*item.subnetwork));
|
||||
gradient_input_is_stale = item.gradient_input_is_stale;
|
||||
get_output_and_gradient_input_disabled = item.get_output_and_gradient_input_disabled;
|
||||
x_grad = item.x_grad;
|
||||
cached_output = item.cached_output;
|
||||
temp_tensor = item.temp_tensor;
|
||||
}
|
||||
depth_group& operator=(const depth_group& item) { depth_group(item).swap(*this); return *this;}
|
||||
depth_group(depth_group&& item) : depth_group() { swap(item); }
|
||||
depth_group& operator=(depth_group&& item) { swap(item); return *this; }
|
||||
|
||||
template <typename T, typename U, typename E>
|
||||
friend class add_layer;
|
||||
template <typename T, bool is_first, typename E>
|
||||
friend class dimpl::subnet_wrapper;
|
||||
template <unsigned long T, typename U, typename E>
|
||||
friend class add_tag_layer;
|
||||
template <template<typename> class T, typename U>
|
||||
friend class add_skip_layer;
|
||||
template <size_t N, template<typename> class L, typename S>
|
||||
friend class repeat;
|
||||
|
||||
// Allow copying networks from one to another as long as their corresponding
|
||||
// layers can be constructed from each other.
|
||||
template <typename T, typename U>
|
||||
depth_group(
|
||||
const depth_group<T,U>& item
|
||||
) :
|
||||
grp(new grp_type(item.detail())),
|
||||
subnetwork(new subnet_type(item.subnet())),
|
||||
gradient_input_is_stale(item.gradient_input_is_stale),
|
||||
get_output_and_gradient_input_disabled(item.get_output_and_gradient_input_disabled),
|
||||
x_grad(item.x_grad),
|
||||
cached_output(item.cached_output)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename input_iterator>
|
||||
void to_tensor (
|
||||
input_iterator ibegin,
|
||||
input_iterator iend,
|
||||
resizable_tensor& data
|
||||
) const
|
||||
{
|
||||
subnetwork->to_tensor(ibegin,iend,data);
|
||||
}
|
||||
|
||||
template <typename input_iterator>
|
||||
const tensor& operator() (
|
||||
input_iterator ibegin,
|
||||
input_iterator iend
|
||||
)
|
||||
{
|
||||
to_tensor(ibegin,iend,temp_tensor);
|
||||
return forward(temp_tensor);
|
||||
}
|
||||
|
||||
|
||||
const tensor& operator() (const input_type& x)
|
||||
{
|
||||
return (*this)(&x, &x+1);
|
||||
}
|
||||
|
||||
|
||||
// forward for group: subnet->for_each_in_group->concat->cached_output
|
||||
const tensor& forward(const tensor& x)
|
||||
{
|
||||
|
||||
subnetwork->forward(x);
|
||||
long group_depth = 0;
|
||||
|
||||
group_helper::forward(subnetwork->get_output(), detail(), group_depth);
|
||||
|
||||
auto& out_0 = std::get<0>(detail()).get_output();
|
||||
cached_output.set_size(out_0.num_samples(), group_depth, out_0.nr(), out_0.nc());
|
||||
|
||||
group_helper::concat(cached_output, detail());
|
||||
|
||||
|
||||
gradient_input_is_stale = true;
|
||||
return private_get_output();
|
||||
}
|
||||
|
||||
private:
|
||||
bool this_layer_requires_forward_output(
|
||||
)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
tensor& private_get_output() const
|
||||
{
|
||||
return const_cast<resizable_tensor&>(cached_output);
|
||||
}
|
||||
tensor& private_get_gradient_input()
|
||||
{
|
||||
if (gradient_input_is_stale)
|
||||
{
|
||||
gradient_input_is_stale = false;
|
||||
x_grad.copy_size(private_get_output());
|
||||
x_grad = 0;
|
||||
}
|
||||
return x_grad;
|
||||
}
|
||||
void disable_output_and_gradient_getters (
|
||||
) { get_output_and_gradient_input_disabled = true; }
|
||||
public:
|
||||
const tensor& get_output() const
|
||||
{
|
||||
if (get_output_and_gradient_input_disabled)
|
||||
throw dlib::error("Accessing this layer's get_output() is disabled because an in-place layer has been stacked on top of it.");
|
||||
return private_get_output();
|
||||
}
|
||||
tensor& get_gradient_input()
|
||||
{
|
||||
if (get_output_and_gradient_input_disabled)
|
||||
throw dlib::error("Accessing this layer's get_gradient_input() is disabled because an in-place layer has been stacked on top of it.");
|
||||
return private_get_gradient_input();
|
||||
}
|
||||
|
||||
const tensor& get_final_data_gradient(
|
||||
) const { return subnetwork->get_final_data_gradient(); }
|
||||
|
||||
void back_propagate_error(const tensor& x)
|
||||
{
|
||||
back_propagate_error(x, private_get_gradient_input());
|
||||
}
|
||||
void back_propagate_error(const tensor& x, const tensor& gradient_input)
|
||||
{
|
||||
group_helper::backward(detail(), get_gradient_input(), subnetwork->get_output(), subnetwork->get_gradient_input());
|
||||
|
||||
subnetwork->back_propagate_error(x);
|
||||
|
||||
// zero out get_gradient_input()
|
||||
gradient_input_is_stale = true;
|
||||
}
|
||||
|
||||
template <typename solver_type>
|
||||
void update_parameters(sstack<solver_type> solvers, double step_size)
|
||||
{
|
||||
DLIB_CASSERT(solvers.size()>=num_computational_layers,"");
|
||||
group_helper::update_parameters(solvers, step_size, detail());
|
||||
solvers = solvers.pop(num_computational_layers_in_group);
|
||||
subnetwork->update_parameters(solvers, step_size);
|
||||
}
|
||||
|
||||
const subnet_type& subnet() const { return *subnetwork; }
|
||||
subnet_type& subnet() { return *subnetwork; }
|
||||
|
||||
const grp_type& detail() const { return *grp; }
|
||||
grp_type& detail() { return *grp; }
|
||||
|
||||
void clean()
|
||||
{
|
||||
x_grad.clear();
|
||||
cached_output.clear();
|
||||
temp_tensor.clear();
|
||||
gradient_input_is_stale = true;
|
||||
subnetwork->clean();
|
||||
}
|
||||
|
||||
friend void serialize(const depth_group& item, std::ostream& out)
|
||||
{
|
||||
int version = 2;
|
||||
serialize(version, out);
|
||||
serialize(*item.subnetwork, out);
|
||||
group_helper::serialize(*item.grp, out);
|
||||
serialize(item.gradient_input_is_stale, out);
|
||||
serialize(item.get_output_and_gradient_input_disabled, out);
|
||||
serialize(item.x_grad, out);
|
||||
serialize(item.cached_output, out);
|
||||
}
|
||||
|
||||
friend void deserialize(depth_group& item, std::istream& in)
|
||||
{
|
||||
int version = 0;
|
||||
deserialize(version, in);
|
||||
if (!(1 <= version && version <= 2))
|
||||
throw serialization_error("Unexpected version found while deserializing dlib::depth_group.");
|
||||
deserialize(*item.subnetwork, in);
|
||||
group_helper::deserialize(*item.grp, in);
|
||||
deserialize(item.gradient_input_is_stale, in);
|
||||
deserialize(item.get_output_and_gradient_input_disabled, in);
|
||||
deserialize(item.x_grad, in);
|
||||
deserialize(item.cached_output, in);
|
||||
}
|
||||
|
||||
friend std::ostream& operator<< (std::ostream& out, const depth_group& item)
|
||||
{
|
||||
item.print(out, 0);
|
||||
return out;
|
||||
}
|
||||
|
||||
void print (std::ostream& out, unsigned long idx=0) const
|
||||
{
|
||||
out << "layer<" << idx << ">\t";
|
||||
detail().print(out, idx);
|
||||
subnet().print(out, idx+1);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
|
||||
void swap(depth_group& item)
|
||||
{
|
||||
std::swap(subnetwork,item.subnetwork);
|
||||
std::swap(grp, item.grp);
|
||||
std::swap(gradient_input_is_stale, item.gradient_input_is_stale);
|
||||
std::swap(get_output_and_gradient_input_disabled, item.get_output_and_gradient_input_disabled);
|
||||
std::swap(x_grad, item.x_grad);
|
||||
std::swap(cached_output, item.cached_output);
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<subnet_type> subnetwork;
|
||||
std::unique_ptr<grp_type> grp;
|
||||
|
||||
bool gradient_input_is_stale;
|
||||
bool get_output_and_gradient_input_disabled;
|
||||
|
||||
resizable_tensor x_grad;
|
||||
resizable_tensor cached_output;
|
||||
|
||||
// temp_tensor doesn't logically contribute to the state of this object.
|
||||
// It is here only to prevent it from being reallocated over and over.
|
||||
resizable_tensor temp_tensor;
|
||||
};
|
||||
|
||||
// define "grp" layer shorter name for usage when creating networks
|
||||
template <typename GRP, typename SUBNET>
|
||||
using grp = depth_group<GRP, SUBNET>;
|
||||
|
||||
namespace impl {
|
||||
template<
|
||||
unsigned int i,
|
||||
typename T, typename U
|
||||
>
|
||||
struct layer_helper<i, depth_group<T, U>,
|
||||
typename std::enable_if<(i != 0 && i >= depth_group<T, U>::num_layers_in_group)>::type> {
|
||||
const static size_t num_layers_in_group = depth_group<T, U>::num_layers_in_group;
|
||||
|
||||
using next_type = typename depth_group<T, U>::subnet_type;
|
||||
using type = typename layer_helper<i - num_layers_in_group, next_type>::type;
|
||||
|
||||
static type &layer(depth_group<T, U> &n) {
|
||||
return layer_helper<i - num_layers_in_group, next_type>::layer(n.subnet());
|
||||
}
|
||||
};
|
||||
|
||||
template<
|
||||
unsigned int i,
|
||||
typename T, typename U
|
||||
>
|
||||
struct layer_helper<i, depth_group<T, U>,
|
||||
typename std::enable_if<(i != 0 && i < depth_group<T, U>::num_layers_in_group)>::type> {
|
||||
const static size_t num_layers_in_group = depth_group<T, U>::num_layers_in_group;
|
||||
typedef typename depth_group<T, U>::grp_type grp_type;
|
||||
using type = typename layer_helper<i, grp_type>::type;
|
||||
|
||||
static type &layer(depth_group<T, U> &n) {
|
||||
return layer_helper<i, grp_type>::layer(n.detail());
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int pos, unsigned int i, typename... T>
|
||||
struct group_pos_search{
|
||||
const static unsigned int count = sizeof...(T);
|
||||
const static unsigned int pos_from_begin = count - pos - 1;
|
||||
using tuple_elem_type = typename std::tuple_element<pos_from_begin, std::tuple<T...>>::type;
|
||||
static const unsigned int num_layers = tuple_elem_type::num_layers;
|
||||
|
||||
static const unsigned int layer_index = i >= num_layers ? group_pos_search<pos - 1, i - num_layers, T...>::layer_index : i;
|
||||
static const unsigned int tuple_index = i >= num_layers ? group_pos_search<pos - 1, i - num_layers, T...>::tuple_index + 1 : pos;
|
||||
};
|
||||
template <unsigned int i, typename... T>
|
||||
struct group_pos_search<0, i, T...>{
|
||||
static const unsigned int layer_index = i;
|
||||
static const unsigned int tuple_index = 0;
|
||||
};
|
||||
|
||||
|
||||
template<
|
||||
unsigned int i,
|
||||
typename... R
|
||||
>
|
||||
struct layer_helper<i, std::tuple<R...>, typename std::enable_if<true>::type>{
|
||||
const static unsigned tuple_size = sizeof...(R);
|
||||
|
||||
static const unsigned int layer_index = group_pos_search<tuple_size - 1, i, R...>::layer_index;
|
||||
static const unsigned int tuple_index = group_pos_search<tuple_size - 1, i, R...>::tuple_index;
|
||||
|
||||
using next_type = typename std::tuple_element<tuple_index, std::tuple<R...>>::type;//typename std::remove_reference<decltype(makeT().subnet())>::type;
|
||||
using type = typename layer_helper<layer_index,next_type>::type;
|
||||
|
||||
static type &layer(std::tuple<R...> &n) {
|
||||
return layer_helper<layer_index, next_type>::layer(std::get<tuple_index>(n));
|
||||
}
|
||||
};
|
||||
|
||||
// helper classes for layer group processing
|
||||
template <size_t idx, typename... T>
|
||||
struct group_helper_impl{
|
||||
static void serialize_impl(const std::tuple<T...>& data, std::ostream& out){
|
||||
group_helper_impl<idx - 1, T...>::serialize_impl(data, out);
|
||||
serialize(std::get<idx>(data), out);
|
||||
}
|
||||
static void deserialize_impl(std::tuple<T...>& data, std::istream& in){
|
||||
group_helper_impl<idx - 1, T...>::deserialize_impl(data, in);
|
||||
deserialize(std::get<idx>(data), in);
|
||||
}
|
||||
static void forward(const tensor& x, std::tuple<T...>& grp, long& group_depth){
|
||||
group_helper_impl<idx - 1, T...>::forward(x, grp, group_depth);
|
||||
auto& r = std::get<idx>(grp).forward(x);
|
||||
group_depth += r.k();
|
||||
}
|
||||
static size_t concat(resizable_tensor& cached_output, std::tuple<T...>& grp, size_t offset){
|
||||
offset += group_helper_impl<idx - 1, T...>::concat(cached_output, grp, offset);
|
||||
auto& output = std::get<idx>(grp).get_output();
|
||||
tt::concat_depth(cached_output, offset, output);
|
||||
return offset + output.nc() * output.nr() * output.k();
|
||||
}
|
||||
template<typename solver_type>
|
||||
static sstack<solver_type> update_parameters(sstack<solver_type> solvers, double step_size, std::tuple<T...>& grp){
|
||||
sstack<solver_type> sub_solvers = group_helper_impl<idx - 1, T...>::update_parameters(solvers, step_size, grp);
|
||||
std::get<idx>(grp).update_parameters(sub_solvers, step_size);
|
||||
using tuple_elem_type = typename std::tuple_element<idx, std::tuple<T...>>::type;
|
||||
return sub_solvers.pop(tuple_elem_type::num_computational_layers);
|
||||
}
|
||||
static size_t backward(std::tuple<T...>& grp, const tensor& group_gradient_in,
|
||||
const tensor& subnet_out, tensor& group_gradient_out, size_t offset)
|
||||
{
|
||||
offset += group_helper_impl<idx - 1, T...>::backward(grp, group_gradient_in, subnet_out, group_gradient_out, offset);
|
||||
|
||||
auto& subnet = std::get<idx>(grp);
|
||||
auto& gr_input = subnet.get_gradient_input();
|
||||
tt::split_depth(gr_input, offset, group_gradient_in);
|
||||
|
||||
subnet.back_propagate_error(subnet_out);
|
||||
|
||||
tt::add(group_gradient_out, group_gradient_out, subnet.get_final_data_gradient());
|
||||
return offset + gr_input.nc() * gr_input.nr() * gr_input.k();
|
||||
}
|
||||
};
|
||||
template <typename... T>
|
||||
struct group_helper_impl<0, T...>{
|
||||
static void serialize_impl(const std::tuple<T...>& data, std::ostream& out){
|
||||
serialize(std::get<0>(data), out);
|
||||
}
|
||||
static void deserialize_impl(std::tuple<T...>& data, std::istream& in){
|
||||
deserialize(std::get<0>(data), in);
|
||||
}
|
||||
static void forward(const tensor& x, std::tuple<T...>& grp, long& group_depth){
|
||||
auto& r = std::get<0>(grp).forward(x);
|
||||
group_depth += r.k();
|
||||
}
|
||||
static size_t concat(resizable_tensor& cached_output, std::tuple<T...>& grp, size_t offset){
|
||||
auto& output = std::get<0>(grp).get_output();
|
||||
tt::concat_depth(cached_output, offset, output);
|
||||
return offset + output.nc() * output.nr() * output.k();
|
||||
}
|
||||
template<typename solver_type>
|
||||
static sstack<solver_type> update_parameters(sstack<solver_type> solvers, double step_size, std::tuple<T...>& grp){
|
||||
std::get<0>(grp).update_parameters(solvers, step_size);
|
||||
using tuple_elem_type = typename std::tuple_element<0, std::tuple<T...>>::type;
|
||||
return solvers.pop(tuple_elem_type::num_computational_layers);
|
||||
}
|
||||
static size_t backward(std::tuple<T...>& grp, const tensor& group_gradient_in,
|
||||
const tensor& subnet_out, tensor& group_gradient_out, size_t offset)
|
||||
{
|
||||
auto& item = std::get<0>(grp);
|
||||
auto& gr_input = item.get_gradient_input();
|
||||
tt::split_depth(gr_input, offset, group_gradient_in);
|
||||
item.back_propagate_error(subnet_out);
|
||||
|
||||
tt::add(group_gradient_out, group_gradient_out, item.get_final_data_gradient());
|
||||
return offset + gr_input.nc() * gr_input.nr() * gr_input.k();
|
||||
}
|
||||
};
|
||||
template <typename... T>
|
||||
struct group_helper<std::tuple<T...>>{
|
||||
static void serialize(const std::tuple<T...> & data, std::ostream& out){
|
||||
group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::serialize_impl(data, out);
|
||||
}
|
||||
static void deserialize(std::tuple<T...>& data, std::istream& in){
|
||||
group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::deserialize_impl(data, in);
|
||||
}
|
||||
static void forward(const tensor& x, std::tuple<T...>& grp, long& group_depth){
|
||||
group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::forward(x, grp, group_depth);
|
||||
}
|
||||
static void concat(resizable_tensor& out, std::tuple<T...>& grp){
|
||||
group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::concat(out, grp, 0);
|
||||
}
|
||||
template<typename solver_type>
|
||||
static void update_parameters(sstack<solver_type> solvers, double step_size, std::tuple<T...>& grp){
|
||||
group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::update_parameters(solvers, step_size, grp);
|
||||
}
|
||||
static void backward(std::tuple<T...>& grp, const tensor& group_gradient_in, const tensor& subnet_out, tensor& group_gradient_out)
|
||||
{
|
||||
group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::backward(grp, group_gradient_in, subnet_out, group_gradient_out, 0);
|
||||
}
|
||||
};
|
||||
|
||||
// helper classes to understand the count of group items layers
|
||||
template<typename T>
|
||||
struct group_count_helper<T>{
|
||||
const static size_t num_layers = T::num_layers;
|
||||
const static size_t num_computational_layers = T::num_computational_layers;
|
||||
};
|
||||
|
||||
template<typename T, typename... R>
|
||||
struct group_count_helper<T, R...>{
|
||||
const static size_t num_layers = group_count_helper<T>::num_layers + group_count_helper<R...>::num_layers;
|
||||
const static size_t num_computational_layers = group_count_helper<T>::num_computational_layers + group_count_helper<R...>::num_computational_layers;
|
||||
};
|
||||
template<typename... R>
|
||||
struct group_count_helper<std::tuple<R...>>{
|
||||
const static size_t num_layers = group_count_helper<R...>::num_layers;
|
||||
const static size_t num_computational_layers = group_count_helper<R...>::num_computational_layers;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // DLIB_DNn_CORE_H_
|
||||
|
@ -1783,58 +1783,37 @@ namespace dlib
|
||||
filters_gradient += gi*temp;
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
void concat_depth(
|
||||
// ------------------------------------------------------------------------------------
|
||||
void copy_tensor(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
)
|
||||
size_t dest_k_offset,
|
||||
const tensor& src,
|
||||
size_t src_k_offset,
|
||||
size_t count_k
|
||||
)
|
||||
{
|
||||
const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
|
||||
const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());
|
||||
|
||||
const size_t block_size = count_k * dest.nc() * dest.nr();
|
||||
|
||||
DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
|
||||
dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
|
||||
DLIB_CASSERT(dest.k() - dest_k_offset >= count_k, "Not enough space in dest tensor");
|
||||
DLIB_CASSERT(src.k() - src_k_offset >= count_k, "Not enough space in src tensor");
|
||||
|
||||
float* dest_p = dest.host() + dest_k_offset * dest.nc() * dest.nr();
|
||||
const float* src_p = src.host() + src_k_offset * src.nc() * src.nr();
|
||||
|
||||
for (unsigned long i = 0; i < src.num_samples(); ++i)
|
||||
{
|
||||
const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
|
||||
const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());
|
||||
::memcpy(dest_p, src_p, block_size * sizeof(float));
|
||||
|
||||
DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
|
||||
dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
|
||||
DLIB_CASSERT(dest_sample_size >= src_sample_size + sample_offset, "Not enough space in dest tensor");
|
||||
|
||||
float* dest_p = dest.host_write_only() + sample_offset;
|
||||
const float* src_p = src.host();
|
||||
|
||||
for (unsigned long i = 0; i < src.num_samples(); ++i)
|
||||
{
|
||||
::memcpy(dest_p, src_p, src_sample_size * sizeof(float));
|
||||
|
||||
dest_p += dest_sample_size;
|
||||
src_p += src_sample_size;
|
||||
}
|
||||
dest_p += dest_sample_size;
|
||||
src_p += src_sample_size;
|
||||
}
|
||||
}
|
||||
|
||||
void split_depth(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
)
|
||||
{
|
||||
const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
|
||||
const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());
|
||||
|
||||
DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
|
||||
dest.nc() == src.nc() && dest.nr() == src.nr(),
|
||||
"All sources should fit into dest tensor size");
|
||||
DLIB_CASSERT(dest_sample_size <= src_sample_size - sample_offset, "Not enough space in dest tensor");
|
||||
|
||||
float *dest_p = dest.host_write_only();
|
||||
const float *src_p = src.host() + sample_offset;
|
||||
|
||||
for (unsigned long i = 0; i < src.num_samples(); ++i) {
|
||||
::memcpy(dest_p, src_p, dest_sample_size * sizeof(float));
|
||||
|
||||
dest_p += dest_sample_size;
|
||||
src_p += src_sample_size;
|
||||
}
|
||||
}
|
||||
// ------------------------------------------------------------------------------------
|
||||
// ------------------------------------------------------------------------------------
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
@ -384,19 +384,14 @@ namespace dlib
|
||||
long last_padding_x;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void concat_depth(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
);
|
||||
|
||||
void split_depth(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
);
|
||||
// -----------------------------------------------------------------------------------
|
||||
void copy_tensor(
|
||||
tensor& dest,
|
||||
size_t dest_k_offset,
|
||||
const tensor& src,
|
||||
size_t src_k_offset,
|
||||
size_t count_k
|
||||
);
|
||||
// -----------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -796,57 +796,38 @@ namespace dlib
|
||||
grad.device(), src.device(), gradient_input.device(), grad.size(),
|
||||
param.device(), params_grad.device());
|
||||
}
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void concat_depth(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
void copy_tensor(
|
||||
tensor& dest,
|
||||
size_t dest_k_offset,
|
||||
const tensor& src,
|
||||
size_t src_k_offset,
|
||||
size_t count_k
|
||||
)
|
||||
{
|
||||
const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
|
||||
const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());
|
||||
|
||||
const size_t block_size = count_k * dest.nc() * dest.nr();
|
||||
|
||||
DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
|
||||
dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
|
||||
DLIB_CASSERT(dest_sample_size >= src_sample_size + sample_offset, "Not enough space in dest tensor");
|
||||
DLIB_CASSERT(dest.k() - dest_k_offset >= count_k, "Not enough space in dest tensor");
|
||||
DLIB_CASSERT(src.k() - src_k_offset >= count_k, "Not enough space in src tensor");
|
||||
|
||||
float* dest_p = dest.device() + dest_k_offset * dest.nc() * dest.nr();
|
||||
const float* src_p = src.device() + src_k_offset * src.nc() * src.nr();;
|
||||
|
||||
float* dest_p = dest.device_write_only() + sample_offset;
|
||||
const float* src_p = src.device();
|
||||
|
||||
for (unsigned long i = 0; i < src.num_samples(); ++i)
|
||||
{
|
||||
CHECK_CUDA(cudaMemcpy(dest_p, src_p, src_sample_size * sizeof(float), cudaMemcpyDeviceToDevice));
|
||||
CHECK_CUDA(cudaMemcpy(dest_p, src_p, block_size * sizeof(float), cudaMemcpyDeviceToDevice));
|
||||
|
||||
dest_p += dest_sample_size;
|
||||
src_p += src_sample_size;
|
||||
}
|
||||
}
|
||||
|
||||
void split_depth(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
)
|
||||
{
|
||||
const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
|
||||
const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());
|
||||
|
||||
DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
|
||||
dest.nc() == src.nc() && dest.nr() == src.nr(),
|
||||
"All sources should fit into dest tensor size");
|
||||
DLIB_CASSERT(dest_sample_size <= src_sample_size - sample_offset, "Not enough space in dest tensor");
|
||||
|
||||
float *dest_p = dest.device_write_only();
|
||||
const float *src_p = src.device() + sample_offset;
|
||||
|
||||
for (unsigned long i = 0; i < src.num_samples(); ++i) {
|
||||
CHECK_CUDA(cudaMemcpy(dest_p, src_p, dest_sample_size * sizeof(float), cudaMemcpyDeviceToDevice));
|
||||
dest_p += dest_sample_size;
|
||||
src_p += src_sample_size;
|
||||
}
|
||||
}
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -258,16 +258,12 @@ namespace dlib
|
||||
tensor& params_grad
|
||||
);
|
||||
|
||||
void concat_depth(
|
||||
void copy_tensor(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
);
|
||||
|
||||
void split_depth(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
const tensor& src,
|
||||
size_t dest_k_offset,
|
||||
size_t src_k_offset,
|
||||
size_t count_k
|
||||
);
|
||||
// ------------------------------------------------------------------------------------
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
@ -1836,6 +1836,163 @@ namespace dlib
|
||||
template <typename SUBNET>
|
||||
using softmax = add_layer<softmax_, SUBNET>;
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
namespace impl{
|
||||
// helper classes for layer concat processing
|
||||
template <template<typename> class... TAG_TYPES>
|
||||
struct concat_helper_impl {
|
||||
};
|
||||
template <template<typename> class TAG_TYPE>
|
||||
struct concat_helper_impl<TAG_TYPE>{
|
||||
template<typename SUBNET>
|
||||
static void resize_out(resizable_tensor& out, const SUBNET& sub, long sum_k)
|
||||
{
|
||||
auto& t = layer<TAG_TYPE>(sub).get_output();
|
||||
out.set_size(t.num_samples(), t.k() + sum_k, t.nr(), t.nc());
|
||||
}
|
||||
template<typename SUBNET>
|
||||
static void concat(tensor& out, const SUBNET& sub, size_t k_offset)
|
||||
{
|
||||
auto& t = layer<TAG_TYPE>(sub).get_output();
|
||||
tt::copy_tensor(out, k_offset, t, 0, t.k());
|
||||
}
|
||||
template<typename SUBNET>
|
||||
static void split(const tensor& input, SUBNET& sub, size_t k_offset)
|
||||
{
|
||||
auto& t = layer<TAG_TYPE>(sub).get_gradient_input();
|
||||
tt::copy_tensor(t, 0, input, k_offset, t.k());
|
||||
}
|
||||
};
|
||||
template <template<typename> class TAG_TYPE, template<typename> class... TAG_TYPES>
|
||||
struct concat_helper_impl<TAG_TYPE, TAG_TYPES...>{
|
||||
template<typename SUBNET>
|
||||
static void resize_out(resizable_tensor& out, const SUBNET& sub, long sum_k)
|
||||
{
|
||||
auto& t = layer<TAG_TYPE>(sub).get_output();
|
||||
concat_helper_impl<TAG_TYPES...>::resize_out(out, sub, sum_k + t.k());
|
||||
}
|
||||
template<typename SUBNET>
|
||||
static void concat(tensor& out, const SUBNET& sub, size_t k_offset)
|
||||
{
|
||||
auto& t = layer<TAG_TYPE>(sub).get_output();
|
||||
tt::copy_tensor(out, k_offset, t, 0, t.k());
|
||||
k_offset += t.k();
|
||||
concat_helper_impl<TAG_TYPES...>::concat(out, sub, k_offset);
|
||||
}
|
||||
template<typename SUBNET>
|
||||
static void split(const tensor& input, SUBNET& sub, size_t k_offset)
|
||||
{
|
||||
auto& t = layer<TAG_TYPE>(sub).get_gradient_input();
|
||||
tt::copy_tensor(t, 0, input, k_offset, t.k());
|
||||
k_offset += t.k();
|
||||
concat_helper_impl<TAG_TYPES...>::split(input, sub, k_offset);
|
||||
}
|
||||
};
|
||||
}
|
||||
// concat layer
|
||||
template<
|
||||
template<typename> class... TAG_TYPES
|
||||
>
|
||||
class concat_
|
||||
{
|
||||
public:
|
||||
template <typename SUBNET>
|
||||
void setup (const SUBNET&)
|
||||
{
|
||||
// do nothing
|
||||
}
|
||||
template <typename SUBNET>
|
||||
void forward(const SUBNET& sub, resizable_tensor& output)
|
||||
{
|
||||
// the total depth of the result is the sum of the depths of all tagged layers
|
||||
impl::concat_helper_impl<TAG_TYPES...>::resize_out(output, sub, 0);
|
||||
|
||||
// copy the output of each tagged layer into its own part of the result
|
||||
impl::concat_helper_impl<TAG_TYPES...>::concat(output, sub, 0);
|
||||
}
|
||||
|
||||
template <typename SUBNET>
|
||||
void backward(const tensor& gradient_input, SUBNET& sub, tensor&)
|
||||
{
|
||||
// The gradient is split into parts, one for each tagged layer
|
||||
impl::concat_helper_impl<TAG_TYPES...>::split(gradient_input, sub, 0);
|
||||
}
|
||||
|
||||
const tensor& get_layer_params() const { return params; }
|
||||
tensor& get_layer_params() { return params; }
|
||||
|
||||
friend void serialize(const concat_& item, std::ostream& out)
|
||||
{
|
||||
serialize("concat_", out);
|
||||
serialize(sizeof...(TAG_TYPES), out);
|
||||
}
|
||||
|
||||
friend void deserialize(concat_& item, std::istream& in)
|
||||
{
|
||||
std::string version;
|
||||
deserialize(version, in);
|
||||
if (version != "concat_")
|
||||
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::concat_.");
|
||||
size_t count_tags;
|
||||
deserialize(count_tags, in);
|
||||
if (count_tags != sizeof...(TAG_TYPES))
|
||||
throw serialization_error("Invalid count of tags "+ std::to_string(count_tags) +", expecting " +
|
||||
std::to_string(sizeof...(TAG_TYPES)) + " found while deserializing dlib::concat_.");
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const concat_& item)
|
||||
{
|
||||
out << "concat\t ("
|
||||
<< sizeof...(TAG_TYPES)
|
||||
<< ")";
|
||||
return out;
|
||||
}
|
||||
|
||||
private:
|
||||
resizable_tensor params; // unused
|
||||
};
|
||||
|
||||
|
||||
template <typename SUBNET, template<typename> class... TAG_TYPES>
|
||||
using concat = add_layer<concat_<TAG_TYPES...>, SUBNET>;
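For illustration only (not part of this commit): a minimal sketch of how the new concat layer can be wired up directly with the inception tags (itag0, itag1, itag2, iskip) declared just below. The branch1/branch2/two_branch names are made up for this example; the result is equivalent to inception2<branch1, branch2, SUBNET>.

// Hypothetical branches; each one reads the tagged input (itag0) and is itself
// tagged so the concat layer can find its output.
template <typename SUBNET> using branch1 = con<4,1,1,1,1, SUBNET>;
template <typename SUBNET> using branch2 = con<4,3,3,1,1, SUBNET>;

// The two branch outputs are stacked along k(), so the result has k() == 4 + 4.
template <typename SUBNET>
using two_branch = concat<itag1<branch1<iskip<
                          itag2<branch2<
                          itag0<SUBNET>>>>>>,
                          itag1, itag2>;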
|
||||
|
||||
// The inception layer uses tags internally. If the user also uses tags, conflicts
// are possible. To avoid them, these new tags are reserved specifically for
// inception layers.
|
||||
template <typename SUBNET> using itag0 = add_tag_layer< 1000 + 0, SUBNET>;
|
||||
template <typename SUBNET> using itag1 = add_tag_layer< 1000 + 1, SUBNET>;
|
||||
template <typename SUBNET> using itag2 = add_tag_layer< 1000 + 2, SUBNET>;
|
||||
template <typename SUBNET> using itag3 = add_tag_layer< 1000 + 3, SUBNET>;
|
||||
template <typename SUBNET> using itag4 = add_tag_layer< 1000 + 4, SUBNET>;
|
||||
template <typename SUBNET> using itag5 = add_tag_layer< 1000 + 5, SUBNET>;
|
||||
// skip to inception input
|
||||
template <typename SUBNET> using iskip = add_skip_layer< itag0, SUBNET>;
|
||||
|
||||
// Here are some templates for creating inception layer groups.
|
||||
template <template<typename>class B1,
|
||||
template<typename>class B2,
|
||||
typename SUBNET>
|
||||
using inception2 = concat<itag1<B1<iskip< itag2<B2< itag0<SUBNET>>>>>>, itag1, itag2>;
|
||||
template <template<typename>class B1,
|
||||
template<typename>class B2,
|
||||
template<typename>class B3,
|
||||
typename SUBNET>
|
||||
using inception3 = concat<itag1<B1<iskip< itag2<B2<iskip< itag3<B3< itag0<SUBNET>>>>>>>>>, itag1, itag2, itag3>;
|
||||
template <template<typename>class B1,
|
||||
template<typename>class B2,
|
||||
template<typename>class B3,
|
||||
template<typename>class B4,
|
||||
typename SUBNET>
|
||||
using inception4 = concat<itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4< itag0<SUBNET>>>>>>>>>>>>,
|
||||
itag1, itag2, itag3, itag4>;
|
||||
template <template<typename>class B1,
|
||||
template<typename>class B2,
|
||||
template<typename>class B3,
|
||||
template<typename>class B4,
|
||||
template<typename>class B5,
|
||||
typename SUBNET>
|
||||
using inception5 = concat<itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4<iskip< itag5<B5< itag0<SUBNET>>>>>>>>>>>>>>>,
|
||||
itag1, itag2, itag3, itag4, itag5>;
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -1652,6 +1652,88 @@ namespace dlib
|
||||
using add_prev9_ = add_prev_<tag9>;
|
||||
using add_prev10_ = add_prev_<tag10>;
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template<
|
||||
template<typename> class... TAG_TYPES
|
||||
>
|
||||
class concat_
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
|
||||
defined above. This layer simply concatenates the outputs of the specified tagged layers.
|
||||
In particular, it copies each layer's output from TAG_TYPES into the corresponding
|
||||
place in the result tensor, thus producing a combined output.
|
||||
The output of each tagged layer is stored in a separate part of the final output.
|
||||
FORWARD:
|
||||
for each (tag in TAG_TYPES)
|
||||
output[i, k + tag.k(), r, c] = layer<tag>(subnet).get_output[i, k, r, c]
|
||||
|
||||
BACKWARD:
|
||||
for each (tag in TAG_TYPES)
|
||||
layer<tag>(subnet).get_gradient_input[i, k, r, c] = input[i, k + tag.k(), r, c]
|
||||
|
||||
This layer can only be used with tagged layers inside the network.
|
||||
Each tagged layer must have identical num_samples, R, and C dimensions.
|
||||
The output will have K equal to the sum of the tagged layers' K values, and the output's num_samples,
|
||||
R, and C will be the same as those of the tagged layers.
|
||||
|
||||
!*/
|
||||
|
||||
public:
|
||||
template <typename SUBNET> void setup (const SUBNET& sub);
|
||||
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
|
||||
template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
|
||||
const tensor& get_layer_params() const;
|
||||
tensor& get_layer_params();
|
||||
/*!
|
||||
These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
|
||||
!*/
|
||||
};
|
||||
|
||||
|
||||
template <typename SUBNET, template<typename> class... TAG_TYPES>
|
||||
using concat = add_layer<concat_<TAG_TYPES...>, SUBNET>;
|
||||
|
||||
// The inception layer uses tags internally. If the user also uses tags, conflicts
// are possible. To avoid them, these new tags are reserved specifically for
// inception layers.
|
||||
template <typename SUBNET> using itag0 = add_tag_layer< 1000 + 0, SUBNET>;
|
||||
template <typename SUBNET> using itag1 = add_tag_layer< 1000 + 1, SUBNET>;
|
||||
template <typename SUBNET> using itag2 = add_tag_layer< 1000 + 2, SUBNET>;
|
||||
template <typename SUBNET> using itag3 = add_tag_layer< 1000 + 3, SUBNET>;
|
||||
template <typename SUBNET> using itag4 = add_tag_layer< 1000 + 4, SUBNET>;
|
||||
template <typename SUBNET> using itag5 = add_tag_layer< 1000 + 5, SUBNET>;
|
||||
// skip to inception input
|
||||
template <typename SUBNET> using iskip = add_skip_layer< itag0, SUBNET>;
|
||||
|
||||
// Here are some templates for creating inception layer groups.
|
||||
template <template<typename>class B1,
|
||||
template<typename>class B2,
|
||||
typename SUBNET>
|
||||
using inception2 = concat<itag1<B1<iskip< itag2<B2< itag0<SUBNET>>>>>>, itag1, itag2>;
|
||||
template <template<typename>class B1,
|
||||
template<typename>class B2,
|
||||
template<typename>class B3,
|
||||
typename SUBNET>
|
||||
using inception3 = concat<itag1<B1<iskip< itag2<B2<iskip< itag3<B3< itag0<SUBNET>>>>>>>>>, itag1, itag2, itag3>;
|
||||
template <template<typename>class B1,
|
||||
template<typename>class B2,
|
||||
template<typename>class B3,
|
||||
template<typename>class B4,
|
||||
typename SUBNET>
|
||||
using inception4 = concat<itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4< itag0<SUBNET>>>>>>>>>>>>,
|
||||
itag1, itag2, itag3, itag4>;
|
||||
template <template<typename>class B1,
|
||||
template<typename>class B2,
|
||||
template<typename>class B3,
|
||||
template<typename>class B4,
|
||||
template<typename>class B5,
|
||||
typename SUBNET>
|
||||
using inception5 = concat<itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4<iskip< itag5<B5< itag0<SUBNET>>>>>>>>>>>>>>>,
|
||||
itag1, itag2, itag3, itag4, itag5>;
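As a rough usage sketch (the branch aliases below are illustrative, not part of dlib; they mirror the blocks used in this commit's tests): an inception3 built from a 5-filter 1x1 convolution, an 8-filter 3x3 convolution, and a 3x3 max-pool branch produces an output whose k() is 5 + 8 + the input's k().

// Illustrative branches; any EXAMPLE_COMPUTATIONAL_LAYER_ stack works here.
template <typename SUBNET> using branch1 = con<5,1,1,1,1,SUBNET>;
template <typename SUBNET> using branch2 = con<8,3,3,1,1,SUBNET>;
template <typename SUBNET> using branch3 = max_pool<3,3,1,1,SUBNET>;

// All three branches read the same input; their outputs are concatenated along
// k(), so for a 1-channel input the network's output has k() == 5 + 8 + 1 == 14.
using toy_net = inception3<branch1, branch2, branch3, input<matrix<float>>>;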
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -678,26 +678,23 @@ namespace dlib { namespace tt
|
||||
#endif
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ------------------------------------------------------------------------------------
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
void concat_depth(tensor& dest, size_t sample_offset, const tensor& src)
|
||||
{
|
||||
void copy_tensor(
|
||||
tensor& dest,
|
||||
size_t dest_k_offset,
|
||||
const tensor& src,
|
||||
size_t src_k_offset,
|
||||
size_t count_k
|
||||
)
|
||||
{
|
||||
#ifdef DLIB_USE_CUDA
|
||||
cuda::concat_depth(dest, sample_offset, src);
|
||||
cuda::copy_tensor(dest, dest_k_offset, src, src_k_offset, count_k);
|
||||
#else
|
||||
cpu::concat_depth(dest, sample_offset, src);
|
||||
cpu::copy_tensor(dest, dest_k_offset, src, src_k_offset, count_k);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void split_depth(tensor& dest, size_t sample_offset, const tensor& src)
|
||||
{
|
||||
#ifdef DLIB_USE_CUDA
|
||||
cuda::split_depth(dest, sample_offset, src);
|
||||
#else
|
||||
cpu::split_depth(dest, sample_offset, src);
|
||||
#endif
|
||||
}
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}}
|
||||
|
@ -1234,41 +1234,25 @@ namespace dlib { namespace tt
|
||||
};
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void concat_depth(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- dest.nc() == src.nc()
|
||||
- dest.nr() == src.nr()
|
||||
- dest.num_samples() == src.num_samples()
|
||||
- dest.k() >= src.k() + sample_offset
|
||||
- is_same_object(dest,src) == false
|
||||
- sample_offset is a count of elements, not bytes
|
||||
ensures
|
||||
- performs: dest[i, k + sample_offset, r, c] = src[i, k, r, c], where k in [0..src.k()]
|
||||
Copies the content of each sample from src into the corresponding place of the sample in dest
|
||||
!*/
|
||||
|
||||
void split_depth(
|
||||
tensor& dest,
|
||||
size_t sample_offset,
|
||||
const tensor& src
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- dest.nc() == src.nc()
|
||||
- dest.nr() == src.nr()
|
||||
- dest.num_samples() == src.num_samples()
|
||||
- dest.k() <= src.k() - sample_offset
|
||||
- is_same_object(dest,src) == false
|
||||
- sample_offset is a count of elements, not bytes
|
||||
ensures
|
||||
- performs: dest[i, k, r, c] = src[i, k + sample_offset, r, c], where k in [0..dest.k()]
|
||||
Fills each sample of dest from the corresponding part of each sample in src
|
||||
!*/
|
||||
void copy_tensor(
|
||||
tensor& dest,
|
||||
size_t dest_k_offset,
|
||||
const tensor& src,
|
||||
size_t src_k_offset,
|
||||
size_t count_k
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- dest.nc() == src.nc()
|
||||
- dest.nr() == src.nr()
|
||||
- dest.num_samples() == src.num_samples()
|
||||
- dest.k() - dest_k_offset >= count_k
|
||||
- src.k() - src_k_offset >= count_k
|
||||
- is_same_object(dest,src) == false
|
||||
ensures
|
||||
- performs: dest[i, k + dest_k_offset, r, c] = src[i, k + src_k_offset, r, c], where k in [0..count_k]
|
||||
Copies the content of each sample from src into the corresponding place of the sample in dest
|
||||
!*/
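For illustration (a hedged sketch, not part of the committed sources; it mirrors what the concat layer and the new tests do, and assumes using namespace dlib for brevity): copy_tensor can stack two tensors depth-wise by copying each one to a different k offset of the destination.

// a and b must have the same num_samples(), nr() and nc().
resizable_tensor a(2, 3, 5, 5), b(2, 4, 5, 5);
a = 1; b = 2;

// The destination holds both depth ranges: k() == a.k() + b.k().
resizable_tensor out(2, a.k() + b.k(), 5, 5);
tt::copy_tensor(out, 0,     a, 0, a.k());  // channels [0,3) of out come from a
tt::copy_tensor(out, a.k(), b, 0, b.k());  // channels [3,7) of out come from b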
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
|
@ -12,7 +12,77 @@
|
||||
#include "tester.h"
|
||||
|
||||
|
||||
namespace
|
||||
namespace dlib{
|
||||
template <typename SUBNET> using concat_block1 = con<5,1,1,1,1,SUBNET>;
|
||||
template <typename SUBNET> using concat_block2 = con<8,3,3,1,1,SUBNET>;
|
||||
template <typename SUBNET> using concat_block3 = max_pool<3,3,1,1,SUBNET>;
|
||||
template <typename SUBNET> using concat_incept = inception3<concat_block1,concat_block2,concat_block3,SUBNET>;
|
||||
|
||||
// this class is a friend of add_layer and can access private members
|
||||
class dnn_tester{
|
||||
public:
|
||||
// The tester function is a member so it has access to the private x_grad member of add_layer
|
||||
static void test_concat()
|
||||
{
|
||||
using namespace test;
|
||||
using namespace std;
|
||||
using namespace dlib::tt;
|
||||
print_spinner();
|
||||
|
||||
using net_type = concat_incept<input<matrix<float>>>;
|
||||
|
||||
resizable_tensor data(10, 1, 111, 222);
|
||||
data = matrix_cast<float>(gaussian_randm(data.num_samples(), data.k() * data.nr() * data.nc(), 1));
|
||||
|
||||
net_type net;
|
||||
|
||||
|
||||
auto& out = net.forward(data);
|
||||
|
||||
auto& b1o = layer<itag1>(net).get_output();
|
||||
auto& b2o = layer<itag2>(net).get_output();
|
||||
auto& b3o = layer<itag3>(net).get_output();
|
||||
|
||||
resizable_tensor dest(10, 14, 111, 222);
|
||||
copy_tensor(dest, 0, b1o, 0, b1o.k());
|
||||
copy_tensor(dest, b1o.k(), b2o, 0, b2o.k());
|
||||
copy_tensor(dest, b1o.k() + b2o.k(), b3o, 0, b3o.k());
|
||||
|
||||
DLIB_TEST(dest.size() == out.size());
|
||||
int error = memcmp(dest.host(), out.host(), dest.size());
|
||||
DLIB_TEST(error == 0);
|
||||
|
||||
resizable_tensor gr(10, 14, 111, 222);
|
||||
gr = matrix_cast<float>(gaussian_randm(gr.num_samples(), gr.k() * gr.nr() * gr.nc(), 1));
|
||||
memcpy(net.get_gradient_input(), gr);
|
||||
|
||||
net.back_propagate_error(data);
|
||||
|
||||
auto& b1g = layer<itag1>(net).subnet().x_grad;
|
||||
auto& b2g = layer<itag2>(net).subnet().x_grad;
|
||||
auto& b3g = layer<itag3>(net).subnet().x_grad;
|
||||
|
||||
resizable_tensor g1(10, 5, 111, 222);
|
||||
resizable_tensor g2(10, 8, 111, 222);
|
||||
resizable_tensor g3(10, 1, 111, 222);
|
||||
|
||||
copy_tensor(g1, 0, gr, 0, g1.k());
|
||||
copy_tensor(g2, 0, gr, g1.k(), g2.k());
|
||||
copy_tensor(g3, 0, gr, g1.k() + g2.k(), g3.k());
|
||||
DLIB_TEST(g1.size() == b1g.size());
|
||||
error = memcmp(g1.host(), b1g.host(), b1g.size());
|
||||
DLIB_TEST(error == 0);
|
||||
DLIB_TEST(g2.size() == b2g.size());
|
||||
error = memcmp(g2.host(), b2g.host(), b2g.size());
|
||||
DLIB_TEST(error == 0);
|
||||
DLIB_TEST(g3.size() == b3g.size());
|
||||
error = memcmp(g3.host(), b3g.host(), b3g.size());
|
||||
DLIB_TEST(error == 0);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using namespace test;
|
||||
@ -1405,6 +1475,121 @@ namespace
|
||||
DLIB_TEST(count == pnet.num_computational_layers);
|
||||
}
|
||||
|
||||
float tensor_read_cpu(const tensor& t, long i, long k, long r, long c)
|
||||
{
|
||||
const float* p = t.host() + t.k() * t.nr() * t.nc() * i +
|
||||
t.nr() * t.nc() * k + t.nc() * r + c;
|
||||
return *p;
|
||||
}
|
||||
void test_copy_tensor_cpu()
|
||||
{
|
||||
using namespace dlib::tt;
|
||||
print_spinner();
|
||||
resizable_tensor dest(10, 9, 7, 15);
|
||||
resizable_tensor src1(10, 3, 7, 15);
|
||||
resizable_tensor src2(10, 3, 7, 15);
|
||||
resizable_tensor src3(10, 9, 7, 15);
|
||||
dest = matrix_cast<float>(gaussian_randm(dest.num_samples(), dest.k() * dest.nr() * dest.nc(), 1));
|
||||
src1 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src1.k() * src1.nr() * src1.nc(), 0));
|
||||
src2 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src2.k() * src2.nr() * src2.nc(), 0));
|
||||
src3 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src3.k() * src3.nr() * src3.nc(), 0));
|
||||
|
||||
cpu::copy_tensor(dest, 0, src1, 0, src1.k()); //full copy src1->dest
|
||||
cpu::copy_tensor(dest, src1.k(), src2, 0, src2.k()); //full copy src2->dest with offset of src1
|
||||
cpu::copy_tensor(dest, src1.k() + src2.k(), src3, 3, 3); // partial copy of src3 into the remaining part of dest
|
||||
|
||||
|
||||
for (long i = 0; i < dest.num_samples(); ++i)
|
||||
{
|
||||
for (long k = 0; k < dest.k(); ++k)
|
||||
{
|
||||
for (long r = 0; r < dest.nr(); ++r)
|
||||
{
|
||||
for (long c = 0; c < dest.nc(); ++c)
|
||||
{
|
||||
float dest_value = tensor_read_cpu(dest, i, k, r, c);
|
||||
// first part is from src1
|
||||
if (k < src1.k())
|
||||
{
|
||||
float src_value = tensor_read_cpu(src1, i, k, r, c);
|
||||
DLIB_TEST(src_value == dest_value);
|
||||
}
|
||||
// second part is from src2
|
||||
else if (k < src1.k() + src2.k())
|
||||
{
|
||||
float src_value = tensor_read_cpu(src2, i, k - src1.k(), r, c);
|
||||
DLIB_TEST(src_value == dest_value);
|
||||
}
|
||||
// third part is from src3
|
||||
else
|
||||
{
|
||||
float src_value = tensor_read_cpu(src3, i, k - src1.k() - src2.k() + 3, r, c);
|
||||
DLIB_TEST(src_value == dest_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef DLIB_USE_CUDA
|
||||
float tensor_read_gpu(const tensor& t, long i, long k, long r, long c)
|
||||
{
|
||||
const float* p = t.device() + t.k() * t.nr() * t.nc() * i +
|
||||
t.nr() * t.nc() * k + t.nc() * r + c;
|
||||
return *p;
|
||||
}
|
||||
void test_copy_tensor_gpu()
|
||||
{
|
||||
using namespace dlib::tt;
|
||||
print_spinner();
|
||||
resizable_tensor dest(10, 9, 7, 15);
|
||||
resizable_tensor src1(10, 3, 7, 15);
|
||||
resizable_tensor src2(10, 3, 7, 15);
|
||||
resizable_tensor src3(10, 9, 7, 15);
|
||||
dest = matrix_cast<float>(gaussian_randm(dest.num_samples(), dest.k() * dest.nr() * dest.nc(), 1));
|
||||
src1 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src1.k() * src1.nr() * src1.nc(), 0));
|
||||
src2 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src2.k() * src2.nr() * src2.nc(), 0));
|
||||
src3 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src3.k() * src3.nr() * src3.nc(), 0));
|
||||
|
||||
gpu::copy_tensor(dest, 0, src1, 0, src1.k()); //full copy src1->dest
|
||||
gpu::copy_tensor(dest, src1.k(), src2, 0, src2.k()); //full copy src2->dest with offset of src1
|
||||
gpu::copy_tensor(dest, src1.k() + src2.k(), src3, 3, 3); // partial copy of src3 into the remaining part of dest
|
||||
|
||||
|
||||
for (long i = 0; i < dest.num_samples(); ++i)
|
||||
{
|
||||
for (long k = 0; k < dest.k(); ++k)
|
||||
{
|
||||
for (long r = 0; r < dest.nr(); ++r)
|
||||
{
|
||||
for (long c = 0; c < dest.nc(); ++c)
|
||||
{
|
||||
float dest_value = tensor_read_gpu(dest, i, k, r, c);
|
||||
// first part is from src1
|
||||
if (k < src1.k())
|
||||
{
|
||||
float src_value = tensor_read_gpu(src1, i, k, r, c);
|
||||
DLIB_TEST(src_value == dest_value);
|
||||
}
|
||||
// second part is from src2
|
||||
else if (k < src1.k() + src2.k())
|
||||
{
|
||||
float src_value = tensor_read_gpu(src2, i, k - src1.k(), r, c);
|
||||
DLIB_TEST(src_value == dest_value);
|
||||
}
|
||||
// third part is from src3
|
||||
else
|
||||
{
|
||||
float src_value = tensor_read_gpu(src3, i, k - src1.k() - src2.k() + 3, r, c);
|
||||
DLIB_TEST(src_value == dest_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif//DLIB_USE_CUDA
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class dnn_tester : public tester
|
||||
@ -1433,6 +1618,7 @@ namespace
|
||||
compare_bn_conv_gpu_and_cpu();
|
||||
test_add();
|
||||
compare_adam();
|
||||
test_copy_tensor_gpu();
|
||||
#endif
|
||||
test_max_pool(1,1,2,3,0,0);
|
||||
test_max_pool(3,3,1,1,0,0);
|
||||
@ -1466,6 +1652,8 @@ namespace
|
||||
test_basic_tensor_ops();
|
||||
test_layers();
|
||||
test_visit_funcions();
|
||||
test_copy_tensor_cpu();
|
||||
dlib::dnn_tester::test_concat();
|
||||
}
|
||||
} a;
|
||||
|
||||
|
@ -15,22 +15,42 @@
|
||||
#include <dlib/dnn.h>
|
||||
#include <iostream>
|
||||
#include <dlib/data_io.h>
|
||||
#include <tuple>
|
||||
|
||||
using namespace std;
|
||||
using namespace dlib;
|
||||
|
||||
// Here we define the inception module as described in the GoogLeNet paper. The depth of each sublayer can be changed.
|
||||
template<typename SUBNET>
|
||||
using inception = grp<std::tuple<con<8,1,1,1,1, group_input>,
|
||||
con<8,3,3,1,1, con<8,1,1,1,1, group_input>>,
|
||||
con<8,5,5,1,1, con<8,1,1,1,1, group_input>>,
|
||||
con<8,1,1,1,1, max_pool<3,3,1,1, group_input>>>,
|
||||
SUBNET>;
|
||||
// An inception layer contains several different convolutions inside.
|
||||
// Here we define blocks of convolutions with different kernel sizes that we will use in
|
||||
// the inception layer.
|
||||
template <typename SUBNET> using block_a1 = relu<con<4,1,1,1,1,SUBNET>>;
|
||||
template <typename SUBNET> using block_a2 = relu<con<4,3,3,1,1,relu<con<4,1,1,1,1,SUBNET>>>>;
|
||||
template <typename SUBNET> using block_a3 = relu<con<4,5,5,1,1,relu<con<4,1,1,1,1,SUBNET>>>>;
|
||||
template <typename SUBNET> using block_a4 = relu<con<4,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;
|
||||
|
||||
// Here is the inception layer definition. It uses the blocks above to process the input and returns their combined output.
|
||||
template <typename SUBNET> using incept_a = inception4<block_a1,block_a2,block_a3,block_a4, SUBNET>;
|
||||
|
||||
// A network can have inception layers of different structure.
|
||||
// Here are blocks with different convolutions
|
||||
template <typename SUBNET> using block_b1 = relu<con<8,1,1,1,1,SUBNET>>;
|
||||
template <typename SUBNET> using block_b2 = relu<con<8,3,3,1,1,SUBNET>>;
|
||||
template <typename SUBNET> using block_b3 = relu<con<8,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;
|
||||
|
||||
// Here is the inception layer definition. It uses the blocks above to process the input and returns their combined output.
|
||||
template <typename SUBNET> using incept_b = inception3<block_b1,block_b2,block_b3,SUBNET>;
|
||||
|
||||
// and then the network type is
|
||||
using net_type = loss_multiclass_log<
|
||||
fc<10,
|
||||
relu<fc<32,
|
||||
max_pool<2,2,2,2,incept_b<
|
||||
max_pool<2,2,2,2,incept_a<
|
||||
input<matrix<unsigned char>>
|
||||
>>>>>>>>;
|
||||
|
||||
int main(int argc, char** argv) try
|
||||
{
|
||||
// This example is going to run on the MNIST dataset.
|
||||
// This example is going to run on the MNIST dataset.
|
||||
if (argc != 2)
|
||||
{
|
||||
cout << "This example needs the MNIST dataset to run!" << endl;
|
||||
@ -48,25 +68,10 @@ int main(int argc, char** argv) try
|
||||
load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);
|
||||
|
||||
|
||||
// Create the same network as in dnn_mnist_ex, but use an inception layer instead of a convolution
|
||||
// in the middle
|
||||
using net_type = loss_multiclass_log<
|
||||
fc<10,
|
||||
relu<fc<84,
|
||||
relu<fc<120,
|
||||
max_pool<2,2,2,2,relu<inception<
|
||||
max_pool<2,2,2,2,relu<con<6,5,5,1,1,
|
||||
input<matrix<unsigned char>>
|
||||
>>>>>>>>>>>>;
|
||||
|
||||
|
||||
// Create a network as defined above. This network will produce 10 outputs
|
||||
// because that's how we defined net_type. However, fc layers can have the
|
||||
// number of outputs they produce changed at runtime.
|
||||
// The rest of the example is identical to dnn_mnist_ex.
|
||||
// Create network of predefined type.
|
||||
net_type net;
|
||||
|
||||
// The following training process is the same as in the dnn_mnist_ex example.
|
||||
|
||||
// And then train it using the MNIST data. The code below uses mini-batch stochastic
|
||||
// gradient descent with an initial learning rate of 0.01 to accomplish this.
|
||||
dnn_trainer<net_type> trainer(net);
|
||||
@ -80,12 +85,12 @@ int main(int argc, char** argv) try
|
||||
// from scratch. This is because, when the program restarts, this call to
|
||||
// set_synchronization_file() will automatically reload the settings from mnist_sync if
|
||||
// the file exists.
|
||||
trainer.set_synchronization_file("mnist_sync", std::chrono::seconds(20));
|
||||
trainer.set_synchronization_file("inception_sync", std::chrono::seconds(20));
|
||||
// Finally, this line begins training. By default, it runs SGD with our specified
|
||||
// learning rate until the loss stops decreasing. Then it reduces the learning rate by
|
||||
// a factor of 10 and continues running until the loss stops decreasing again. It will
|
||||
// keep doing this until the learning rate has dropped below the min learning rate
|
||||
// defined above or the maximum number of epochs has been executed (defaulted to 10000).
|
||||
// defined above or the maximum number of epochs has been executed (defaulted to 10000).
|
||||
trainer.train(training_images, training_labels);
|
||||
|
||||
// At this point our net object should have learned how to classify MNIST images. But
|
||||
@ -96,7 +101,7 @@ int main(int argc, char** argv) try
|
||||
// about that kind of transient data so that our file will be smaller. We do this by
|
||||
// "cleaning" the network before saving it.
|
||||
net.clean();
|
||||
serialize("mnist_network.dat") << net;
|
||||
serialize("mnist_network_inception.dat") << net;
|
||||
// Now if we later wanted to recall the network from disk we can simply say:
|
||||
// deserialize("mnist_network.dat") >> net;
|
||||
|
||||
|