@ -631,7 +631,7 @@ namespace
DLIB_TEST ( : : std : : abs ( rs . stddev ( ) - 1.0f ) < 0.01 ) ;
DLIB_TEST ( : : std : : abs ( rs . stddev ( ) - 1.0f ) < 0.01 ) ;
}
}
// check that the CPU and the CUDA implementation are equivalent
// check that the CPU and the CUDA implementation are equivalent
# if DLIB_USE_CUDA
# if def DLIB_USE_CUDA
resizable_tensor y_cuda ( x ) ;
resizable_tensor y_cuda ( x ) ;
resizable_tensor means_cuda ( x . num_samples ( ) ) , invstds_cuda ( x . num_samples ( ) ) ;
resizable_tensor means_cuda ( x . num_samples ( ) ) , invstds_cuda ( x . num_samples ( ) ) ;
cuda : : layer_normalize ( eps , y_cuda , means_cuda , invstds_cuda , x , gamma , beta ) ;
cuda : : layer_normalize ( eps , y_cuda , means_cuda , invstds_cuda , x , gamma , beta ) ;
@ -655,6 +655,99 @@ namespace
# endif
# endif
}
}
// ----------------------------------------------------------------------------------------
void test_rms_normalize ( )
{
resizable_tensor x ( 2 , 3 , 4 , 5 ) ;
resizable_tensor y_cpu ( x ) ;
tt : : tensor_rand rnd ( 0 ) ;
rnd . fill_uniform ( x ) ;
resizable_tensor scale_cpu ;
resizable_tensor gamma ( 1 , x . k ( ) ) ;
gamma = 1 ;
const float eps = 1e-5 ;
cpu : : rms_normalize ( eps , y_cpu , scale_cpu , x , gamma ) ;
// check that the output is correctly normalized
const float * p_x = x . host ( ) ;
const float * p_y = y_cpu . host ( ) ;
const float * p_scale = scale_cpu . host ( ) ;
bool error_found = false ;
for ( long n = 0 ; n < x . num_samples ( ) ; + + n )
{
for ( long k = 0 ; k < x . k ( ) ; + + k )
{
for ( long r = 0 ; r < x . nr ( ) ; + + r )
{
for ( long c = 0 ; c < x . nc ( ) ; + + c )
{
float x_val = p_x [ tensor_index ( x , n , k , r , c ) ] ;
float y_val = p_y [ tensor_index ( y_cpu , n , k , r , c ) ] ;
float rms_val = p_scale [ n ] ;
if ( std : : abs ( y_val - x_val * rms_val ) > = 1e-5 ) error_found = true ;
}
}
}
}
DLIB_TEST ( ! error_found ) ;
// check the backward pass
resizable_tensor gradient_input ( x ) ;
resizable_tensor src_grad_cpu ( x ) , gamma_grad_cpu ( 1 , x . k ( ) ) ;
resizable_tensor dscale_cpu ( x . num_samples ( ) ) ;
rnd . fill_gaussian ( gradient_input ) ;
src_grad_cpu = 0 ;
cpu : : rms_normalize_gradient ( gradient_input , scale_cpu , x , gamma , src_grad_cpu , gamma_grad_cpu , dscale_cpu ) ;
const float * p_gradient_input = gradient_input . host ( ) ;
const float * p_src = x . host ( ) ;
const float * p_src_grad_cpu = src_grad_cpu . host ( ) ;
const float * p_gamma = gamma . host ( ) ;
const float * p_scale_cpu = scale_cpu . host ( ) ;
const float * p_dscale_cpu = dscale_cpu . host ( ) ;
bool backward_error_found = false ;
for ( long n = 0 ; n < x . num_samples ( ) ; + + n )
{
const float scale_pow = - 0.5 * std : : pow ( p_scale_cpu [ n ] , 3.0f ) ;
for ( long k = 0 ; k < x . k ( ) ; + + k )
{
for ( long r = 0 ; r < x . nr ( ) ; + + r )
{
for ( long c = 0 ; c < x . nc ( ) ; + + c )
{
float gradient_input_val = p_gradient_input [ tensor_index ( gradient_input , n , k , r , c ) ] ;
float src_val = p_src [ tensor_index ( x , n , k , r , c ) ] ;
float rms_val = p_scale_cpu [ n ] ;
float expected_src_grad = gradient_input_val * p_gamma [ k ] * rms_val + p_dscale_cpu [ n ] * 2 * src_val * 1.0f / ( x . k ( ) * x . nr ( ) * x . nc ( ) ) ;
float src_grad_val = p_src_grad_cpu [ tensor_index ( src_grad_cpu , n , k , r , c ) ] ;
if ( std : : abs ( src_grad_val - expected_src_grad ) > = 1e-4 )
backward_error_found = true ;
}
}
}
}
DLIB_TEST ( ! backward_error_found ) ;
// check that the CPU and the CUDA implementation are equivalent
# ifdef DLIB_USE_CUDA
resizable_tensor y_cuda ( x ) ;
resizable_tensor scale_cuda ;
cuda : : rms_normalize ( eps , y_cuda , scale_cuda , x , gamma ) ;
DLIB_TEST ( max ( abs ( mat ( y_cpu ) - mat ( y_cuda ) ) ) < 1e-5 ) ;
DLIB_TEST ( max ( abs ( mat ( scale_cpu ) - mat ( scale_cuda ) ) ) < 1e-5 ) ;
resizable_tensor src_grad_cuda ( x ) , gamma_grad_cuda ( 1 , x . k ( ) ) ;
resizable_tensor dscale_cuda ( x . num_samples ( ) ) ;
src_grad_cuda = 0 ;
cuda : : rms_normalize_gradient ( gradient_input , scale_cuda , x , gamma , src_grad_cuda , gamma_grad_cuda , dscale_cuda ) ;
DLIB_TEST ( max ( abs ( mat ( src_grad_cpu ) - mat ( src_grad_cuda ) ) ) < 1e-5 ) ;
DLIB_TEST ( max ( abs ( mat ( gamma_grad_cpu ) - mat ( gamma_grad_cuda ) ) ) < 1e-5 ) ;
DLIB_TEST ( max ( abs ( mat ( dscale_cpu ) - mat ( dscale_cuda ) ) ) < 1e-5 ) ;
# endif
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
void test_basic_tensor_ops ( )
void test_basic_tensor_ops ( )
@ -2013,6 +2106,12 @@ namespace
auto res = test_layer ( l ) ;
auto res = test_layer ( l ) ;
DLIB_TEST_MSG ( res , res ) ;
DLIB_TEST_MSG ( res , res ) ;
}
}
{
print_spinner ( ) ;
rms_norm_ l ;
auto res = test_layer ( l ) ;
DLIB_TEST_MSG ( res , res ) ;
}
{
{
print_spinner ( ) ;
print_spinner ( ) ;
cont_ < 3 , 3 , 3 , 2 , 2 , 0 , 0 > l ;
cont_ < 3 , 3 , 3 , 2 , 2 , 0 , 0 > l ;
@ -4389,6 +4488,7 @@ namespace
test_batch_normalize ( ) ;
test_batch_normalize ( ) ;
test_batch_normalize_conv ( ) ;
test_batch_normalize_conv ( ) ;
test_layer_normalize ( ) ;
test_layer_normalize ( ) ;
test_rms_normalize ( ) ;
test_basic_tensor_ops ( ) ;
test_basic_tensor_ops ( ) ;
test_layers ( ) ;
test_layers ( ) ;
test_visit_functions ( ) ;
test_visit_functions ( ) ;