First changes for SIMD matrix operations

This commit is contained in:
Erik Hofman 2016-11-22 14:27:42 +01:00
parent 5681fcbdc5
commit 22a74c63b4
7 changed files with 477 additions and 208 deletions

View File

@ -33,6 +33,7 @@ set(HEADERS
sg_types.hxx
sg_random.h
simd.hxx
simd4x4.hxx
)
set(SOURCES

View File

@ -18,6 +18,8 @@
#ifndef SGMatrix_H
#define SGMatrix_H
#include "simd4x4.hxx"
/// Expression templates for poor programmers ... :)
template<typename T>
struct TransNegRef;
@ -38,13 +40,13 @@ public:
/// uninitialized values in the debug build very fast ...
#ifndef NDEBUG
for (unsigned i = 0; i < nEnts; ++i)
_data.flat[i] = SGLimits<T>::quiet_NaN();
_data[i] = SGLimits<T>::quiet_NaN();
#endif
}
/// Constructor. Initialize by the content of a plain array,
/// make sure it has at least 16 elements
explicit SGMatrix(const T* data)
{ for (unsigned i = 0; i < nEnts; ++i) _data.flat[i] = data[i]; }
{ for (unsigned i = 0; i < nEnts; ++i) _data[i] = data[i]; }
/// Constructor, build up a SGMatrix from given elements
SGMatrix(T m00, T m01, T m02, T m03,
@ -52,14 +54,14 @@ public:
T m20, T m21, T m22, T m23,
T m30, T m31, T m32, T m33)
{
_data.flat[0] = m00; _data.flat[1] = m10;
_data.flat[2] = m20; _data.flat[3] = m30;
_data.flat[4] = m01; _data.flat[5] = m11;
_data.flat[6] = m21; _data.flat[7] = m31;
_data.flat[8] = m02; _data.flat[9] = m12;
_data.flat[10] = m22; _data.flat[11] = m32;
_data.flat[12] = m03; _data.flat[13] = m13;
_data.flat[14] = m23; _data.flat[15] = m33;
_data[0] = m00; _data[1] = m10;
_data[2] = m20; _data[3] = m30;
_data[4] = m01; _data[5] = m11;
_data[6] = m21; _data[7] = m31;
_data[8] = m02; _data[9] = m12;
_data[10] = m22; _data[11] = m32;
_data[12] = m03; _data[13] = m13;
_data[14] = m23; _data[15] = m33;
}
/// Constructor, build up a SGMatrix from a translation
@ -80,14 +82,14 @@ public:
template<typename S>
void set(const SGVec3<S>& trans)
{
_data.flat[0] = 1; _data.flat[4] = 0;
_data.flat[8] = 0; _data.flat[12] = T(trans(0));
_data.flat[1] = 0; _data.flat[5] = 1;
_data.flat[9] = 0; _data.flat[13] = T(trans(1));
_data.flat[2] = 0; _data.flat[6] = 0;
_data.flat[10] = 1; _data.flat[14] = T(trans(2));
_data.flat[3] = 0; _data.flat[7] = 0;
_data.flat[11] = 0; _data.flat[15] = 1;
_data[0] = 1; _data[4] = 0;
_data[8] = 0; _data[12] = T(trans(0));
_data[1] = 0; _data[5] = 1;
_data[9] = 0; _data[13] = T(trans(1));
_data[2] = 0; _data[6] = 0;
_data[10] = 1; _data[14] = T(trans(2));
_data[3] = 0; _data[7] = 0;
_data[11] = 0; _data[15] = 1;
}
/// Set from a scale/rotation and tranlation
@ -98,82 +100,89 @@ public:
T xx = x*x; T yy = y*y; T zz = z*z;
T wx = w*x; T wy = w*y; T wz = w*z;
T xy = x*y; T xz = x*z; T yz = y*z;
_data.flat[0] = 1-2*(yy+zz); _data.flat[1] = 2*(xy-wz);
_data.flat[2] = 2*(xz+wy); _data.flat[3] = 0;
_data.flat[4] = 2*(xy+wz); _data.flat[5] = 1-2*(xx+zz);
_data.flat[6] = 2*(yz-wx); _data.flat[7] = 0;
_data.flat[8] = 2*(xz-wy); _data.flat[9] = 2*(yz+wx);
_data.flat[10] = 1-2*(xx+yy); _data.flat[11] = 0;
_data.flat[12] = 0; _data.flat[13] = 0;
_data.flat[14] = 0; _data.flat[15] = 1;
_data[0] = 1-2*(yy+zz); _data[1] = 2*(xy-wz);
_data[2] = 2*(xz+wy); _data[3] = 0;
_data[4] = 2*(xy+wz); _data[5] = 1-2*(xx+zz);
_data[6] = 2*(yz-wx); _data[7] = 0;
_data[8] = 2*(xz-wy); _data[9] = 2*(yz+wx);
_data[10] = 1-2*(xx+yy); _data[11] = 0;
_data[12] = 0; _data[13] = 0;
_data[14] = 0; _data[15] = 1;
}
/// set from a transposed negated matrix
void set(const TransNegRef<T>& tm)
{
const SGMatrix& m = tm.m;
_data.flat[0] = m(0,0);
_data.flat[1] = m(0,1);
_data.flat[2] = m(0,2);
_data.flat[3] = m(3,0);
_data[0] = m(0,0);
_data[1] = m(0,1);
_data[2] = m(0,2);
_data[3] = m(3,0);
_data.flat[4] = m(1,0);
_data.flat[5] = m(1,1);
_data.flat[6] = m(1,2);
_data.flat[7] = m(3,1);
_data[4] = m(1,0);
_data[5] = m(1,1);
_data[6] = m(1,2);
_data[7] = m(3,1);
_data.flat[8] = m(2,0);
_data.flat[9] = m(2,1);
_data.flat[10] = m(2,2);
_data.flat[11] = m(3,2);
_data[8] = m(2,0);
_data[9] = m(2,1);
_data[10] = m(2,2);
_data[11] = m(3,2);
// Well, this one is ugly here, as that xform method on the current
// object needs the above data to be already set ...
SGVec3<T> t = xformVec(SGVec3<T>(m(0,3), m(1,3), m(2,3)));
_data.flat[12] = -t(0);
_data.flat[13] = -t(1);
_data.flat[14] = -t(2);
_data.flat[15] = m(3,3);
_data[12] = -t(0);
_data[13] = -t(1);
_data[14] = -t(2);
_data[15] = m(3,3);
}
/// Access by index, the index is unchecked
const T& operator()(unsigned i, unsigned j) const
{ return _data.flat[i + 4*j]; }
{ return _data[i + 4*j]; }
/// Access by index, the index is unchecked
T& operator()(unsigned i, unsigned j)
{ return _data.flat[i + 4*j]; }
{ return _data[i + 4*j]; }
/// Access raw data by index, the index is unchecked
const T& operator[](unsigned i) const
{ return _data.flat[i]; }
{ return _data[i]; }
/// Access by index, the index is unchecked
T& operator[](unsigned i)
{ return _data.flat[i]; }
{ return _data[i]; }
/// Get the data pointer
const T* data(void) const
{ return _data.flat; }
{ return _data; }
/// Get the data pointer
T* data(void)
{ return _data.flat; }
{ return _data; }
/// Readonly interface function to ssg's sgMat4/sgdMat4
const T (&sg(void) const)[4][4]
{ return _data.carray; }
{ return _data.ptr(); }
/// Interface function to ssg's sgMat4/sgdMat4
T (&sg(void))[4][4]
{ return _data.carray; }
{ return _data.ptr(); }
/// Readonly raw storage interface
const simd4x4_t<T,4> (&simd4x4(void) const)
{ return _data; }
/// Readonly raw storage interface
simd4x4_t<T,4> (&simd4x4(void))
{ return _data; }
/// Inplace addition
SGMatrix& operator+=(const SGMatrix& m)
{ for (unsigned i = 0; i < nEnts; ++i) _data.flat[i] += m._data.flat[i]; return *this; }
{ for (unsigned i = 0; i < nEnts; ++i) _data[i] += m._data[i]; return *this; }
/// Inplace subtraction
SGMatrix& operator-=(const SGMatrix& m)
{ for (unsigned i = 0; i < nEnts; ++i) _data.flat[i] -= m._data.flat[i]; return *this; }
{ for (unsigned i = 0; i < nEnts; ++i) _data[i] -= m._data[i]; return *this; }
/// Inplace scalar multiplication
template<typename S>
SGMatrix& operator*=(S s)
{ for (unsigned i = 0; i < nEnts; ++i) _data.flat[i] *= s; return *this; }
{ for (unsigned i = 0; i < nEnts; ++i) _data[i] *= s; return *this; }
/// Inplace scalar multiplication by 1/s
template<typename S>
SGMatrix& operator/=(S s)
@ -259,22 +268,15 @@ public:
/// Return an all zero matrix
static SGMatrix zeros(void)
{ return SGMatrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); }
{ SGMatrix r; simd4x4::zeros(r.simd4x4()); return r; }
/// Return a unit matrix
static SGMatrix unit(void)
{ return SGMatrix(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); }
{ SGMatrix r; simd4x4::unit(r.simd4x4()); return r; }
private:
/// Required to make that alias safe.
union Data {
T flat[16];
T carray[4][4];
};
/// The actual data, the matrix is stored in column major order,
/// that matches the storage format of OpenGL
Data _data;
simd4x4_t<T,4> _data;
};
/// Class to distinguish between a matrix and the matrix with a transposed
@ -296,60 +298,55 @@ operator+(const SGMatrix<T>& m)
template<typename T>
inline
SGMatrix<T>
operator-(const SGMatrix<T>& m)
operator-(SGMatrix<T> m)
{
SGMatrix<T> ret;
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i)
ret[i] = -m[i];
return ret;
for (unsigned i = 0; i < SGMatrix<T>::nRows; ++i)
m[i] = -m[i];
return m;
}
/// Binary +
template<typename T>
inline
SGMatrix<T>
operator+(const SGMatrix<T>& m1, const SGMatrix<T>& m2)
operator+(SGMatrix<T> m1, const SGMatrix<T>& m2)
{
SGMatrix<T> ret;
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i)
ret[i] = m1[i] + m2[i];
return ret;
m1[i] += m2[i];
return m1;
}
/// Binary -
template<typename T>
inline
SGMatrix<T>
operator-(const SGMatrix<T>& m1, const SGMatrix<T>& m2)
operator-(SGMatrix<T> m1, const SGMatrix<T>& m2)
{
SGMatrix<T> ret;
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i)
ret[i] = m1[i] - m2[i];
return ret;
m1[i] -= m2[i];
return m1;
}
/// Scalar multiplication
template<typename S, typename T>
inline
SGMatrix<T>
operator*(S s, const SGMatrix<T>& m)
operator*(S s, SGMatrix<T> m)
{
SGMatrix<T> ret;
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i)
ret[i] = s*m[i];
return ret;
m[i] *= s;
return m;
}
/// Scalar multiplication
template<typename S, typename T>
inline
SGMatrix<T>
operator*(const SGMatrix<T>& m, S s)
operator*(SGMatrix<T> m, S s)
{
SGMatrix<T> ret;
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i)
ret[i] = s*m[i];
return ret;
m[i] *= s;
return m;
}
/// Vector multiplication

View File

@ -244,15 +244,18 @@ length(const SGVec2<T>& v)
template<typename T>
inline
T
norm1(const SGVec2<T>& v)
{ return fabs(v(0)) + fabs(v(1)); }
norm1(SGVec2<T> v)
{ v.simd2() = simd4::abs(v.simd2()); return (v(0)+v(1)); }
/// The inf-norm of the vector
template<typename T>
inline
T
normI(const SGVec2<T>& v)
{ return SGMisc<T>::max(fabs(v(0)), fabs(v(1))); }
normI(SGVec2<T> v)
{
v.simd2() = simd4::abs(v.simd2());
return SGMisc<T>::max(v(0), v(1));
}
/// The euclidean norm of the vector, that is what most people call length
template<typename T>
@ -383,11 +386,11 @@ operator<<(std::basic_ostream<char_type, traits_type>& s, const SGVec2<T>& v)
inline
SGVec2f
toVec2f(const SGVec2d& v)
{ return SGVec2f((float)v(0), (float)v(1)); }
{ SGVec2f f(v); return f; }
inline
SGVec2d
toVec2d(const SGVec2f& v)
{ return SGVec2d(v(0), v(1)); }
{ SGVec2d d(v); return d; }
#endif

View File

@ -338,15 +338,18 @@ length(const SGVec3<T>& v)
template<typename T>
inline
T
norm1(const SGVec3<T>& v)
{ return fabs(v(0)) + fabs(v(1)) + fabs(v(2)); }
norm1(SGVec3<T> v)
{ v.simd3() = simd4::abs(v.simd3()); return (v(0)+v(1)+v(2)); }
/// The inf-norm of the vector
template<typename T>
inline
T
normI(const SGVec3<T>& v)
{ return SGMisc<T>::max(fabs(v(0)), fabs(v(1)), fabs(v(2))); }
normI(SGVec3<T> v)
{
v.simd3() = simd4::abs(v.simd3());
return SGMisc<T>::max(v(0), v(1), v(2));
}
/// Vector cross product
template<typename T>
@ -519,11 +522,11 @@ operator<<(std::basic_ostream<char_type, traits_type>& s, const SGVec3<T>& v)
inline
SGVec3f
toVec3f(const SGVec3d& v)
{ return SGVec3f((float)v(0), (float)v(1), (float)v(2)); }
{ SGVec3f f(v); return f; }
inline
SGVec3d
toVec3d(const SGVec3f& v)
{ return SGVec3d(v(0), v(1), v(2)); }
{ SGVec3d d(v); return d; }
#endif

View File

@ -291,15 +291,18 @@ length(const SGVec4<T>& v)
template<typename T>
inline
T
norm1(const SGVec4<T>& v)
{ return fabs(v(0)) + fabs(v(1)) + fabs(v(2)) + fabs(v(3)); }
norm1(SGVec4<T> v)
{ v.simd4() = simd4::abs(v.simd4()); return (v(0)+v(1)+v(2)+v(3)); }
/// The inf-norm of the vector
template<typename T>
inline
T
normI(const SGVec4<T>& v)
{ return SGMisc<T>::max(fabs(v(0)), fabs(v(1)), fabs(v(2)), fabs(v(2))); }
normI(SGVec4<T> v)
{
v.simd4() = simd4::abs(v.simd4());
return SGMisc<T>::max(v(0), v(1), v(2), v(3));
}
/// The euclidean norm of the vector, that is what most people call length
template<typename T>
@ -439,11 +442,11 @@ operator<<(std::basic_ostream<char_type, traits_type>& s, const SGVec4<T>& v)
inline
SGVec4f
toVec4f(const SGVec4d& v)
{ return SGVec4f((float)v(0), (float)v(1), (float)v(2), (float)v(3)); }
{ SGVec4f f(v); return f; }
inline
SGVec4d
toVec4d(const SGVec4f& v)
{ return SGVec4d(v(0), v(1), v(2), v(3)); }
{ SGVec4d d(v); return d; }
#endif

View File

@ -19,7 +19,7 @@
#define __SIMD_H__ 1
#include <cstring>
#include <cmath>
template<typename T, int N>
class simd4_t
@ -31,7 +31,7 @@ private:
};
public:
simd4_t() {}
simd4_t(void) {}
template<typename S>
simd4_t(S s) {
vec[0] = vec[1] = vec[2] = vec[3] = s;
@ -42,21 +42,21 @@ public:
simd4_t(const simd4_t<T,N>& v) {
std::memcpy(vec, v.vec, sizeof(T[N]));
}
~simd4_t() {}
~simd4_t(void) {}
T (&ptr(void))[N] {
inline const T (&ptr(void) const)[N] {
return vec;
}
const T (&ptr(void) const)[N] {
inline T (&ptr(void))[N] {
return vec;
}
inline operator const T*() const {
inline operator const T*(void) const {
return vec;
}
inline operator T*() {
inline operator T*(void) {
return vec;
}
@ -75,101 +75,83 @@ public:
}
template<typename S>
inline simd4_t<T,N> operator+(S s)
{
inline simd4_t<T,N> operator+(S s) {
simd4_t<T,N> r(*this);
r += s;
return r;
}
inline simd4_t<T,N> operator+(const T v[N])
{
inline simd4_t<T,N> operator+(const T v[N]) {
simd4_t<T,N> r(v);
r += *this;
return r;
}
inline simd4_t<T,N> operator+(const simd4_t<T,N>& v)
inline simd4_t<T,N> operator+(simd4_t<T,N> v)
{
simd4_t<T,N> r(v);
r += *this;
return r;
v += *this;
return v;
}
inline simd4_t<T,N> operator-()
{
inline simd4_t<T,N> operator-(void) {
simd4_t<T,N> r(0);
r -= *this;
return r;
}
template<typename S>
inline simd4_t<T,N> operator-(S s)
{
inline simd4_t<T,N> operator-(S s) {
simd4_t<T,N> r(*this);
r -= s;
return r;
}
inline simd4_t<T,N> operator-(simd4_t<T,N>& v)
{
inline simd4_t<T,N> operator-(const simd4_t<T,N>& v) {
simd4_t<T,N> r(*this);
r -= v;
return r;
}
template<typename S>
inline simd4_t<T,N> operator*(S s)
{
inline simd4_t<T,N> operator*(S s) {
simd4_t<T,N> r(s);
r *= *this;
return r;
}
inline simd4_t<T,N> operator*(const T v[N])
{
inline simd4_t<T,N> operator*(const T v[N]) {
simd4_t<T,N> r(v);
r *= *this;
return r;
}
inline simd4_t<T,N> operator*(simd4_t<T,N>& v)
{
simd4_t<T,N> r(v);
r *= *this;
return r;
inline simd4_t<T,N> operator*(simd4_t<T,N> v) {
v *= *this; return v;
}
template<typename S>
inline simd4_t<T,N> operator/(S s)
{
inline simd4_t<T,N> operator/(S s) {
simd4_t<T,N> r(1/T(s));
r *= this;
return r;
}
inline simd4_t<T,N> operator/(const T v[N])
{
inline simd4_t<T,N> operator/(const T v[N]) {
simd4_t<T,N> r(*this);
r /= v;
return r;
}
inline simd4_t<T,N> operator/(simd4_t<T,N>& v)
{
inline simd4_t<T,N> operator/(const simd4_t<T,N>& v) {
simd4_t<T,N> r(*this);
r /= v;
return r;
r /= v; return v;
}
template<typename S>
inline simd4_t<T,N>& operator+=(S s)
{
inline simd4_t<T,N>& operator+=(S s) {
for (int i=0; i<N; i++) {
vec[i] += s;
}
return *this;
}
inline simd4_t<T,N>& operator+=(const T v[N])
{
inline simd4_t<T,N>& operator+=(const T v[N]) {
simd4_t<T,N> r(v);
*this += r;
*this += r.simd4;
return *this;
}
inline simd4_t<T,N>& operator+=(const simd4_t<T,N>& v)
{
inline simd4_t<T,N>& operator+=(const simd4_t<T,N>& v) {
for (int i=0; i<N; i++) {
vec[i] += v[i];
}
@ -177,22 +159,19 @@ public:
}
template<typename S>
inline simd4_t<T,N>& operator-=(S s)
{
inline simd4_t<T,N>& operator-=(S s) {
for (int i=0; i<N; i++) {
vec[i] -= s;
}
return *this;
}
inline simd4_t<T,N>& operator-=(const T v[N])
{
inline simd4_t<T,N>& operator-=(const T v[N]) {
simd4_t<T,N> r(v);
*this -= r;
*this -= r.simd4;
return *this;
}
inline simd4_t<T,N>& operator-=(const simd4_t<T,N>& v)
{
inline simd4_t<T,N>& operator-=(const simd4_t<T,N>& v) {
for (int i=0; i<N; i++) {
vec[i] -= v[i];
}
@ -200,21 +179,18 @@ public:
}
template<typename S>
inline simd4_t<T,N>& operator *=(S s)
{
inline simd4_t<T,N>& operator*=(S s) {
for (int i=0; i<N; i++) {
vec[i] *= s;
}
return *this;
}
inline simd4_t<T,N>& operator*=(const T v[N])
{
inline simd4_t<T,N>& operator*=(const T v[N]) {
simd4_t<T,N> r(v);
*this *= r;
*this *= r.simd4;
return *this;
}
inline simd4_t<T,N>& operator*=(const simd4_t<T,N>& v)
{
inline simd4_t<T,N>& operator*=(const simd4_t<T,N>& v) {
for (int i=0; i<N; i++) {
vec[i] *= v[i];
}
@ -222,30 +198,37 @@ public:
}
template<typename S>
inline simd4_t<T,N>& operator/=(S s)
{
s = (1/s);
for (int i=0; i<N; i++) {
vec[i] *= s;
}
return *this;
inline simd4_t<T,N>& operator/=(S s) {
return operator*=(1/T(s));
}
inline simd4_t<T,N>& operator/=(const T v[N])
{
inline simd4_t<T,N>& operator/=(const T v[N]) {
simd4_t<T,N> r(v);
*this /= r;
*this /= r.simd4;
return *this;
}
inline simd4_t<T,N>& operator/=(const simd4_t<T,N>& v)
{
inline simd4_t<T,N>& operator/=(const simd4_t<T,N>& v) {
for (int i=0; i<N; i++) {
vec[i] /= v[i];
}
return *this;
}
};
namespace simd4
{
template<typename T, int N>
inline simd4_t<T,N> abs(simd4_t<T,N> v) {
for (int i=0; i<N; i++) {
v[i] = std::abs(v[i]);
}
return v;
}
};
# ifdef __MMX__
# include <mmintrin.h>
# endif
# ifdef __SSE__
# include <xmmintrin.h>
@ -261,7 +244,7 @@ private:
};
public:
simd4_t() {}
simd4_t(void) {}
simd4_t(float f) {
simd4 = _mm_set1_ps(f);
}
@ -277,19 +260,27 @@ public:
}
#endif
float (&ptr(void))[N] {
inline __m128 (&v4(void)) {
return simd4;
}
inline const __m128 (&v4(void) const) {
return simd4;
}
inline const float (&ptr(void) const)[N] {
return vec;
}
const float (&ptr(void) const)[N] {
inline float (&ptr(void))[N] {
return vec;
}
inline operator const float*() const {
inline operator const float*(void) const {
return vec;
}
inline operator float*() {
inline operator float*(void) {
return vec;
}
@ -308,7 +299,7 @@ public:
inline simd4_t<float,N>& operator+=(float f) {
simd4_t<float,N> r(f);
simd4 += r;
simd4 += r.simd4;
return *this;
}
inline simd4_t<float,N>& operator+=(const simd4_t<float,N>& v) {
@ -318,7 +309,7 @@ public:
inline simd4_t<float,N>& operator-=(float f) {
simd4_t<float,N> r(f);
simd4 -= r;
simd4 -= r.simd4;
return *this;
}
inline simd4_t<float,N>& operator-=(const simd4_t<float,N>& v) {
@ -326,7 +317,7 @@ public:
return *this;
}
inline simd4_t<float,N>& operator *=(float f) {
inline simd4_t<float,N>& operator*=(float f) {
simd4_t<float,N> r(f);
simd4 *= r.simd4;
return *this;
@ -346,6 +337,16 @@ public:
return *this;
}
};
namespace simd4
{
template<int N>
inline simd4_t<float,N>abs(simd4_t<float,N> v) {
static const __m128 sign_mask = _mm_set1_ps(-0.f); // -0.f = 1 << 31
v.v4() = _mm_andnot_ps(sign_mask, v.v4());
return v;
}
};
# endif
@ -362,7 +363,7 @@ private:
};
public:
simd4_t() {}
simd4_t(void) {}
simd4_t(double d) {
simd4[0] = simd4[1] = _mm_set1_pd(d);
}
@ -383,19 +384,19 @@ public:
return simd4;
}
inline double (&ptr(void))[N] {
return vec;
}
inline const double (&ptr(void) const)[N] {
return vec;
}
inline operator const double*() const {
inline double (&ptr(void))[N] {
return vec;
}
inline operator double*() {
inline operator const double*(void) const {
return vec;
}
inline operator double*(void) {
return vec;
}
@ -416,8 +417,8 @@ public:
inline simd4_t<double,N>& operator+=(double d) {
simd4_t<double,N> r(d);
simd4[0] += r;
simd4[1] += r;
simd4[0] += r.simd4[0];
simd4[1] += r.simd4[1];
return *this;
}
inline simd4_t<double,N>& operator+=(const simd4_t<double,N>& v) {
@ -428,8 +429,8 @@ public:
inline simd4_t<double,N>& operator-=(double d) {
simd4_t<double,N> r(d);
simd4[0] -= r;
simd4[1] -= r;
simd4[0] -= r.simd4[0];
simd4[1] -= r.simd4[1];
return *this;
}
inline simd4_t<double,N>& operator-=(const simd4_t<double,N>& v) {
@ -438,10 +439,10 @@ public:
return *this;
}
inline simd4_t<double,N>& operator *=(double d) {
inline simd4_t<double,N>& operator*=(double d) {
simd4_t<double,N> r(d);
simd4[0] *= r;
simd4[1] *= r;
simd4[0] *= r.simd4[0];
simd4[1] *= r.simd4[1];
return *this;
}
inline simd4_t<double,N>& operator*=(const simd4_t<double,N>& v) {
@ -452,8 +453,8 @@ public:
inline simd4_t<double,N>& operator/=(double d) {
simd4_t<double,N> r(1.0/d);
simd4[0] *= r;
simd4[1] *= r;
simd4[0] *= r.simd4[0];
simd4[1] *= r.simd4[1];
return *this;
}
inline simd4_t<double,N>& operator/=(const simd4_t<double,N>& v) {
@ -462,6 +463,17 @@ public:
return *this;
}
};
namespace simd4
{
template<int N>
inline simd4_t<double,N>abs(simd4_t<double,N> v) {
static const __m128d sign_mask = _mm_set1_pd(-0.); // -0. = 1 << 63
v.v4[0] = mm_andnot_ps(sign_mask, v4[0]);
v.v4[1] = _mm_andnot_ps(sign_mask, v4[1]);
return v;
}
};
# endif
@ -480,7 +492,7 @@ private:
};
public:
simd4_t() {}
simd4_t(void) {}
simd4_t(int i) {
simd4 = _mm_set1_epi32(i);
}
@ -491,19 +503,19 @@ public:
simd4 = v.simd4;
}
int (&ptr(void))[N] {
inline const int (&ptr(void) const)[N] {
return vec;
}
const int (&ptr(void) const)[N] {
inline int (&ptr(void))[N] {
return vec;
}
inline operator const int*() const {
inline operator const int*(void) const {
return vec;
}
inline operator int*() {
inline operator int*(void) {
return vec;
}
@ -522,7 +534,7 @@ public:
inline simd4_t<int,N>& operator+=(int i) {
simd4_t<int,N> r(i);
simd4 += r;
simd4 += r.simd4;
return *this;
}
inline simd4_t<int,N>& operator+=(const simd4_t<int,N>& v) {
@ -532,7 +544,7 @@ public:
inline simd4_t<int,N>& operator-=(int i) {
simd4_t<int,N> r(i);
simd4 -= r;
simd4 -= r.simd4;
return *this;
}
inline simd4_t<int,N>& operator-=(const simd4_t<int,N>& v) {
@ -540,9 +552,9 @@ public:
return *this;
}
inline simd4_t<int,N>& operator *=(int i) {
inline simd4_t<int,N>& operator*=(int i) {
simd4_t<int,N> r(i);
simd4 *= r;
simd4 *= r.simd4;
return *this;
}
inline simd4_t<int,N>& operator*=(const simd4_t<int,N>& v) {
@ -552,7 +564,7 @@ public:
inline simd4_t<int,N>& operator/=(int i) {
simd4_t<int,N> r(i);
simd4 /= r;
simd4 /= r.simd4;
return *this;
}
inline simd4_t<int,N>& operator/=(const simd4_t<int,N>& v) {

250
simgear/math/simd4x4.hxx Normal file
View File

@ -0,0 +1,250 @@
// Copyright (C) 2016 Erik Hofman - erik@ehofman.com
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Library General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Library General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//
#ifndef __SIMD4X4_H__
#define __SIMD4X4_H__ 1
#include <cstring>
template<typename T, int N>
class simd4x4_t
{
private:
union {
T m4x4[4][4];
T mtx[N][N];
T array[N*N];
};
public:
simd4x4_t(void) {}
explicit simd4x4_t(const T m[N][N]) {
std::memcpy(array, m, sizeof(T[N][N]));
}
simd4x4_t(const simd4x4_t<T,N>& m) {
std::memcpy(array, m, sizeof(T[N][N]));
}
~simd4x4_t(void) {}
inline const T (&ptr(void) const)[N][N] {
return mtx;
}
inline T (&ptr(void))[N][N] {
return mtx;
}
inline operator const T*(void) const {
return array;
}
inline operator T*(void) {
return array;
}
inline simd4x4_t<T,N>& operator=(const T m[N][N]) {
std::memcpy(array, m, sizeof(T[N][N]));
return *this;
}
inline simd4x4_t<T,N>& operator=(const simd4x4_t<T,N>& m) {
std::memcpy(array, m, sizeof(T[N][N]));
return *this;
}
inline simd4x4_t<T,N> operator+(simd4x4_t<T,N> m) {
m += *this; return m;
}
inline simd4x4_t<T,N> operator-(simd4x4_t<T,N> m) {
m -= *this; return m;
}
template<typename S>
inline simd4x4_t<T,N> operator*(S s) {
simd4x4_t<T,N> r(*this);
r *= s;
return r;
}
inline simd4x4_t<T,N> operator*(simd4x4_t<T,N> m) {
m *= *this; return m;
}
template<typename S>
inline simd4x4_t<T,N> operator/(S s) {
simd4x4_t<T,N> r(*this);
r *= (1/T(s));
return r;
}
inline simd4x4_t<T,N>& operator+=(const simd4x4_t<T,N>& m) {
for (int i=0; i<N*N; i++) {
ptr[i] += m[i];
}
return *this;
}
inline simd4x4_t<T,N>& operator-=(const simd4x4_t<T,N>& m) {
for (int i=0; i<N*N; i++) {
ptr[i] -= m[i];
}
return *this;
}
template<typename S>
inline simd4x4_t<T,N>& operator*=(S s) {
for (int i=0; i<N*N; i++) {
ptr[i] *= s;
}
return *this;
}
inline simd4x4_t<T,N>& operator*=(const simd4x4_t<T,N>& m) {
//TODO
return *this;
}
template<typename S>
inline simd4x4_t<T,N>& operator/=(S s) {
return operator*=(1/T(s));
}
};
namespace simd4x4
{
template<typename T, int N>
void zeros(simd4x4_t<T,N>& r) {
std::memset(r, 0, sizeof(T[N][N]));
}
template<typename T, int N>
void unit(simd4x4_t<T,N>& r) {
zeros(r);
for (int i=0; i<4; i++) {
r.ptr()[i][i] = 1;
}
}
#if 0
template<typename S, int N>
simd4_t<T,N>
_pt4Matrix4_sse(const simd4x4_t<T,N>& m, const simd4_t<S,N>& vi)
{
simd4_t<T,N> r;
simd4_t<T,N> x(vi[0]);
r = x*m[0];
for (int i=1; i<N; i++ {
x = vi[i];
r += x*m[i];
}
return r;
}
#endif
}
# ifdef __SSE__
# include <xmmintrin.h>
template<>
class simd4x4_t<float,4>
{
private:
typedef float __mtx4f_t[4][4];
union {
__m128 simd4x4[4];
__mtx4f_t mtx;
float array[4*4];
};
public:
simd4x4_t(void) {}
explicit simd4x4_t(const __mtx4f_t m) {
for (int i=0; i<4; i++) {
simd4x4[i] = _mm_loadu_ps(m[i]);
}
}
simd4x4_t(const simd4x4_t<float,4>& m) {
for (int i=0; i<4; i++) {
simd4x4[i] = m.m4x4()[i];
}
}
~simd4x4_t(void) {}
inline __m128 (&m4x4(void))[4] {
return simd4x4;
}
inline const __m128 (&m4x4(void) const)[4] {
return simd4x4;
}
inline const float (&ptr(void) const)[4][4] {
return mtx;
}
inline float (&ptr(void))[4][4] {
return mtx;
}
inline operator const float*(void) const {
return array;
}
inline operator float*(void) {
return array;
}
inline simd4x4_t<float,4>& operator=(const __mtx4f_t m) {
for (int i=0; i<4; i++) {
simd4x4[i] = _mm_loadu_ps(m[i]);
}
return *this;
}
inline simd4x4_t<float,4>& operator=(const simd4x4_t<float,4>& m) {
for (int i=0; i<4; i++) {
simd4x4[i] = m.m4x4()[i];
}
return *this;
}
inline simd4x4_t<float,4>& operator+=(const simd4x4_t<float,4>& m) {
for (int i=0; i<4; i++) {
simd4x4[i] += m.m4x4()[i];
}
return *this;
}
inline simd4x4_t<float,4>& operator-=(const simd4x4_t<float,4>& m) {
for (int i=0; i<4; i++) {
simd4x4[i] -= m.m4x4()[i];
}
return *this;
}
inline simd4x4_t<float,4>& operator*=(float f) {
__m128 f4 = _mm_set1_ps(f);
for (int i=0; i<4; i++) {
simd4x4[i] *= f4;
}
return *this;
}
};
# endif
#endif /* __SIMD4X4_H__ */