diff --git a/simgear/math/CMakeLists.txt b/simgear/math/CMakeLists.txt index 6a5a6ef6..b82a7243 100644 --- a/simgear/math/CMakeLists.txt +++ b/simgear/math/CMakeLists.txt @@ -33,6 +33,7 @@ set(HEADERS sg_types.hxx sg_random.h simd.hxx + simd4x4.hxx ) set(SOURCES diff --git a/simgear/math/SGMatrix.hxx b/simgear/math/SGMatrix.hxx index a7517a69..c796c336 100644 --- a/simgear/math/SGMatrix.hxx +++ b/simgear/math/SGMatrix.hxx @@ -18,6 +18,8 @@ #ifndef SGMatrix_H #define SGMatrix_H +#include "simd4x4.hxx" + /// Expression templates for poor programmers ... :) template struct TransNegRef; @@ -38,13 +40,13 @@ public: /// uninitialized values in the debug build very fast ... #ifndef NDEBUG for (unsigned i = 0; i < nEnts; ++i) - _data.flat[i] = SGLimits::quiet_NaN(); + _data[i] = SGLimits::quiet_NaN(); #endif } /// Constructor. Initialize by the content of a plain array, /// make sure it has at least 16 elements explicit SGMatrix(const T* data) - { for (unsigned i = 0; i < nEnts; ++i) _data.flat[i] = data[i]; } + { for (unsigned i = 0; i < nEnts; ++i) _data[i] = data[i]; } /// Constructor, build up a SGMatrix from given elements SGMatrix(T m00, T m01, T m02, T m03, @@ -52,14 +54,14 @@ public: T m20, T m21, T m22, T m23, T m30, T m31, T m32, T m33) { - _data.flat[0] = m00; _data.flat[1] = m10; - _data.flat[2] = m20; _data.flat[3] = m30; - _data.flat[4] = m01; _data.flat[5] = m11; - _data.flat[6] = m21; _data.flat[7] = m31; - _data.flat[8] = m02; _data.flat[9] = m12; - _data.flat[10] = m22; _data.flat[11] = m32; - _data.flat[12] = m03; _data.flat[13] = m13; - _data.flat[14] = m23; _data.flat[15] = m33; + _data[0] = m00; _data[1] = m10; + _data[2] = m20; _data[3] = m30; + _data[4] = m01; _data[5] = m11; + _data[6] = m21; _data[7] = m31; + _data[8] = m02; _data[9] = m12; + _data[10] = m22; _data[11] = m32; + _data[12] = m03; _data[13] = m13; + _data[14] = m23; _data[15] = m33; } /// Constructor, build up a SGMatrix from a translation @@ -80,14 +82,14 @@ public: template void set(const SGVec3& trans) { - _data.flat[0] = 1; _data.flat[4] = 0; - _data.flat[8] = 0; _data.flat[12] = T(trans(0)); - _data.flat[1] = 0; _data.flat[5] = 1; - _data.flat[9] = 0; _data.flat[13] = T(trans(1)); - _data.flat[2] = 0; _data.flat[6] = 0; - _data.flat[10] = 1; _data.flat[14] = T(trans(2)); - _data.flat[3] = 0; _data.flat[7] = 0; - _data.flat[11] = 0; _data.flat[15] = 1; + _data[0] = 1; _data[4] = 0; + _data[8] = 0; _data[12] = T(trans(0)); + _data[1] = 0; _data[5] = 1; + _data[9] = 0; _data[13] = T(trans(1)); + _data[2] = 0; _data[6] = 0; + _data[10] = 1; _data[14] = T(trans(2)); + _data[3] = 0; _data[7] = 0; + _data[11] = 0; _data[15] = 1; } /// Set from a scale/rotation and tranlation @@ -98,82 +100,89 @@ public: T xx = x*x; T yy = y*y; T zz = z*z; T wx = w*x; T wy = w*y; T wz = w*z; T xy = x*y; T xz = x*z; T yz = y*z; - _data.flat[0] = 1-2*(yy+zz); _data.flat[1] = 2*(xy-wz); - _data.flat[2] = 2*(xz+wy); _data.flat[3] = 0; - _data.flat[4] = 2*(xy+wz); _data.flat[5] = 1-2*(xx+zz); - _data.flat[6] = 2*(yz-wx); _data.flat[7] = 0; - _data.flat[8] = 2*(xz-wy); _data.flat[9] = 2*(yz+wx); - _data.flat[10] = 1-2*(xx+yy); _data.flat[11] = 0; - _data.flat[12] = 0; _data.flat[13] = 0; - _data.flat[14] = 0; _data.flat[15] = 1; + _data[0] = 1-2*(yy+zz); _data[1] = 2*(xy-wz); + _data[2] = 2*(xz+wy); _data[3] = 0; + _data[4] = 2*(xy+wz); _data[5] = 1-2*(xx+zz); + _data[6] = 2*(yz-wx); _data[7] = 0; + _data[8] = 2*(xz-wy); _data[9] = 2*(yz+wx); + _data[10] = 1-2*(xx+yy); _data[11] = 0; + _data[12] = 0; _data[13] = 0; + _data[14] = 0; _data[15] = 1; } /// set from a transposed negated matrix void set(const TransNegRef& tm) { const SGMatrix& m = tm.m; - _data.flat[0] = m(0,0); - _data.flat[1] = m(0,1); - _data.flat[2] = m(0,2); - _data.flat[3] = m(3,0); + _data[0] = m(0,0); + _data[1] = m(0,1); + _data[2] = m(0,2); + _data[3] = m(3,0); - _data.flat[4] = m(1,0); - _data.flat[5] = m(1,1); - _data.flat[6] = m(1,2); - _data.flat[7] = m(3,1); + _data[4] = m(1,0); + _data[5] = m(1,1); + _data[6] = m(1,2); + _data[7] = m(3,1); - _data.flat[8] = m(2,0); - _data.flat[9] = m(2,1); - _data.flat[10] = m(2,2); - _data.flat[11] = m(3,2); + _data[8] = m(2,0); + _data[9] = m(2,1); + _data[10] = m(2,2); + _data[11] = m(3,2); // Well, this one is ugly here, as that xform method on the current // object needs the above data to be already set ... SGVec3 t = xformVec(SGVec3(m(0,3), m(1,3), m(2,3))); - _data.flat[12] = -t(0); - _data.flat[13] = -t(1); - _data.flat[14] = -t(2); - _data.flat[15] = m(3,3); + _data[12] = -t(0); + _data[13] = -t(1); + _data[14] = -t(2); + _data[15] = m(3,3); } /// Access by index, the index is unchecked const T& operator()(unsigned i, unsigned j) const - { return _data.flat[i + 4*j]; } + { return _data[i + 4*j]; } /// Access by index, the index is unchecked T& operator()(unsigned i, unsigned j) - { return _data.flat[i + 4*j]; } + { return _data[i + 4*j]; } /// Access raw data by index, the index is unchecked const T& operator[](unsigned i) const - { return _data.flat[i]; } + { return _data[i]; } /// Access by index, the index is unchecked T& operator[](unsigned i) - { return _data.flat[i]; } + { return _data[i]; } /// Get the data pointer const T* data(void) const - { return _data.flat; } + { return _data; } /// Get the data pointer T* data(void) - { return _data.flat; } + { return _data; } /// Readonly interface function to ssg's sgMat4/sgdMat4 const T (&sg(void) const)[4][4] - { return _data.carray; } + { return _data.ptr(); } /// Interface function to ssg's sgMat4/sgdMat4 T (&sg(void))[4][4] - { return _data.carray; } + { return _data.ptr(); } + /// Readonly raw storage interface + const simd4x4_t (&simd4x4(void) const) + { return _data; } + /// Readonly raw storage interface + simd4x4_t (&simd4x4(void)) + { return _data; } + /// Inplace addition SGMatrix& operator+=(const SGMatrix& m) - { for (unsigned i = 0; i < nEnts; ++i) _data.flat[i] += m._data.flat[i]; return *this; } + { for (unsigned i = 0; i < nEnts; ++i) _data[i] += m._data[i]; return *this; } /// Inplace subtraction SGMatrix& operator-=(const SGMatrix& m) - { for (unsigned i = 0; i < nEnts; ++i) _data.flat[i] -= m._data.flat[i]; return *this; } + { for (unsigned i = 0; i < nEnts; ++i) _data[i] -= m._data[i]; return *this; } /// Inplace scalar multiplication template SGMatrix& operator*=(S s) - { for (unsigned i = 0; i < nEnts; ++i) _data.flat[i] *= s; return *this; } + { for (unsigned i = 0; i < nEnts; ++i) _data[i] *= s; return *this; } /// Inplace scalar multiplication by 1/s template SGMatrix& operator/=(S s) @@ -259,22 +268,15 @@ public: /// Return an all zero matrix static SGMatrix zeros(void) - { return SGMatrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } + { SGMatrix r; simd4x4::zeros(r.simd4x4()); return r; } /// Return a unit matrix static SGMatrix unit(void) - { return SGMatrix(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); } + { SGMatrix r; simd4x4::unit(r.simd4x4()); return r; } private: - /// Required to make that alias safe. - union Data { - T flat[16]; - T carray[4][4]; - }; - /// The actual data, the matrix is stored in column major order, - /// that matches the storage format of OpenGL - Data _data; + simd4x4_t _data; }; /// Class to distinguish between a matrix and the matrix with a transposed @@ -296,60 +298,55 @@ operator+(const SGMatrix& m) template inline SGMatrix -operator-(const SGMatrix& m) +operator-(SGMatrix m) { - SGMatrix ret; - for (unsigned i = 0; i < SGMatrix::nEnts; ++i) - ret[i] = -m[i]; - return ret; + for (unsigned i = 0; i < SGMatrix::nRows; ++i) + m[i] = -m[i]; + return m; } /// Binary + template inline SGMatrix -operator+(const SGMatrix& m1, const SGMatrix& m2) +operator+(SGMatrix m1, const SGMatrix& m2) { - SGMatrix ret; for (unsigned i = 0; i < SGMatrix::nEnts; ++i) - ret[i] = m1[i] + m2[i]; - return ret; + m1[i] += m2[i]; + return m1; } /// Binary - template inline SGMatrix -operator-(const SGMatrix& m1, const SGMatrix& m2) +operator-(SGMatrix m1, const SGMatrix& m2) { - SGMatrix ret; for (unsigned i = 0; i < SGMatrix::nEnts; ++i) - ret[i] = m1[i] - m2[i]; - return ret; + m1[i] -= m2[i]; + return m1; } /// Scalar multiplication template inline SGMatrix -operator*(S s, const SGMatrix& m) +operator*(S s, SGMatrix m) { - SGMatrix ret; for (unsigned i = 0; i < SGMatrix::nEnts; ++i) - ret[i] = s*m[i]; - return ret; + m[i] *= s; + return m; } /// Scalar multiplication template inline SGMatrix -operator*(const SGMatrix& m, S s) +operator*(SGMatrix m, S s) { - SGMatrix ret; for (unsigned i = 0; i < SGMatrix::nEnts; ++i) - ret[i] = s*m[i]; - return ret; + m[i] *= s; + return m; } /// Vector multiplication diff --git a/simgear/math/SGVec2.hxx b/simgear/math/SGVec2.hxx index 64bb30da..e4fe25af 100644 --- a/simgear/math/SGVec2.hxx +++ b/simgear/math/SGVec2.hxx @@ -244,15 +244,18 @@ length(const SGVec2& v) template inline T -norm1(const SGVec2& v) -{ return fabs(v(0)) + fabs(v(1)); } +norm1(SGVec2 v) +{ v.simd2() = simd4::abs(v.simd2()); return (v(0)+v(1)); } /// The inf-norm of the vector template inline T -normI(const SGVec2& v) -{ return SGMisc::max(fabs(v(0)), fabs(v(1))); } +normI(SGVec2 v) +{ + v.simd2() = simd4::abs(v.simd2()); + return SGMisc::max(v(0), v(1)); +} /// The euclidean norm of the vector, that is what most people call length template @@ -383,11 +386,11 @@ operator<<(std::basic_ostream& s, const SGVec2& v) inline SGVec2f toVec2f(const SGVec2d& v) -{ return SGVec2f((float)v(0), (float)v(1)); } +{ SGVec2f f(v); return f; } inline SGVec2d toVec2d(const SGVec2f& v) -{ return SGVec2d(v(0), v(1)); } +{ SGVec2d d(v); return d; } #endif diff --git a/simgear/math/SGVec3.hxx b/simgear/math/SGVec3.hxx index 0d0f96bd..2d25a216 100644 --- a/simgear/math/SGVec3.hxx +++ b/simgear/math/SGVec3.hxx @@ -338,15 +338,18 @@ length(const SGVec3& v) template inline T -norm1(const SGVec3& v) -{ return fabs(v(0)) + fabs(v(1)) + fabs(v(2)); } +norm1(SGVec3 v) +{ v.simd3() = simd4::abs(v.simd3()); return (v(0)+v(1)+v(2)); } /// The inf-norm of the vector template inline T -normI(const SGVec3& v) -{ return SGMisc::max(fabs(v(0)), fabs(v(1)), fabs(v(2))); } +normI(SGVec3 v) +{ + v.simd3() = simd4::abs(v.simd3()); + return SGMisc::max(v(0), v(1), v(2)); +} /// Vector cross product template @@ -519,11 +522,11 @@ operator<<(std::basic_ostream& s, const SGVec3& v) inline SGVec3f toVec3f(const SGVec3d& v) -{ return SGVec3f((float)v(0), (float)v(1), (float)v(2)); } +{ SGVec3f f(v); return f; } inline SGVec3d toVec3d(const SGVec3f& v) -{ return SGVec3d(v(0), v(1), v(2)); } +{ SGVec3d d(v); return d; } #endif diff --git a/simgear/math/SGVec4.hxx b/simgear/math/SGVec4.hxx index 9083e325..f7e256d6 100644 --- a/simgear/math/SGVec4.hxx +++ b/simgear/math/SGVec4.hxx @@ -291,15 +291,18 @@ length(const SGVec4& v) template inline T -norm1(const SGVec4& v) -{ return fabs(v(0)) + fabs(v(1)) + fabs(v(2)) + fabs(v(3)); } +norm1(SGVec4 v) +{ v.simd4() = simd4::abs(v.simd4()); return (v(0)+v(1)+v(2)+v(3)); } /// The inf-norm of the vector template inline T -normI(const SGVec4& v) -{ return SGMisc::max(fabs(v(0)), fabs(v(1)), fabs(v(2)), fabs(v(2))); } +normI(SGVec4 v) +{ + v.simd4() = simd4::abs(v.simd4()); + return SGMisc::max(v(0), v(1), v(2), v(3)); +} /// The euclidean norm of the vector, that is what most people call length template @@ -439,11 +442,11 @@ operator<<(std::basic_ostream& s, const SGVec4& v) inline SGVec4f toVec4f(const SGVec4d& v) -{ return SGVec4f((float)v(0), (float)v(1), (float)v(2), (float)v(3)); } +{ SGVec4f f(v); return f; } inline SGVec4d toVec4d(const SGVec4f& v) -{ return SGVec4d(v(0), v(1), v(2), v(3)); } +{ SGVec4d d(v); return d; } #endif diff --git a/simgear/math/simd.hxx b/simgear/math/simd.hxx index ae2d62bb..021f65e1 100644 --- a/simgear/math/simd.hxx +++ b/simgear/math/simd.hxx @@ -19,7 +19,7 @@ #define __SIMD_H__ 1 #include - +#include template class simd4_t @@ -31,7 +31,7 @@ private: }; public: - simd4_t() {} + simd4_t(void) {} template simd4_t(S s) { vec[0] = vec[1] = vec[2] = vec[3] = s; @@ -42,21 +42,21 @@ public: simd4_t(const simd4_t& v) { std::memcpy(vec, v.vec, sizeof(T[N])); } - ~simd4_t() {} + ~simd4_t(void) {} - T (&ptr(void))[N] { + inline const T (&ptr(void) const)[N] { return vec; } - const T (&ptr(void) const)[N] { + inline T (&ptr(void))[N] { return vec; } - inline operator const T*() const { + inline operator const T*(void) const { return vec; } - inline operator T*() { + inline operator T*(void) { return vec; } @@ -75,101 +75,83 @@ public: } template - inline simd4_t operator+(S s) - { + inline simd4_t operator+(S s) { simd4_t r(*this); r += s; return r; } - inline simd4_t operator+(const T v[N]) - { + inline simd4_t operator+(const T v[N]) { simd4_t r(v); r += *this; return r; } - inline simd4_t operator+(const simd4_t& v) + inline simd4_t operator+(simd4_t v) { - simd4_t r(v); - r += *this; - return r; + v += *this; + return v; } - inline simd4_t operator-() - { + inline simd4_t operator-(void) { simd4_t r(0); r -= *this; return r; } template - inline simd4_t operator-(S s) - { + inline simd4_t operator-(S s) { simd4_t r(*this); r -= s; return r; } - inline simd4_t operator-(simd4_t& v) - { + inline simd4_t operator-(const simd4_t& v) { simd4_t r(*this); r -= v; return r; } template - inline simd4_t operator*(S s) - { + inline simd4_t operator*(S s) { simd4_t r(s); r *= *this; return r; } - inline simd4_t operator*(const T v[N]) - { + inline simd4_t operator*(const T v[N]) { simd4_t r(v); r *= *this; return r; } - inline simd4_t operator*(simd4_t& v) - { - simd4_t r(v); - r *= *this; - return r; + inline simd4_t operator*(simd4_t v) { + v *= *this; return v; } template - inline simd4_t operator/(S s) - { + inline simd4_t operator/(S s) { simd4_t r(1/T(s)); r *= this; return r; } - inline simd4_t operator/(const T v[N]) - { + inline simd4_t operator/(const T v[N]) { simd4_t r(*this); r /= v; return r; } - inline simd4_t operator/(simd4_t& v) - { + inline simd4_t operator/(const simd4_t& v) { simd4_t r(*this); - r /= v; - return r; + r /= v; return v; } template - inline simd4_t& operator+=(S s) - { + inline simd4_t& operator+=(S s) { for (int i=0; i& operator+=(const T v[N]) - { + inline simd4_t& operator+=(const T v[N]) { simd4_t r(v); - *this += r; + *this += r.simd4; return *this; } - inline simd4_t& operator+=(const simd4_t& v) - { + inline simd4_t& operator+=(const simd4_t& v) { for (int i=0; i - inline simd4_t& operator-=(S s) - { + inline simd4_t& operator-=(S s) { for (int i=0; i& operator-=(const T v[N]) - { + inline simd4_t& operator-=(const T v[N]) { simd4_t r(v); - *this -= r; + *this -= r.simd4; return *this; } - inline simd4_t& operator-=(const simd4_t& v) - { + inline simd4_t& operator-=(const simd4_t& v) { for (int i=0; i - inline simd4_t& operator *=(S s) - { + inline simd4_t& operator*=(S s) { for (int i=0; i& operator*=(const T v[N]) - { + inline simd4_t& operator*=(const T v[N]) { simd4_t r(v); - *this *= r; + *this *= r.simd4; return *this; } - inline simd4_t& operator*=(const simd4_t& v) - { + inline simd4_t& operator*=(const simd4_t& v) { for (int i=0; i - inline simd4_t& operator/=(S s) - { - s = (1/s); - for (int i=0; i& operator/=(S s) { + return operator*=(1/T(s)); } - inline simd4_t& operator/=(const T v[N]) - { + inline simd4_t& operator/=(const T v[N]) { simd4_t r(v); - *this /= r; + *this /= r.simd4; return *this; } - inline simd4_t& operator/=(const simd4_t& v) - { + inline simd4_t& operator/=(const simd4_t& v) { for (int i=0; i +inline simd4_t abs(simd4_t v) { + for (int i=0; i +# endif + # ifdef __SSE__ # include @@ -261,7 +244,7 @@ private: }; public: - simd4_t() {} + simd4_t(void) {} simd4_t(float f) { simd4 = _mm_set1_ps(f); } @@ -277,19 +260,27 @@ public: } #endif - float (&ptr(void))[N] { + inline __m128 (&v4(void)) { + return simd4; + } + + inline const __m128 (&v4(void) const) { + return simd4; + } + + inline const float (&ptr(void) const)[N] { return vec; } - const float (&ptr(void) const)[N] { + inline float (&ptr(void))[N] { return vec; } - inline operator const float*() const { + inline operator const float*(void) const { return vec; } - inline operator float*() { + inline operator float*(void) { return vec; } @@ -308,7 +299,7 @@ public: inline simd4_t& operator+=(float f) { simd4_t r(f); - simd4 += r; + simd4 += r.simd4; return *this; } inline simd4_t& operator+=(const simd4_t& v) { @@ -318,7 +309,7 @@ public: inline simd4_t& operator-=(float f) { simd4_t r(f); - simd4 -= r; + simd4 -= r.simd4; return *this; } inline simd4_t& operator-=(const simd4_t& v) { @@ -326,7 +317,7 @@ public: return *this; } - inline simd4_t& operator *=(float f) { + inline simd4_t& operator*=(float f) { simd4_t r(f); simd4 *= r.simd4; return *this; @@ -346,6 +337,16 @@ public: return *this; } }; + +namespace simd4 +{ +template +inline simd4_tabs(simd4_t v) { + static const __m128 sign_mask = _mm_set1_ps(-0.f); // -0.f = 1 << 31 + v.v4() = _mm_andnot_ps(sign_mask, v.v4()); + return v; + } +}; # endif @@ -362,7 +363,7 @@ private: }; public: - simd4_t() {} + simd4_t(void) {} simd4_t(double d) { simd4[0] = simd4[1] = _mm_set1_pd(d); } @@ -383,19 +384,19 @@ public: return simd4; } - inline double (&ptr(void))[N] { - return vec; - } - inline const double (&ptr(void) const)[N] { return vec; } - inline operator const double*() const { + inline double (&ptr(void))[N] { return vec; } - inline operator double*() { + inline operator const double*(void) const { + return vec; + } + + inline operator double*(void) { return vec; } @@ -416,8 +417,8 @@ public: inline simd4_t& operator+=(double d) { simd4_t r(d); - simd4[0] += r; - simd4[1] += r; + simd4[0] += r.simd4[0]; + simd4[1] += r.simd4[1]; return *this; } inline simd4_t& operator+=(const simd4_t& v) { @@ -428,8 +429,8 @@ public: inline simd4_t& operator-=(double d) { simd4_t r(d); - simd4[0] -= r; - simd4[1] -= r; + simd4[0] -= r.simd4[0]; + simd4[1] -= r.simd4[1]; return *this; } inline simd4_t& operator-=(const simd4_t& v) { @@ -438,10 +439,10 @@ public: return *this; } - inline simd4_t& operator *=(double d) { + inline simd4_t& operator*=(double d) { simd4_t r(d); - simd4[0] *= r; - simd4[1] *= r; + simd4[0] *= r.simd4[0]; + simd4[1] *= r.simd4[1]; return *this; } inline simd4_t& operator*=(const simd4_t& v) { @@ -452,8 +453,8 @@ public: inline simd4_t& operator/=(double d) { simd4_t r(1.0/d); - simd4[0] *= r; - simd4[1] *= r; + simd4[0] *= r.simd4[0]; + simd4[1] *= r.simd4[1]; return *this; } inline simd4_t& operator/=(const simd4_t& v) { @@ -462,6 +463,17 @@ public: return *this; } }; + +namespace simd4 +{ +template +inline simd4_tabs(simd4_t v) { + static const __m128d sign_mask = _mm_set1_pd(-0.); // -0. = 1 << 63 + v.v4[0] = mm_andnot_ps(sign_mask, v4[0]); + v.v4[1] = _mm_andnot_ps(sign_mask, v4[1]); + return v; + } +}; # endif @@ -480,7 +492,7 @@ private: }; public: - simd4_t() {} + simd4_t(void) {} simd4_t(int i) { simd4 = _mm_set1_epi32(i); } @@ -491,19 +503,19 @@ public: simd4 = v.simd4; } - int (&ptr(void))[N] { + inline const int (&ptr(void) const)[N] { return vec; } - const int (&ptr(void) const)[N] { + inline int (&ptr(void))[N] { return vec; } - inline operator const int*() const { + inline operator const int*(void) const { return vec; } - inline operator int*() { + inline operator int*(void) { return vec; } @@ -522,7 +534,7 @@ public: inline simd4_t& operator+=(int i) { simd4_t r(i); - simd4 += r; + simd4 += r.simd4; return *this; } inline simd4_t& operator+=(const simd4_t& v) { @@ -532,7 +544,7 @@ public: inline simd4_t& operator-=(int i) { simd4_t r(i); - simd4 -= r; + simd4 -= r.simd4; return *this; } inline simd4_t& operator-=(const simd4_t& v) { @@ -540,9 +552,9 @@ public: return *this; } - inline simd4_t& operator *=(int i) { + inline simd4_t& operator*=(int i) { simd4_t r(i); - simd4 *= r; + simd4 *= r.simd4; return *this; } inline simd4_t& operator*=(const simd4_t& v) { @@ -552,7 +564,7 @@ public: inline simd4_t& operator/=(int i) { simd4_t r(i); - simd4 /= r; + simd4 /= r.simd4; return *this; } inline simd4_t& operator/=(const simd4_t& v) { diff --git a/simgear/math/simd4x4.hxx b/simgear/math/simd4x4.hxx new file mode 100644 index 00000000..da0fddfb --- /dev/null +++ b/simgear/math/simd4x4.hxx @@ -0,0 +1,250 @@ +// Copyright (C) 2016 Erik Hofman - erik@ehofman.com +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Library General Public +// License as published by the Free Software Foundation; either +// version 2 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Library General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +// + +#ifndef __SIMD4X4_H__ +#define __SIMD4X4_H__ 1 + +#include + + +template +class simd4x4_t +{ +private: + union { + T m4x4[4][4]; + T mtx[N][N]; + T array[N*N]; + }; + +public: + simd4x4_t(void) {} + explicit simd4x4_t(const T m[N][N]) { + std::memcpy(array, m, sizeof(T[N][N])); + } + simd4x4_t(const simd4x4_t& m) { + std::memcpy(array, m, sizeof(T[N][N])); + } + ~simd4x4_t(void) {} + + inline const T (&ptr(void) const)[N][N] { + return mtx; + } + + inline T (&ptr(void))[N][N] { + return mtx; + } + + inline operator const T*(void) const { + return array; + } + + inline operator T*(void) { + return array; + } + + inline simd4x4_t& operator=(const T m[N][N]) { + std::memcpy(array, m, sizeof(T[N][N])); + return *this; + } + inline simd4x4_t& operator=(const simd4x4_t& m) { + std::memcpy(array, m, sizeof(T[N][N])); + return *this; + } + + inline simd4x4_t operator+(simd4x4_t m) { + m += *this; return m; + } + + inline simd4x4_t operator-(simd4x4_t m) { + m -= *this; return m; + } + + template + inline simd4x4_t operator*(S s) { + simd4x4_t r(*this); + r *= s; + return r; + } + inline simd4x4_t operator*(simd4x4_t m) { + m *= *this; return m; + } + + template + inline simd4x4_t operator/(S s) { + simd4x4_t r(*this); + r *= (1/T(s)); + return r; + } + + inline simd4x4_t& operator+=(const simd4x4_t& m) { + for (int i=0; i& operator-=(const simd4x4_t& m) { + for (int i=0; i + inline simd4x4_t& operator*=(S s) { + for (int i=0; i& operator*=(const simd4x4_t& m) { +//TODO + return *this; + } + + template + inline simd4x4_t& operator/=(S s) { + return operator*=(1/T(s)); + } +}; + +namespace simd4x4 +{ +template +void zeros(simd4x4_t& r) { + std::memset(r, 0, sizeof(T[N][N])); + } + +template +void unit(simd4x4_t& r) { + zeros(r); + for (int i=0; i<4; i++) { + r.ptr()[i][i] = 1; + } + } + +#if 0 +template +simd4_t +_pt4Matrix4_sse(const simd4x4_t& m, const simd4_t& vi) +{ + simd4_t r; + simd4_t x(vi[0]); + r = x*m[0]; + for (int i=1; i + +template<> +class simd4x4_t +{ +private: + typedef float __mtx4f_t[4][4]; + + union { + __m128 simd4x4[4]; + __mtx4f_t mtx; + float array[4*4]; + }; + +public: + simd4x4_t(void) {} + explicit simd4x4_t(const __mtx4f_t m) { + for (int i=0; i<4; i++) { + simd4x4[i] = _mm_loadu_ps(m[i]); + } + } + simd4x4_t(const simd4x4_t& m) { + for (int i=0; i<4; i++) { + simd4x4[i] = m.m4x4()[i]; + } + } + ~simd4x4_t(void) {} + + inline __m128 (&m4x4(void))[4] { + return simd4x4; + } + + inline const __m128 (&m4x4(void) const)[4] { + return simd4x4; + } + + inline const float (&ptr(void) const)[4][4] { + return mtx; + } + + inline float (&ptr(void))[4][4] { + return mtx; + } + + inline operator const float*(void) const { + return array; + } + + inline operator float*(void) { + return array; + } + + inline simd4x4_t& operator=(const __mtx4f_t m) { + for (int i=0; i<4; i++) { + simd4x4[i] = _mm_loadu_ps(m[i]); + } + return *this; + } + inline simd4x4_t& operator=(const simd4x4_t& m) { + for (int i=0; i<4; i++) { + simd4x4[i] = m.m4x4()[i]; + } + return *this; + } + + inline simd4x4_t& operator+=(const simd4x4_t& m) { + for (int i=0; i<4; i++) { + simd4x4[i] += m.m4x4()[i]; + } + return *this; + } + + inline simd4x4_t& operator-=(const simd4x4_t& m) { + for (int i=0; i<4; i++) { + simd4x4[i] -= m.m4x4()[i]; + } + return *this; + } + + inline simd4x4_t& operator*=(float f) { + __m128 f4 = _mm_set1_ps(f); + for (int i=0; i<4; i++) { + simd4x4[i] *= f4; + } + return *this; + } +}; + +# endif + +#endif /* __SIMD4X4_H__ */ +