Add SIMD matrix operations

This commit is contained in:
Erik Hofman 2016-11-26 11:40:01 +01:00
parent 9e0bb33d58
commit 0ea9786601
2 changed files with 51 additions and 90 deletions

View File

@ -46,7 +46,7 @@ public:
/// Constructor. Initialize by the content of a plain array, /// Constructor. Initialize by the content of a plain array,
/// make sure it has at least 16 elements /// make sure it has at least 16 elements
explicit SGMatrix(const T* data) explicit SGMatrix(const T* data)
{ for (unsigned i = 0; i < nEnts; ++i) _data[i] = data[i]; } { simd4x4_t<T,4> x(data); _data = x; }
/// Constructor, build up a SGMatrix from given elements /// Constructor, build up a SGMatrix from given elements
SGMatrix(T m00, T m01, T m02, T m03, SGMatrix(T m00, T m01, T m02, T m03,
@ -82,14 +82,13 @@ public:
template<typename S> template<typename S>
void set(const SGVec3<S>& trans) void set(const SGVec3<S>& trans)
{ {
_data[0] = 1; _data[4] = 0; simd4x4::zeros(_data);
_data[8] = 0; _data[12] = T(trans(0)); _data[0] = 1;
_data[1] = 0; _data[5] = 1; _data[12] = T(trans(0));
_data[9] = 0; _data[13] = T(trans(1)); _data[5] = 1;
_data[2] = 0; _data[6] = 0; _data[13] = T(trans(1));
_data[10] = 1; _data[14] = T(trans(2)); _data[10] = 1; _data[14] = T(trans(2));
_data[3] = 0; _data[7] = 0; _data[15] = 1;
_data[11] = 0; _data[15] = 1;
} }
/// Set from a scale/rotation and tranlation /// Set from a scale/rotation and tranlation
@ -132,9 +131,10 @@ public:
// Well, this one is ugly here, as that xform method on the current // Well, this one is ugly here, as that xform method on the current
// object needs the above data to be already set ... // object needs the above data to be already set ...
SGVec3<T> t = xformVec(SGVec3<T>(m(0,3), m(1,3), m(2,3))); SGVec3<T> t = xformVec(SGVec3<T>(m(0,3), m(1,3), m(2,3)));
_data[12] = -t(0); t = -t;
_data[13] = -t(1); _data[12] = t(0);
_data[14] = -t(2); _data[13] = t(1);
_data[14] = t(2);
_data[15] = m(3,3); _data[15] = m(3,3);
} }
@ -175,14 +175,14 @@ public:
/// Inplace addition /// Inplace addition
SGMatrix& operator+=(const SGMatrix& m) SGMatrix& operator+=(const SGMatrix& m)
{ for (unsigned i = 0; i < nEnts; ++i) _data[i] += m._data[i]; return *this; } { _data += m.simd4x4(); return *this; }
/// Inplace subtraction /// Inplace subtraction
SGMatrix& operator-=(const SGMatrix& m) SGMatrix& operator-=(const SGMatrix& m)
{ for (unsigned i = 0; i < nEnts; ++i) _data[i] -= m._data[i]; return *this; } { _data -= m.simd4x4(); return *this; }
/// Inplace scalar multiplication /// Inplace scalar multiplication
template<typename S> template<typename S>
SGMatrix& operator*=(S s) SGMatrix& operator*=(S s)
{ for (unsigned i = 0; i < nEnts; ++i) _data[i] *= s; return *this; } { _data *= s; return *this; }
/// Inplace scalar multiplication by 1/s /// Inplace scalar multiplication by 1/s
template<typename S> template<typename S>
SGMatrix& operator/=(S s) SGMatrix& operator/=(S s)
@ -193,14 +193,11 @@ public:
template<typename S> template<typename S>
SGMatrix& preMultTranslate(const SGVec3<S>& t) SGMatrix& preMultTranslate(const SGVec3<S>& t)
{ {
SGVec4<T> row3((*this)(3,0), (*this)(3,1), (*this)(3,2), (*this)(3,3));
for (unsigned i = 0; i < 3; ++i) { for (unsigned i = 0; i < 3; ++i) {
T tmp = T(t(i)); SGVec4<T> trow3 = T(t(i))*row3;
if (tmp == 0) (*this)(i,0) += trow3(0); (*this)(i,1) += trow3(1);
continue; (*this)(i,2) += trow3(2); (*this)(i,3) += trow3(3);
(*this)(i,0) += tmp*(*this)(3,0);
(*this)(i,1) += tmp*(*this)(3,1);
(*this)(i,2) += tmp*(*this)(3,2);
(*this)(i,3) += tmp*(*this)(3,3);
} }
return *this; return *this;
} }
@ -238,30 +235,20 @@ public:
SGVec3<T> xformPt(const SGVec3<T>& pt) const SGVec3<T> xformPt(const SGVec3<T>& pt) const
{ {
SGVec3<T> tpt; SGVec3<T> tpt((*this)(0,3), (*this)(1,3), (*this)(2,3));
tpt(0) = (*this)(0,3);
tpt(1) = (*this)(1,3);
tpt(2) = (*this)(2,3);
for (unsigned i = 0; i < SGMatrix<T>::nCols-1; ++i) { for (unsigned i = 0; i < SGMatrix<T>::nCols-1; ++i) {
T tmp = pt(i); SGVec3<T> coli((*this)(0,i), (*this)(1,i), (*this)(2,i));
tpt(0) += tmp*(*this)(0,i); tpt += pt(i)*coli;
tpt(1) += tmp*(*this)(1,i);
tpt(2) += tmp*(*this)(2,i);
} }
return tpt; return tpt;
} }
SGVec3<T> xformVec(const SGVec3<T>& v) const SGVec3<T> xformVec(const SGVec3<T>& v) const
{ {
SGVec3<T> tv; SGVec3<T> tv((*this)(0,0), (*this)(1,0), (*this)(2,0));
T tmp = v(0); tv *= v(0);
tv(0) = tmp*(*this)(0,0);
tv(1) = tmp*(*this)(1,0);
tv(2) = tmp*(*this)(2,0);
for (unsigned i = 1; i < SGMatrix<T>::nCols-1; ++i) { for (unsigned i = 1; i < SGMatrix<T>::nCols-1; ++i) {
T tmp = v(i); SGVec3<T> coli((*this)(0,i), (*this)(1,i), (*this)(2,i));
tv(0) += tmp*(*this)(0,i); tv += v(i)*coli;
tv(1) += tmp*(*this)(1,i);
tv(2) += tmp*(*this)(2,i);
} }
return tv; return tv;
} }
@ -300,8 +287,7 @@ inline
SGMatrix<T> SGMatrix<T>
operator-(SGMatrix<T> m) operator-(SGMatrix<T> m)
{ {
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i) m.simd4x4() = -m.simd4x4();
m[i] = -m[i];
return m; return m;
} }
@ -311,8 +297,7 @@ inline
SGMatrix<T> SGMatrix<T>
operator+(SGMatrix<T> m1, const SGMatrix<T>& m2) operator+(SGMatrix<T> m1, const SGMatrix<T>& m2)
{ {
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i) m1.simd4x4() += m2.simd4x4();
m1[i] += m2[i];
return m1; return m1;
} }
@ -322,8 +307,7 @@ inline
SGMatrix<T> SGMatrix<T>
operator-(SGMatrix<T> m1, const SGMatrix<T>& m2) operator-(SGMatrix<T> m1, const SGMatrix<T>& m2)
{ {
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i) m1.simd4x4() -= m2.simd4x4();
m1[i] -= m2[i];
return m1; return m1;
} }
@ -332,22 +316,14 @@ template<typename S, typename T>
inline inline
SGMatrix<T> SGMatrix<T>
operator*(S s, SGMatrix<T> m) operator*(S s, SGMatrix<T> m)
{ { m.simd4x4() *= s; return m; }
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i)
m[i] *= s;
return m;
}
/// Scalar multiplication /// Scalar multiplication
template<typename S, typename T> template<typename S, typename T>
inline inline
SGMatrix<T> SGMatrix<T>
operator*(SGMatrix<T> m, S s) operator*(SGMatrix<T> m, S s)
{ { m.simd4x4() *= s; return m; }
for (unsigned i = 0; i < SGMatrix<T>::nEnts; ++i)
m[i] *= s;
return m;
}
/// Vector multiplication /// Vector multiplication
template<typename T> template<typename T>
@ -356,18 +332,7 @@ SGVec4<T>
operator*(const SGMatrix<T>& m, const SGVec4<T>& v) operator*(const SGMatrix<T>& m, const SGVec4<T>& v)
{ {
SGVec4<T> mv; SGVec4<T> mv;
T tmp = v(0); mv.v4() = m.m4x4() * v.v4();
mv(0) = tmp*m(0,0);
mv(1) = tmp*m(1,0);
mv(2) = tmp*m(2,0);
mv(3) = tmp*m(3,0);
for (unsigned i = 1; i < SGMatrix<T>::nCols; ++i) {
T tmp = v(i);
mv(0) += tmp*m(0,i);
mv(1) += tmp*m(1,i);
mv(2) += tmp*m(2,i);
mv(3) += tmp*m(3,i);
}
return mv; return mv;
} }
@ -402,20 +367,7 @@ SGMatrix<T>
operator*(const SGMatrix<T>& m1, const SGMatrix<T>& m2) operator*(const SGMatrix<T>& m1, const SGMatrix<T>& m2)
{ {
SGMatrix<T> m; SGMatrix<T> m;
for (unsigned j = 0; j < SGMatrix<T>::nCols; ++j) { m.simd4x4() = m1.simd4x4() * m2.simd4x4();
T tmp = m2(0,j);
m(0,j) = tmp*m1(0,0);
m(1,j) = tmp*m1(1,0);
m(2,j) = tmp*m1(2,0);
m(3,j) = tmp*m1(3,0);
for (unsigned i = 1; i < SGMatrix<T>::nCols; ++i) {
T tmp = m2(i,j);
m(0,j) += tmp*m1(0,i);
m(1,j) += tmp*m1(1,i);
m(2,j) += tmp*m1(2,i);
m(3,j) += tmp*m1(3,i);
}
}
return m; return m;
} }

View File

@ -336,9 +336,11 @@ private:
public: public:
simd4x4_t(void) {} simd4x4_t(void) {}
explicit simd4x4_t(const double m[4*4]) { explicit simd4x4_t(const double m[4*4]) {
const double *p = m;
for (int i=0; i<4; i++) { for (int i=0; i<4; i++) {
simd4x4[i][0] = simd4_t<double,4>((const double*)&m[4*i]).v4()[0]; simd4_t<double,4> vec4(p);
simd4x4[i][1] = simd4_t<double,4>((const double*)&m[4*i+2]).v4()[1]; simd4x4[i][0] = vec4.v4()[0]; p += 4;
simd4x4[i][1] = vec4.v4()[1];
} }
} }
@ -380,6 +382,16 @@ public:
return array; return array;
} }
inline simd4x4_t<double,4>& operator=(const double m[4*4]) {
const double *p = m;
for (int i=0; i<4; i++) {
simd4_t<double,4> vec4(p);
simd4x4[i][0] = vec4.v4()[0]; p += 4;
simd4x4[i][1] = vec4.v4()[1];
}
return *this;
}
inline simd4x4_t<double,4>& operator=(const __mtx4d_t m) { inline simd4x4_t<double,4>& operator=(const __mtx4d_t m) {
for (int i=0; i<4; i++) { for (int i=0; i<4; i++) {
simd4x4[i][0] = simd4_t<double,4>(m[i]).v4()[0]; simd4x4[i][0] = simd4_t<double,4>(m[i]).v4()[0];
@ -422,22 +434,19 @@ public:
}; };
#if 0
template<> template<>
inline simd4_t<double,4> operator*(const simd4x4_t<double,4>& m, const simd4_t<double,4>& vi) inline simd4_t<double,4> operator*(const simd4x4_t<double,4>& m, const simd4_t<double,4>& vi)
{ {
simd4_t<double,4> mv(m.m4x4()[0]); simd4_t<double,4> mv(m);
mv.v4()[0] *= vi.ptr()[0]; mv *= vi.ptr()[0];
mv.v4()[1] *= vi.ptr()[2];
for (int i=1; i<4; i+=2) { for (int i=1; i<4; i+=2) {
simd4_t<double,4> row = m.m4x4()[i]; simd4_t<double,4> row = m.m4x4()[i];
row.v4()[0] *= vi.ptr()[i]; row *= vi.ptr()[i];
row.v4()[1] *= vi.ptr()[i+2]; mv.v4()[0] += row.v4()[0];
mv += row; mv.v4()[1] += row.v4()[1];
} }
return mv; return mv;
} }
#endif
template<> template<>
inline simd4x4_t<double,4> operator*(const simd4x4_t<double,4>& m1, const simd4x4_t<double,4>& m2) inline simd4x4_t<double,4> operator*(const simd4x4_t<double,4>& m1, const simd4x4_t<double,4>& m2)