SIMD pixel.cpp

Run Settings
LanguageC++
Language Version
Run Command
#include <stdint.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>

// The SIMD kernel only needs SSE2 (baseline on x86-64). The intrinsics header
// is pulled in only when that instruction set is enabled, so the file still
// builds -- using the scalar kernel -- on every other target.
#if defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#include <immintrin.h>
#define SAD_X4_HAVE_SSE2 1
#else
#define SAD_X4_HAVE_SSE2 0
#endif

#define FENC_STRIDE 64

typedef uint8_t pixel;

/**
 * Scalar reference implementation of the four-way sum of absolute differences.
 *
 * Compares the lx-by-ly block at pix1 (rows FENC_STRIDE bytes apart) with the
 * four blocks at pix2..pix5 (rows frefstride bytes apart) and stores the four
 * sums in res[0..3].
 *
 * Used as the portable fallback of sad_x4() and as the ground truth when
 * checking the SIMD kernel.
 */
template <int lx, int ly>
void sad_x4_ref(const pixel *pix1, const pixel *pix2, const pixel *pix3, const pixel *pix4, const pixel *pix5, intptr_t frefstride, int32_t *res)
{
    res[0] = 0;
    res[1] = 0;
    res[2] = 0;
    res[3] = 0;
    for (int y = 0; y < ly; y++)
    {
        for (int x = 0; x < lx; x++)
        {
            res[0] += abs(pix1[x] - pix2[x]);
            res[1] += abs(pix1[x] - pix3[x]);
            res[2] += abs(pix1[x] - pix4[x]);
            res[3] += abs(pix1[x] - pix5[x]);
        }
        pix1 += FENC_STRIDE;
        pix2 += frefstride;
        pix3 += frefstride;
        pix4 += frefstride;
        pix5 += frefstride;
    }
}

#if SAD_X4_HAVE_SSE2
/* SAD of 8 already-loaded source pixels against the 8 pixels at ref.
 * Only the low 8 bytes are loaded, so the upper 64-bit lane of the result is 0. */
static inline __m128i sad_8(const __m128i enc, const pixel *ref)
{
    return _mm_sad_epu8(enc, _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ref)));
}

/* SAD of 16 already-loaded source pixels against the 16 pixels at ref;
 * yields one partial sum per 64-bit lane. */
static inline __m128i sad_16(const __m128i enc, const pixel *ref)
{
    return _mm_sad_epu8(enc, _mm_loadu_si128(reinterpret_cast<const __m128i *>(ref)));
}

#if defined(__AVX2__)
/* SAD of 32 already-loaded source pixels against the 32 pixels at ref,
 * folded down to two partial sums (one per 64-bit lane of the result). */
static inline __m128i sad_32(const __m256i enc, const pixel *ref)
{
    const __m256i s = _mm256_sad_epu8(enc, _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ref)));
    return _mm_add_epi32(_mm256_castsi256_si128(s), _mm256_extracti128_si256(s, 1));
}
#endif

/* Adds the two 64-bit-lane partial sums of an accumulator into one scalar. */
static inline int32_t sad_hsum(const __m128i v)
{
    return static_cast<int32_t>(_mm_cvtsi128_si32(v) + _mm_cvtsi128_si32(_mm_srli_si128(v, 8)));
}
#endif

/**
 * Four-way sum of absolute differences (SIMD where available).
 *
 * @tparam lx        block width in pixels
 * @tparam ly        block height in rows
 * @param pix1       source block; consecutive rows are FENC_STRIDE bytes apart
 * @param pix2..pix5 the four candidate blocks; rows are frefstride bytes apart
 * @param frefstride row pitch (in bytes) shared by the four candidate blocks
 * @param res        output array of 4 sums: res[i] = SAD(pix1, pix(i+2))
 *
 * Each row is processed in chunks of 32 pixels (AVX2 builds only), then 16,
 * then 8, and any remaining pixels are handled by a scalar loop. Every load
 * therefore stays inside the lx pixels of its row. Results are identical to
 * sad_x4_ref(). Without SSE2 the call simply forwards to sad_x4_ref().
 */
template <int lx, int ly>
void sad_x4(const pixel *pix1, const pixel *pix2, const pixel *pix3, const pixel *pix4, const pixel *pix5, intptr_t frefstride, int32_t *res)
{
#if SAD_X4_HAVE_SSE2
    const pixel *ref[4] = {pix2, pix3, pix4, pix5};
    __m128i acc[4];
    int32_t tail[4] = {0, 0, 0, 0};
    for (int r = 0; r < 4; r++)
    {
        acc[r] = _mm_setzero_si128();
    }

    for (int y = 0; y < ly; y++)
    {
        int x = 0;
#if defined(__AVX2__)
        for (; x + 32 <= lx; x += 32)
        {
            const __m256i enc = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(pix1 + x));
            for (int r = 0; r < 4; r++)
            {
                acc[r] = _mm_add_epi32(acc[r], sad_32(enc, ref[r] + x));
            }
        }
#endif
        for (; x + 16 <= lx; x += 16)
        {
            const __m128i enc = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pix1 + x));
            for (int r = 0; r < 4; r++)
            {
                acc[r] = _mm_add_epi32(acc[r], sad_16(enc, ref[r] + x));
            }
        }
        for (; x + 8 <= lx; x += 8)
        {
            const __m128i enc = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(pix1 + x));
            for (int r = 0; r < 4; r++)
            {
                acc[r] = _mm_add_epi32(acc[r], sad_8(enc, ref[r] + x));
            }
        }
        // Widths that are not a multiple of 8 finish with plain scalar code.
        for (; x < lx; x++)
        {
            for (int r = 0; r < 4; r++)
            {
                tail[r] += abs(pix1[x] - ref[r][x]);
            }
        }

        pix1 += FENC_STRIDE;
        for (int r = 0; r < 4; r++)
        {
            ref[r] += frefstride;
        }
    }

    for (int r = 0; r < 4; r++)
    {
        res[r] = tail[r] + sad_hsum(acc[r]);
    }
#else
    sad_x4_ref<lx, ly>(pix1, pix2, pix3, pix4, pix5, frefstride, res);
#endif
}

// Define SAD_X4_NO_MAIN to compile only the kernels (e.g. when this file is
// embedded in a test harness that provides its own entry point).
#ifndef SAD_X4_NO_MAIN
/**
 * Benchmark driver: checks the kernel against the scalar reference on an 8x8
 * block, then times repeated calls and prints the elapsed CPU time.
 */
int main()
{
    const int kWidth = 8;
    const int kHeight = 8;
    const int kRuns = 1000000;
    const intptr_t kRefStride = FENC_STRIDE;

    // Buffers span every row the kernel touches: kHeight rows at the row
    // pitch. The candidate blocks start at small offsets (at most 7 + 3) into
    // the reference buffer, which still keeps every row inside the buffer.
    static pixel fenc[FENC_STRIDE * kHeight];
    static pixel fref[FENC_STRIDE * kHeight];
    for (int i = 0; i < FENC_STRIDE * kHeight; i++)
    {
        fenc[i] = static_cast<pixel>(i * 7 + 3);
        fref[i] = static_cast<pixel>(i * 13 + 5);
    }

    int32_t res[4];
    int32_t expected[4];
    sad_x4<kWidth, kHeight>(fenc, fref, fref + 1, fref + 2, fref + 3, kRefStride, res);
    sad_x4_ref<kWidth, kHeight>(fenc, fref, fref + 1, fref + 2, fref + 3, kRefStride, expected);
    std::cout << "SAD: " << res[0] << ' ' << res[1] << ' ' << res[2] << ' ' << res[3] << '\n';
    for (int r = 0; r < 4; r++)
    {
        if (res[r] != expected[r])
        {
            std::cout << "Mismatch in res[" << r << "]: expected " << expected[r] << '\n';
            return 1;
        }
    }

    // A single call is far below the resolution of clock(), so time many runs.
    // The checksum keeps the results live; the varying offset keeps the calls
    // from being identical.
    int64_t checksum = 0;
    const clock_t start = clock();
    for (int run = 0; run < kRuns; run++)
    {
        const pixel *base = fref + (run & 7);
        sad_x4<kWidth, kHeight>(fenc, base, base + 1, base + 2, base + 3, kRefStride, res);
        checksum += res[0] + res[1] + res[2] + res[3];
    }
    const clock_t end = clock();

    const double time_taken = double(end - start) / double(CLOCKS_PER_SEC);
    std::cout << "Checksum: " << checksum << '\n';
    std::cout << "\nTime taken: " << time_taken << "s\n";
    return 0;
}
#endif
Editor Settings
Theme
Key bindings
Full width
Lines