_mm_mpsadbw_epu8
Microsoft Specific
Emits the Streaming SIMD Extensions 4 (SSE4) instruction mpsadbw. This instruction computes eight sums of absolute differences, each taken over four pairs of unsigned 8-bit integers selected from the two parameters.
/* Returns eight 16-bit sums of absolute differences between 8-bit
   elements of a and b; mask selects which elements are used. */
__m128i _mm_mpsadbw_epu8(
__m128i a,
__m128i b,
const int mask
);
Parameters
[in] a
A 128-bit parameter that contains sixteen 8-bit unsigned integers.
[in] b
A 128-bit parameter that contains sixteen 8-bit unsigned integers.
[in] mask
A constant that specifies which integers to use in the calculation.
Result value
A 128-bit result that contains eight 16-bit unsigned integers. The values of these integers can be computed as follows:
i = mask2 * 4                  // mask2 is bit 2 of mask, so i is 0 or 4
j = (2 * mask1 + mask0) * 4    // bits 1:0 of mask, so j is 0, 4, 8, or 12
for (k = 0; k < 8; k = k + 1) {
t0 = abs(a[i + k + 0] - b[j + 0])
t1 = abs(a[i + k + 1] - b[j + 1])
t2 = abs(a[i + k + 2] - b[j + 2])
t3 = abs(a[i + k + 3] - b[j + 3])
r[k] = t0 + t1 + t2 + t3
}
Requirements
| Intrinsic | Architecture |
|---|---|
| _mm_mpsadbw_epu8 | x86, x64 |

Header file <smmintrin.h>
Remarks
a[n] and b[n] indicate the nth ordered unsigned 8-bit integer of parameters a and b where a[0] and b[0] are the lowest 8 bits. r[n] is the nth ordered unsigned 16-bit element of result r, where r[0] refers to the lowest 16 bits. mask0, mask1, and mask2 are the three least significant bits of parameter mask.
Before you use this intrinsic, software must ensure that the processor supports the instruction.
Example
#include <stdio.h>
#include <stdlib.h>
#include <smmintrin.h>
/*
 * Demonstrates _mm_mpsadbw_epu8: runs the intrinsic on two sample vectors
 * and prints each of the eight results next to the same sum of absolute
 * differences computed one element at a time.
 *
 * Uses the Microsoft-specific m128i_u8 / m128i_u16 members of __m128i and
 * printf_s. Always returns 0.
 */
int main ()
{
    __m128i a, b;
    // mask = 5 (binary 101): bit 2 is 1, so indexing into a starts at 4;
    // bits 1:0 are 01, so indexing into b also starts at 4.
    const int mask = 5;
    // Starting offsets used by the reference computation below, derived
    // from the same mask bits the instruction reads.
    const int a_start = ((mask >> 2) & 1) * 4;
    const int b_start = (mask & 3) * 4;

    a.m128i_u8[0] = 15;
    a.m128i_u8[1] = 60;
    a.m128i_u8[2] = 55;
    a.m128i_u8[3] = 31;
    a.m128i_u8[4] = 0;
    a.m128i_u8[5] = 1;
    a.m128i_u8[6] = 2;
    a.m128i_u8[7] = 4;
    a.m128i_u8[8] = 8;
    a.m128i_u8[9] = 16;
    a.m128i_u8[10] = 32;
    a.m128i_u8[11] = 64;
    a.m128i_u8[12] = 128;
    a.m128i_u8[13] = 255;
    a.m128i_u8[14] = 1;
    a.m128i_u8[15] = 17;

    b.m128i_u8[0] = 2;
    b.m128i_u8[1] = 4;
    b.m128i_u8[2] = 8;
    b.m128i_u8[3] = 64;
    b.m128i_u8[4] = 255;
    b.m128i_u8[5] = 0;
    b.m128i_u8[6] = 1;
    b.m128i_u8[7] = 16;
    b.m128i_u8[8] = 32;
    b.m128i_u8[9] = 64;
    b.m128i_u8[10] = 128;
    b.m128i_u8[11] = 255;
    b.m128i_u8[12] = 75;
    b.m128i_u8[13] = 31;
    b.m128i_u8[14] = 42;
    b.m128i_u8[15] = 11;

    __m128i res = _mm_mpsadbw_epu8(a, b, mask);

    // Reference result: for each of the eight outputs, slide a four-byte
    // window one position further along a and compare it against the same
    // four bytes of b.
    __m128i final;
    int temp1, temp2, temp3, temp4, index;
    for (index = 0; index < 8; index++)
    {
        temp1 = abs(a.m128i_u8[a_start + index] - b.m128i_u8[b_start]);
        temp2 = abs(a.m128i_u8[a_start + index + 1] - b.m128i_u8[b_start + 1]);
        temp3 = abs(a.m128i_u8[a_start + index + 2] - b.m128i_u8[b_start + 2]);
        temp4 = abs(a.m128i_u8[a_start + index + 3] - b.m128i_u8[b_start + 3]);
        final.m128i_u16[index] = temp1 + temp2 + temp3 + temp4;
    }

    // Print the expected value (reference loop) followed by the value the
    // intrinsic produced, two results per call.
    printf_s("Res0 should be %d: %d\nRes1 should be %d: %d\n",
        final.m128i_u16[0], res.m128i_u16[0],
        final.m128i_u16[1], res.m128i_u16[1]);
    printf_s("Res2 should be %d: %d\nRes3 should be %d: %d\n",
        final.m128i_u16[2], res.m128i_u16[2],
        final.m128i_u16[3], res.m128i_u16[3]);
    printf_s("Res4 should be %d: %d\nRes5 should be %d: %d\n",
        final.m128i_u16[4], res.m128i_u16[4],
        final.m128i_u16[5], res.m128i_u16[5]);
    printf_s("Res6 should be %d: %d\nRes7 should be %d: %d\n",
        final.m128i_u16[6], res.m128i_u16[6],
        final.m128i_u16[7], res.m128i_u16[7]);

    return 0;
}
Res0 should be 269: 269
Res1 should be 267: 267
Res2 should be 264: 264
Res3 should be 290: 290
Res4 should be 342: 342
Res5 should be 446: 446
Res6 should be 653: 653
Res7 should be 588: 588