This example illustrates array summing using a function that sums an input array of 16 byte values (16 elements of one byte each).
// Baseline ("rolled") version: the loop body runs 16 times and performs
// a single addition on each pass.
// Returns the sum of bytes[0] through bytes[15].
int rolled_sum(unsigned char bytes[16])
{
    const unsigned char *cursor = bytes;
    int remaining = 16;
    int total = 0;

    while (remaining-- > 0) {
        total += *cursor++;
    }
    return total;
}
// Unrolled-by-four version: the loop body runs 4 times and performs
// 4 additions on each pass, one into each of 4 independent partial sums.
// Returns the sum of bytes[0] through bytes[15].
int unrolled_sum(unsigned char bytes[16])
{
    const unsigned char *group = bytes;
    const unsigned char *const end = bytes + 16;
    int lane0 = 0;
    int lane1 = 0;
    int lane2 = 0;
    int lane3 = 0;

    while (group != end) {
        lane0 += group[0];
        lane1 += group[1];
        lane2 += group[2];
        lane3 += group[3];
        group += 4;
    }

    // Fold the four partial sums into the final result.
    return lane0 + lane1 + lane2 + lane3;
}
// Vectorized for Vector/SIMD Multimedia Extension (AltiVec/VMX intrinsics).
//
// Sums the 16 bytes at `bytes` using vector instructions instead of a loop.
// `bytes` does not need to be 16-byte aligned.
// Returns the sum of bytes[0] through bytes[15].
int vectorized_sum(unsigned char bytes[16])
{
    vector unsigned char vbytes;
    // The union lets the scalar result be read back out of the vector.
    union {
        int i[4];
        vector signed int v;
    } sum;
    vector unsigned int zero = (vector unsigned int){0};

    // Perform a misaligned vector load of the 16 bytes.
    // vec_ld ignores the low 4 bits of the effective address, so two aligned
    // quadwords are loaded and vec_perm (steered by vec_lvsl) picks out the
    // 16 bytes that start at `bytes`.
    // The second load uses offset 15, not 16: when `bytes` is already
    // 16-byte aligned, offset 15 re-reads the same quadword instead of
    // touching the quadword past the end of the array (an out-of-bounds read
    // that can fault at a page boundary). When `bytes` is misaligned,
    // bytes + 15 still lands in the quadword holding the tail of the array.
    vbytes = vec_perm(vec_ld(0, bytes), vec_ld(15, bytes), vec_lvsl(0, bytes));

    // Sum the 16 bytes of the vector:
    // vec_sum4s adds each group of 4 bytes into one 32-bit word (4 partial
    // sums), then vec_sums adds those 4 words together.
    sum.v = vec_sums((vector signed int)vec_sum4s(vbytes, zero),
                     (vector signed int)zero);

    // Extract the sum and return the result; vec_sums places the total in
    // word element 3.
    // NOTE(review): the lvsl/perm idiom and the element index assume
    // big-endian element numbering -- confirm before building for a
    // little-endian target.
    return (sum.i[3]);
}