This example illustrates array summing using a function that sums an input array of 16 one-byte values.
// Rolled version: 16 iterations of a loop, one addition per iteration.
//
// bytes: array holding at least 16 byte values; exactly the first 16 are read.
// Returns the sum of those 16 values (at most 16 * UCHAR_MAX, so it always
// fits in an int).
int rolled_sum(unsigned char bytes[16])
{
    int sum = 0;

    for (int i = 0; i < 16; ++i) {
        sum += bytes[i];
    }
    return sum;
}

// Unrolled version: 4 iterations of a loop, with 4 additions in each
// iteration. Each of the four partial sums is updated independently of the
// others within an iteration; they are combined once after the loop.
//
// bytes: array holding at least 16 byte values; exactly the first 16 are read.
// Returns the sum of those 16 values.
int unrolled_sum(unsigned char bytes[16])
{
    int sum[4] = {0, 0, 0, 0};

    for (int i = 0; i < 16; i += 4) {
        sum[0] += bytes[i + 0];
        sum[1] += bytes[i + 1];
        sum[2] += bytes[i + 2];
        sum[3] += bytes[i + 3];
    }
    return sum[0] + sum[1] + sum[2] + sum[3];
}

// The vector version only exists when the compiler targets the
// Vector/SIMD Multimedia Extension (AltiVec / VMX).
#if defined(__ALTIVEC__)
#include <altivec.h>

// Vectorized for Vector/SIMD Multimedia Extension: the 16 bytes are summed
// with two vector instructions instead of a loop.
//
// bytes: array holding at least 16 byte values; no particular alignment is
//        required.
// Returns the sum of those 16 values.
//
// NOTE(review): the lvsl/perm load idiom and the read of word 3 follow the
// big-endian element numbering of the original example -- confirm before
// building for a little-endian POWER target.
int vectorized_sum(unsigned char bytes[16])
{
    vector unsigned char vbytes;
    // The union gives scalar access to the four words of the vector result.
    union {
        int i[4];
        vector signed int v;
    } sum;
    vector unsigned int zero = (vector unsigned int){0};

    // Perform a misaligned vector load of the 16 bytes: load the aligned
    // block holding the first byte and the aligned block holding the last
    // byte, then shift the wanted bytes into place with the permute mask
    // produced by vec_lvsl.
    //
    // The second load uses offset 15, not 16. vec_ld ignores the low four
    // address bits, so offset 15 reaches the following 16-byte block only
    // when bytes is misaligned; for an aligned input it re-reads the same
    // block and never touches memory past the end of the array.
    vbytes = vec_perm(vec_ld(0, bytes), vec_ld(15, bytes), vec_lvsl(0, bytes));

    // Sum the 16 bytes of the vector: vec_sum4s adds each group of four
    // bytes into one 32-bit word, then vec_sums adds those four words
    // together and leaves the total in the last word.
    sum.v = vec_sums((vector signed int)vec_sum4s(vbytes, zero),
                     (vector signed int)zero);

    // Extract the sum and return the result.
    return sum.i[3];
}
#endif /* __ALTIVEC__ */