#include <arm_sve.h> // The magical SVE library for modern ARM chips
#include <cstdint>
#include <iostream>
// Element-wise single-precision addition using Arm SVE intrinsics:
// C[k] = A[k] + B[k] for every k in [0, n).
//
// The loop is vector-length agnostic. Each pass handles svcntw() floats (the
// number of 32-bit lanes in one SVE register on the running CPU), and a
// predicate disables the lanes that would fall past element n-1, so no scalar
// tail loop is needed and nothing outside [0, n) is read or written.
//
// A, B  input arrays; elements [0, n) of each are read.
// C     output array; elements [0, n) are written. Passing the same array as
//       an input and as C is fine: each chunk is loaded before it is stored.
// n     element count; the function does nothing when n <= 0.
void add_arrays_sve(const float* A, const float* B, float* C, int n) {
    // Index in 64 bits. svcntw() returns uint64_t, and with an int index the
    // final "i += svcntw()" could exceed INT_MAX for very large n, wrap to a
    // negative value, keep the loop alive and access memory before the arrays.
    const int64_t count = n;
    // The vector length cannot change while the loop runs, so query it once.
    const int64_t lanes = static_cast<int64_t>(svcntw());

    for (int64_t i = 0; i < count; i += lanes) {
        // Lane j of the predicate is active iff (i + j) < count.
        const svbool_t active = svwhilelt_b32_s64(i, count);

        // Inactive lanes are not loaded from memory.
        const svfloat32_t lhs = svld1_f32(active, &A[i]);
        const svfloat32_t rhs = svld1_f32(active, &B[i]);

        // "_x" (don't-care) predication: inactive lanes may hold anything,
        // which is acceptable because the predicated store never writes them.
        const svfloat32_t sum = svadd_f32_x(active, lhs, rhs);

        // Only active lanes are written back.
        svst1_f32(active, &C[i], sum);
    }
}
// Demo driver: adds two fixed ten-element arrays with add_arrays_sve and
// prints the sums on one line, each value followed by a single space.
int main() {
    constexpr int kCount = 10; // number of elements in every array below

    float lhs[kCount] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    float rhs[kCount] = {10, 10, 10, 10, 10, 10, 10, 10, 10, 10};
    float sums[kCount] = {}; // zero-initialised destination

    add_arrays_sve(lhs, rhs, sums, kCount);

    std::cout << "Results of SVE math: ";
    for (const float value : sums) {
        std::cout << value << " ";
    }
    std::cout << "\n";

    return 0;
}
Apr 3, 2026
[SIMD][SVE] example
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment
Note: Only a member of this blog may post a comment.