Apr 3, 2026

[SIMD][SVE] example


#include <iostream>
#include <arm_sve.h> // The magical SVE library for modern ARM chips

// Element-wise sum of two float arrays using Arm SVE intrinsics:
// C[k] = A[k] + B[k] for every k in [0, n).
//
//   A, B : input arrays; only elements [0, n) are read.
//   C    : output array; only elements [0, n) are written.
//   n    : number of elements to process. Nothing is read or written when n <= 0.
//
// The loop is vector-length agnostic: it asks the hardware how many 32-bit
// lanes one vector holds and uses a predicate to switch off the lanes that
// would fall past the end of the data, so no scalar tail loop is needed.
void add_arrays_sve(const float* A, const float* B, float* C, int n) {
    // Number of 32-bit lanes per vector. It is loop-invariant, so query it
    // once. SVE vectors are at most 2048 bits (64 floats), so it fits an int.
    const int lanes = static_cast<int>(svcntw());

    for (int i = 0; i < n;) {
        // Predicate: lane k is active only while (i + k) < n. On the final
        // pass this disables the lanes beyond the last valid element.
        svbool_t mask = svwhilelt_b32(i, n);

        // Predicated loads: inactive lanes are not read from memory.
        svfloat32_t vecA = svld1_f32(mask, &A[i]);
        svfloat32_t vecB = svld1_f32(mask, &B[i]);

        // Add the active lanes; the _z form zeroes the inactive ones.
        svfloat32_t vecC = svadd_f32_z(mask, vecA, vecB);

        // Predicated store: only the active lanes are written to C.
        svst1_f32(mask, &C[i], vecC);

        // Stop as soon as this pass has covered the tail. Testing the
        // remaining count *before* advancing keeps `i` from being pushed
        // past INT_MAX (and wrapping negative) when n is within one vector
        // of INT_MAX, which would otherwise restart the loop out of bounds.
        if (n - i <= lanes) {
            break;
        }
        i += lanes;
    }
}

// Demo driver: adds two ten-element arrays with add_arrays_sve and prints
// the sums on one line.
int main() {
    // Ten is not a multiple of four, and an SVE vector always holds a
    // multiple of four floats, so the final pass always has inactive lanes.
    constexpr int count = 10;

    float lhs[count] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    float rhs[count] = {10, 10, 10, 10, 10, 10, 10, 10, 10, 10};
    float sums[count] = {}; // zero-filled output buffer

    add_arrays_sve(lhs, rhs, sums, count);

    std::cout << "Results of SVE math: ";
    for (const float value : sums) {
        std::cout << value << " ";
    }
    std::cout << "\n";

    return 0;
}

No comments:

Post a Comment

Note: Only a member of this blog may post a comment.