The vector addition sample adds two vectors A and B to produce C, where $C_i = A_i + B_i$. In this particular case the vectors A and B are filled with trigonometric data and summed. The CPU version presented is self-explanatory. Both C and Fortran versions are presented. The full source can be viewed or downloaded from the OLCF GitHub.

vecAdd.c

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/*
 * Vector addition sample.
 *
 * Fills a[i] = sin(i)^2 and b[i] = cos(i)^2, adds them element-wise into c,
 * then prints the mean of c. Because sin^2 + cos^2 = 1, the printed value
 * should equal 1 within floating-point error.
 *
 * Returns 0 on success, EXIT_FAILURE if any vector cannot be allocated.
 * Command-line arguments are not used.
 */
int main( int argc, char *argv[] )
{
    (void)argc;
    (void)argv;

    // Size of vectors
    const int n = 100000;

    // Size, in bytes, of each vector (widen n before multiplying)
    const size_t bytes = (size_t)n * sizeof(double);

    // Input vectors a and b, output vector c
    double *a = (double*)malloc(bytes);
    double *b = (double*)malloc(bytes);
    double *c = (double*)malloc(bytes);

    // malloc returns NULL on failure; bail out instead of dereferencing it.
    // free(NULL) is a no-op, so releasing all three unconditionally is safe.
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "vecAdd: failed to allocate %zu bytes per vector\n", bytes);
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    // Initialize content of input vectors: a[i] = sin(i)^2, b[i] = cos(i)^2
    for (int i = 0; i < n; i++) {
        a[i] = sin(i)*sin(i);
        b[i] = cos(i)*cos(i);
    }

    // Sum component wise and save result into vector c
    for (int i = 0; i < n; i++) {
        c[i] = a[i] + b[i];
    }

    // Sum up vector c and print result divided by n, this should equal 1 within error
    double sum = 0.0;
    for (int i = 0; i < n; i++) {
        sum += c[i];
    }
    sum = sum/n;
    printf("final result: %f\n", sum);

    // Release memory
    free(a);
    free(b);
    free(c);

    return 0;
}

Compiling vecAdd.c

$ cc vecAdd.c -o vecAdd.out

Running vecAdd.c

$ aprun ./vecAdd.out
final result: 1.000000

vecAdd.f90

! Vector addition sample.
!
! Fills a(i) = sin(i)^2 and b(i) = cos(i)^2 for i = 1..n, adds them
! element-wise into c, then prints the mean of c. Because
! sin^2 + cos^2 = 1, the printed value should equal 1 within
! floating-point error.
program main
    implicit none

    ! Kind with at least 15 significant digits (double precision)
    integer, parameter :: dp = selected_real_kind(15, 307)

    ! Size of vectors
    integer, parameter :: n = 100000

    ! Input vectors
    real(dp), dimension(:), allocatable :: a, b
    ! Output vector
    real(dp), dimension(:), allocatable :: c

    integer  :: i, ierr
    ! Mean of c. Named "mean" rather than "sum" so the intrinsic sum()
    ! stays usable, and always assigned before it is read.
    real(dp) :: mean

    ! Allocate memory for each vector; stop with a non-zero code on failure
    allocate(a(n), b(n), c(n), stat=ierr)
    if (ierr /= 0) then
        print *, 'vecAdd: failed to allocate vectors'
        stop 1
    end if

    ! Initialize content of input vectors: a(i) = sin(i)^2, b(i) = cos(i)^2
    do i = 1, n
        a(i) = sin(real(i, dp))**2
        b(i) = cos(real(i, dp))**2
    end do

    ! Sum component wise and save result into vector c
    c = a + b

    ! Average vector c and print the result; this should equal 1 within error.
    ! An explicit format is used so the output reads "final result: 1.000000",
    ! matching the C version (list-directed output would pad and print
    ! many more digits).
    mean = sum(c) / real(n, dp)
    print '(a, f8.6)', 'final result: ', mean

    ! Release memory
    deallocate(a, b, c)

end program main

Compiling vecAdd.f90

$ ftn vecAdd.f90 -o vecAdd.out

Running vecAdd.f90

$ aprun ./vecAdd.out
final result: 1.000000