// GPU kernel
__global__ void integrate(int *n, double *sum)
{
    double h, x;
    int i;

    *sum = 0.0;
    h = 1.0 / (double) *n;
    for (i = 1; i <= *n; i++) {
        x = h * ((double) i - 0.5);
        *sum += 4.0 / (1.0 + x*x);
    }
    *sum *= h;
}

// notice the underscore "_" after the function name -- needed by gfortran
extern "C" void fortran_call_integrate_(int *n, double *pi)
{
    int    *n_d;   // device copy of n
    double *pi_d;  // device copy of pi

    // Allocate memory on GPU
    cudaMalloc( (void **) &n_d,  sizeof(int)    * 1 );
    cudaMalloc( (void **) &pi_d, sizeof(double) * 1 );

    // copy from CPU to GPU
    cudaMemcpy( n_d, n, sizeof(int) * 1, cudaMemcpyHostToDevice );

    // launch the kernel on a single block with a single thread
    integrate<<< 1, 1 >>>(n_d, pi_d);

    // copy back from GPU to CPU
    cudaMemcpy( pi, pi_d, sizeof(double) * 1, cudaMemcpyDeviceToHost );

    // free GPU memory
    cudaFree(n_d);
    cudaFree(pi_d);
}
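The Fortran side that drives this wrapper is not part of the listing above; a minimal sketch (program name, variable names, and the value of n are assumptions) could look like the following. Because gfortran lowercases external names and appends a trailing underscore, calling fortran_call_integrate from Fortran resolves to the fortran_call_integrate_ symbol defined above, and the integer and double precision arguments are passed by reference, matching the int* and double* parameters.

program pi_test
  implicit none
  integer          :: n
  double precision :: pi

  n = 1000000
  ! gfortran appends a trailing underscore, so this call binds to fortran_call_integrate_
  call fortran_call_integrate(n, pi)
  print *, 'pi = ', pi
end program pi_test

One possible build, assuming the files are named integrate.cu and pi_test.f90: compile the CUDA file with nvcc -c integrate.cu, then link with gfortran pi_test.f90 integrate.o -lcudart (adding -L for the CUDA runtime library directory if it is not on the default search path).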