/*
* conjugate_gradient.c
*
* This program demonstrates the Conjugate Gradient (CG) method for
* solving a system of linear equations Ax = b, where A is
* symmetric and positive-definite.
*
* The algorithm is parallelized using OpenMP.
* The key parallel operations are:
* 1. Matrix-Vector product (A*p)
* 2. Dot products (r^T*r and p^T*Ap)
* 3. Vector updates (x = x + a*p, r = r - a*Ap, p = r + b*p)
*
* System to solve (same as Jacobi example, A is SPD):
* 10x + y + z = 12
* x + 10y + z = 12
* x + y + 10z = 12
*
* Solution: x=1, y=1, z=1
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>
// Define the size of the system
#define N 3
// Set convergence criteria
#define MAX_ITERATIONS N // CG converges in at most N iterations (in perfect arithmetic)
#define TOLERANCE 1e-6
/* Print the N entries of 'vec' on a single bracketed line, 6 decimals each. */
void print_vector(double vec[N]) {
    printf("[ ");
    for (int k = 0; k < N; k++)
        printf("%.6f ", vec[k]);
    printf("]\n");
}
/*
 * Dense matrix-vector product: out = A * vec, with A an N x N matrix.
 *
 * The outer (row) loop is split across OpenMP threads. Each thread
 * writes only its own rows of 'out', so no synchronization is needed.
 */
void mat_vec_mul(double A[N][N], double vec[N], double out[N]) {
    #pragma omp parallel for
    for (int row = 0; row < N; row++) {
        double acc = 0.0;
        for (int col = 0; col < N; col++)
            acc += A[row][col] * vec[col];
        out[row] = acc;
    }
}
/*
 * Inner product: returns vec1^T * vec2.
 *
 * Parallelized with an OpenMP '+' reduction: every thread accumulates a
 * private partial sum, and the runtime combines them safely at the end
 * of the parallel region.
 */
double dot_product(double vec1[N], double vec2[N]) {
    double total = 0.0;
    #pragma omp parallel for reduction(+:total)
    for (int k = 0; k < N; k++)
        total += vec1[k] * vec2[k];
    return total;
}
/*
 * In-place AXPY-style update: vec1[i] += scalar * vec2[i] for all i.
 *
 * Every iteration touches a distinct element of vec1, so the loop is
 * embarrassingly parallel — no reduction or synchronization required.
 */
void vec_update(double vec1[N], double scalar, double vec2[N]) {
    #pragma omp parallel for
    for (int k = 0; k < N; k++)
        vec1[k] += scalar * vec2[k];
}
/*
 * Solves the 3x3 SPD system A*x = b with the Conjugate Gradient method,
 * printing the residual norm after each iteration.
 *
 * Fixes vs. the original:
 *  - the initial residual is computed as r = b - A*x instead of assuming
 *    r = b, so the code stays correct if the initial guess is ever
 *    changed from zero;
 *  - a breakdown guard stops the iteration when p^T*A*p == 0, preventing
 *    a division by zero (possible at exact convergence, or if A is not
 *    positive-definite).
 */
int main() {
    double A[N][N] = {
        {10.0, 1.0, 1.0},
        {1.0, 10.0, 1.0},
        {1.0, 1.0, 10.0}
    };
    double b[N] = {12.0, 12.0, 12.0};
    double x[N] = {0.0, 0.0, 0.0}; /* Initial guess */
    double r[N];                   /* Residual vector (r = b - A*x) */
    double p[N];                   /* Search direction */
    double Ap[N];                  /* Result of A * p */
    double rs_old, rs_new, alpha, beta, pAp;
    double error;

    /* --- Initialization: r = b - A*x, p_0 = r_0 --- */
    mat_vec_mul(A, x, Ap); /* reuse Ap as scratch for A*x */
    #pragma omp parallel for
    for (int i = 0; i < N; i++) {
        r[i] = b[i] - Ap[i];
        p[i] = r[i];
    }

    rs_old = dot_product(r, r); /* rs_old = r^T * r */
    error = sqrt(rs_old);       /* initial residual norm */

    printf("Starting Conjugate Gradient...\n");
    printf("Initial guess x(0): ");
    print_vector(x);
    printf("Initial Error (Residual Norm): %.8f\n", error);
    printf("--------------------------------------\n");

    /* --- CG iteration loop --- */
    for (int iter = 0; iter < MAX_ITERATIONS && error > TOLERANCE; iter++) {
        /* 1. Ap = A * p */
        mat_vec_mul(A, p, Ap);
        /* 2. Curvature along the search direction: pAp = p^T * A * p */
        pAp = dot_product(p, Ap);
        /* Breakdown guard: pAp == 0 means p = 0 (exact convergence) or
         * A is not positive-definite; either way alpha is undefined. */
        if (pAp == 0.0) {
            break;
        }
        /* 3. Optimal step length along p */
        alpha = rs_old / pAp;
        /* 4. x = x + alpha * p */
        vec_update(x, alpha, p);
        /* 5. r = r - alpha * A*p */
        vec_update(r, -alpha, Ap);
        /* 6. New squared residual norm */
        rs_new = dot_product(r, r);
        error = sqrt(rs_new);
        /* 7. Conjugacy coefficient */
        beta = rs_new / rs_old;
        /* 8. p = r + beta * p — old p[i] is read before being
         * overwritten, and iterations are independent, so this
         * parallel loop is safe. */
        #pragma omp parallel for
        for (int i = 0; i < N; i++) {
            p[i] = r[i] + beta * p[i];
        }
        rs_old = rs_new;
        printf("Iter %2d | Residual Norm: %.8f\n", iter + 1, error);
    }

    printf("--------------------------------------\n");
    if (error <= TOLERANCE) {
        printf("Convergence reached.\n");
        printf("Final Solution:\n");
        print_vector(x);
    } else {
        printf("Failed to converge.\n");
        printf("Current Solution:\n");
        print_vector(x);
    }
    return 0;
}