🎉 First commit

This commit is contained in:
Nicolas Rojas 2025-05-29 20:26:11 -05:00
commit 3b8d21dde2
Signed by: nicolas
SSH key fingerprint: SHA256:gi4v1tDcXHbV+fkvqqs9b5rkFlo4Q9DHXp90MifkZK0
17 changed files with 571 additions and 0 deletions

55
.gitignore vendored Normal file
View file

@ -0,0 +1,55 @@
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf
# Experiments data
*.csv

5
LICENSE Normal file
View file

@ -0,0 +1,5 @@
Copyright (c) 2024 Nicolas Rojas
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

86
MatMult/MM1c.c Normal file
View file

@ -0,0 +1,86 @@
/************************************************************************
* Autor: J. Corredor
* Fecha: Octubre 2023
* Computación de Alto Rendimiento
* Maestría en Inteligencia Artificial
* Tema: Programa de Multiplicación de Matrices usando hilos OpenMP
* -Algoritmo Clásico filasXcolumnas
*************************************************************************/
#include "sample.h"
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Fallback two-argument minimum, defined only when MIN is not already
 * available. The selected argument is evaluated twice, so avoid passing
 * expressions with side effects.
 */
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
/* Capacity of the backing store, in doubles: 3 * (64 * 1024 * 1024). */
#define DATA_SZ (1024 * 1024 * 64 * 3)
/* Statically allocated pool; main() places the a, b and c matrices in it. */
static double MEM_CHUNK[DATA_SZ];
/*
 * Matrix_Init_col - fill the three SZ x SZ matrices with their start values.
 *
 * SZ:      matrix dimension (rows and columns).
 * a, b, c: buffers holding at least SZ * SZ doubles each.
 *
 * For every pair (j, k) with 0 <= j, k < SZ the element at offset j + k * SZ
 * is set to 2.0 * (j + k) in a, 3.2 * (j - k) in b and 1.0 in c.
 */
void Matrix_Init_col(int SZ, double *a, double *b, double *c) {
  int j, k;

  /*
   * The outer index must be driven by its own loop: without it k is read
   * uninitialised and at most one stripe of SZ elements gets written.
   */
  for (k = 0; k < SZ; k++) {
    for (j = 0; j < SZ; j++) {
      a[j + k * SZ] = 2.0 * (j + k);
      b[j + k * SZ] = 3.2 * (j - k);
      c[j + k * SZ] = 1.0;
    }
  }
}
int main(int argc, char **argv) {
int N;
if (argc < 2) {
printf("MM1c MatrixSize [Sample arguments ...]\n");
return -1;
}
N = (int)atof(argv[1]);
argc--;
argv++;
if (N > 1024 * 10) {
printf("Unvalid MatrixSize\n");
return -1;
}
Sample_Init(argc, argv);
#pragma omp parallel
{
int NTHR, THR, SZ;
int i, j, k;
double *a, *b, *c;
SZ = N;
THR = Sample_PAR_install();
NTHR = omp_get_num_threads();
a = MEM_CHUNK;
b = a + SZ * SZ;
c = b + SZ * SZ;
#pragma omp master
Matrix_Init_col(SZ, a, b, c);
Sample_Start(THR);
#pragma omp for
for (i = 0; i < SZ; i++)
for (j = 0; j < SZ; j++) {
double *pA, *pB, S;
S = 0.0;
pA = a + (i * SZ);
pB = b + j;
for (k = SZ; k > 0; k--, pA++, pB += SZ)
S += (*pA * *pB);
c[i * SZ + j] = S;
}
Sample_Stop(THR);
}
Sample_End(&N);
}

88
MatMult/MM1r.c Normal file
View file

@ -0,0 +1,88 @@
/************************************************************************
* Autor: N Rojas
* Fecha: Noviembre 2023
* Computación de Alto Rendimiento
* Maestría en Inteligencia Artificial
* Tema: Programa de Multiplicación de Matrices usando hilos OpenMP
* -Algoritmo filasXfilas
*************************************************************************/
#include "sample.h"
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Fallback two-argument minimum, defined only when MIN is not already
 * available. The selected argument is evaluated twice, so avoid passing
 * expressions with side effects.
 */
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
/* Capacity of the backing store, in doubles: 3 * (64 * 1024 * 1024). */
#define DATA_SZ (1024 * 1024 * 64 * 3)
/* Statically allocated pool; main() places the a, b and c matrices in it. */
static double MEM_CHUNK[DATA_SZ];
/*
 * Matrix_Init_col - fill the three SZ x SZ matrices with their start values.
 *
 * SZ:      matrix dimension (rows and columns).
 * a, b, c: buffers holding at least SZ * SZ doubles each.
 *
 * For every pair (j, k) with 0 <= j, k < SZ the element at offset j + k * SZ
 * is set to 2.0 * (j + k) in a, 3.2 * (j - k) in b and 0.0 in c. The zeroed
 * c is what main() accumulates the partial products into.
 */
void Matrix_Init_col(int SZ, double *a, double *b, double *c) {
  int j, k;

  /*
   * The outer index must be driven by its own loop: without it k is read
   * uninitialised and at most one stripe of SZ elements gets written.
   */
  for (k = 0; k < SZ; k++) {
    for (j = 0; j < SZ; j++) {
      a[j + k * SZ] = 2.0 * (j + k);
      b[j + k * SZ] = 3.2 * (j - k);
      c[j + k * SZ] = 0.0;
    }
  }
}
int main(int argc, char **argv) {
int N;
if (argc < 2) {
printf("MM1r MatrixSize [Sample arguments ...]\n");
return -1;
}
N = (int)atof(argv[1]);
argc--;
argv++;
if (N > 1024 * 10) {
printf("Unvalid MatrixSize\n");
return -1;
}
Sample_Init(argc, argv);
#pragma omp parallel
{
int NTHR, THR, SZ;
int i, j, k;
double *a, *b, *c;
SZ = N;
THR = Sample_PAR_install();
NTHR = omp_get_num_threads();
a = MEM_CHUNK;
b = a + SZ * SZ;
c = b + SZ * SZ;
#pragma omp master
Matrix_Init_col(SZ, a, b, c);
Sample_Start(THR);
#pragma omp for
for (i = 0; i < SZ; ++i) {
double *pA, *pB, S;
pA = a + (i * SZ);
for (j = 0; j < SZ; ++j) {
pB = b + (j * SZ);
for (k = 0; k < SZ; ++k, ++pB) {
S = *pA * *pB;
c[i * SZ + k] += S;
}
++pA;
}
}
Sample_Stop(THR);
}
Sample_End(&N);
}

20
MatMult/Makefile Normal file
View file

@ -0,0 +1,20 @@
# Build rules for the matrix multiplication benchmarks.
# Each program is compiled together with the timing helpers (Otime.c) and the
# resulting binary is written to $(BINDIR).
GCC = gcc
# Compiler options: enable OpenMP and optimise
oT = -fopenmp -O3
# Libraries appended at the end of the compile/link command
CFLAGS = -lm
# Extra source compiled into every program
oL= Otime.c
BINDIR = ../
PROGS = $(BINDIR)MM1c $(BINDIR)MM1r

# None of these targets names a file created in this directory (binaries go to
# $(BINDIR)), so mark them phony to keep stray files called "all", "clean",
# "MM1c" or "MM1r" from suppressing the rules.
.PHONY: all clean MM1c MM1r

all: MM1c MM1r

clean:
	$(RM) $(PROGS)

MM1c:
	$(GCC) $(oT) $(oL) $@.c -o $(BINDIR)$@ $(CFLAGS)

MM1r:
	$(GCC) $(oT) $(oL) $@.c -o $(BINDIR)$@ $(CFLAGS)

63
MatMult/Otime.c Normal file
View file

@ -0,0 +1,63 @@
#include <errno.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
/*
 * Largest thread count accepted by Sample_Init(); also the length of the
 * per-thread timestamp arrays below.
 */
#define MAX_THREADS 20
/* Start timestamps, one slot per thread number, written by Sample_Start(). */
struct timeval start[MAX_THREADS];
/*
 * Stop timestamps, one slot per thread number, written by Sample_Stop().
 * Sample_End() overwrites each slot with the elapsed interval.
 */
struct timeval stop[MAX_THREADS];
/* Thread count parsed by Sample_Init(); Sample_End() reports this many rows. */
static int N_THREADS;
/*
 * Wait at an OpenMP barrier, then store the current time as the start
 * timestamp of thread THR.
 */
void Sample_Start(int THR) {
#pragma omp barrier
  gettimeofday(&start[THR], NULL);
}
/* Store the current time as the stop timestamp of thread THR. */
void Sample_Stop(int THR) {
  gettimeofday(stop + THR, NULL);
}
/*
 * Sample_Init - read the thread count from the command line and hand it to
 * OpenMP.
 *
 * argc, argv: argument vector; argv[1] holds the number of threads.
 *
 * Prints a message and terminates the process with status 1 when fewer than
 * three entries are supplied or when the thread count lies outside
 * 1..MAX_THREADS.
 */
void Sample_Init(int argc, char *argv[]) {
  double requested;

  /* NOTE(review): only argv[1] is read here although three entries are
   * required; confirm whether the extra argument is still needed. */
  if (argc < 3) {
    printf("Sample parameters: NumberThreads \n");
    exit(1);
  }
  requested = atof(argv[1]);
  /*
   * Validate before converting to int: negative counts used to slip through
   * the zero test, and an out-of-range double-to-int conversion is undefined.
   * The negated comparison also rejects NaN.
   */
  if (!(requested >= 1.0 && requested < MAX_THREADS + 1.0)) {
    printf("Number of Threads is not valid\n");
    exit(1);
  }
  N_THREADS = (int)requested;
  omp_set_num_threads(N_THREADS);
}
/* Return the OpenMP thread number of the calling thread. */
int Sample_PAR_install() {
  return omp_get_thread_num();
}
/*
 * Sample_End - print one CSV row per thread: size, thread count, thread
 * number and elapsed time in microseconds.
 *
 * SZ: pointer to the value printed in the first column.
 *
 * Each stop[] entry is replaced by the difference stop - start while the row
 * is produced.
 */
void Sample_End(const int *SZ) {
  for (int tid = 0; tid < N_THREADS; ++tid) {
    struct timeval *elapsed = &stop[tid];
    const struct timeval *began = &start[tid];

    printf("%1.0f,", (double)*SZ);
    printf("%1.0f,", (double)N_THREADS);
    printf("%1.0f,", (double)tid);

    /* Subtract the microsecond part first, borrowing a second if needed. */
    elapsed->tv_usec -= began->tv_usec;
    if (elapsed->tv_usec < 0) {
      elapsed->tv_usec += 1000000;
      elapsed->tv_sec--;
    }
    elapsed->tv_sec -= began->tv_sec;

    printf("%1.0f\n", (double)(elapsed->tv_sec * 1000000 + elapsed->tv_usec));
  }
}

5
MatMult/sample.h Normal file
View file

@ -0,0 +1,5 @@
/* Timing helpers implemented in Otime.c. */

/* Reads the thread count from argv[1] and passes it to OpenMP; exits the
 * process with status 1 when the arguments are invalid. */
extern void Sample_Init(int argc, char *argv[]);
/* Waits at an OpenMP barrier, then records the start time of thread THR. */
extern void Sample_Start(int THR);
/* Records the stop time of thread THR. */
void Sample_Stop(int THR);
/* Returns the OpenMP thread number of the calling thread. */
int Sample_PAR_install(void);
/* Prints one CSV row per thread: *SZ, thread count, thread number and
 * elapsed microseconds. */
void Sample_End(const int *SZ);

60
README.md Normal file
View file

@ -0,0 +1,60 @@
# Parallel matrix multiplication
Performance tests for parallel matrix multiplication algorithms.
## Usage
1. Install the required Python libraries, listed in `requirements.txt`. The following command shows how to do it using `pip`, but `conda` or any other package manager can also be used.
```shell
pip install -r requirements.txt
```
2. Build the matrix multiplication executables using `make`.
```shell
cd MatMult
make
```
3. Run the experiments using the launcher.
```shell
cd ..
python launcher.py experiments.csv
```
4. Generate the graphics, using the results of the experiments from every machine.
```shell
python graphics.py experiments1.csv experiments2.csv experiments3.csv -o img/
```
## Results
The experimentation results are summarized in the following figures.
![Matrix size vs Time line plot](img/size-time.png)
![Threads vs Time line plot](img/threads-time.png)
![Matrix size vs Threads heatmap](img/size-threads-time.png)
![Time distribution violin plot](img/distribution.png)
In this case, the experiments were run on 3 different machines:
- A ml.c5.9xlarge AWS instance, with 36 vCPUs and 72 GiB RAM.
- A ml.m5.8xlarge AWS instance, with 32 vCPUs and 128 GiB RAM.
- A ml.r5.8xlarge AWS instance, with 32 vCPUs and 256 GiB RAM.
Every matrix size - number of threads combination was executed 30 times, in order to make the experiments statistically significant. The values in the graphics represent the mean values of those 30 repetitions.
### Metrics
The following metrics summarize the experimentation results, regarding the algorithms' performance for different numbers of threads:
- Speed up: $$Sp = \frac{t_{serial}}{t_{parallel}}$$
![SpeedUp](img/speedup.png)
- Efficiency: $$Ep = \frac{Sp}{p}$$
![Efficiency](img/efficiency.png)

134
graphics.py Normal file
View file

@ -0,0 +1,134 @@
# Imports
from argparse import ArgumentParser
from os import path
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.colors import LogNorm
# Parse execution arguments
parser = ArgumentParser(description="Create graphics from the experiments data.")
parser.add_argument(
    "input_files",
    help="CSV files to be read and processed. Should be outputs of launcher.py, from different machines",
    nargs="+",
)
parser.add_argument(
    "-o", "--out", help="Folder where the images will be saved", required=True
)
args = parser.parse_args()
input_files = args.input_files
out_folder = args.out

# Read the data in every csv input and combine into single dataframe
data = []
for input_file in input_files:
    partial_data = pd.read_csv(input_file)
    # Label the rows with the file name, without directories or extension.
    # Splitting the raw path on its first "." produced an empty or truncated
    # name for inputs such as "./results/machine.csv".
    partial_data["Machine"] = path.splitext(path.basename(input_file))[0]
    data.append(partial_data)
data = pd.concat(data, ignore_index=True)

# Transform time from microseconds to seconds
data["Time (secs)"] = data["Time"] * 10**-6

# Fix machine names to agree with aws documentation
data["Machine"] = data["Machine"].str.replace("_", ".", regex=False)

# Obtain list of unique machines and algorithms
machines = data["Machine"].unique()
algorithms = data["Algorithm"].unique()
num_machines = len(machines)
num_algorithms = len(algorithms)

fig_width = 12

# Create matplotlib figure to plot time heatmap.
# squeeze=False keeps `ax` two-dimensional even when there is a single machine
# or a single algorithm, so ax[row, col] is always valid.
fig, ax = plt.subplots(num_machines, num_algorithms, squeeze=False)
fig.set_size_inches(fig_width, 3 * fig_width // 2)
# Iterate over every machine and algorithm combination
for row, machine in enumerate(machines):
    for col, algorithm in enumerate(algorithms):
        # Boolean masks avoid building query strings from data values
        subset = data[(data["Algorithm"] == algorithm) & (data["Machine"] == machine)]
        # Create time heatmap, with matrix vs threads dimensions
        heatmap = subset.pivot_table(
            values="Time (secs)",
            index="Matrix_Size",
            columns="N_Threads",
            aggfunc="mean",
        )
        sns.heatmap(heatmap, norm=LogNorm(), ax=ax[row, col])
        ax[row, col].set_title(f"Machine={machine} | Algorithm={algorithm}")
fig.suptitle(
    "Time (secs) for every threads-size combination", fontsize="xx-large", x=0.5, y=0.92
)
# Save figure
plt.savefig(path.join(out_folder, "size-threads-time.png"))
# Release the figure before creating the next one
plt.close(fig)

# Define data subset with max number of threads and matrix size
data_size2k_threads20 = data.query("Matrix_Size == 2000 and N_Threads == 20")

# Create matplotlib figure to plot the time distributions
fig, ax = plt.subplots(num_machines, num_algorithms, squeeze=False)
fig.set_size_inches(fig_width, 3 * fig_width // 2)
# Iterate over every machine and algorithm combination
for row, machine in enumerate(machines):
    for col, algorithm in enumerate(algorithms):
        # Create time distribution plot as violinplot
        dist = data_size2k_threads20[
            (data_size2k_threads20["Algorithm"] == algorithm)
            & (data_size2k_threads20["Machine"] == machine)
        ]
        sns.violinplot(data=dist, y="Time (secs)", inner="quart", ax=ax[row, col])
        ax[row, col].set_title(f"Machine={machine} | Algorithm={algorithm}")
fig.suptitle(
    "Time (secs) distribution for Matrix_Size=2000, N_Threads=20",
    fontsize="xx-large",
    x=0.5,
    y=0.92,
)
# Save figure
plt.savefig(path.join(out_folder, "distribution.png"))
plt.close(fig)

# Plot threads vs time lineplots
ax = sns.relplot(
    data=data,
    x="N_Threads",
    y="Time (secs)",
    hue="Matrix_Size",
    row="Machine",
    col="Algorithm",
    palette="plasma",
    kind="line",
)
# Save figure (relplot's figure is the current one)
plt.savefig(path.join(out_folder, "threads-time.png"))
plt.close()

# Plot matrix_size vs time lineplots
# Thread counts are cast to strings so the hue is treated as categorical
data["N_Threads"] = data["N_Threads"].astype(str)
ax = sns.relplot(
    data=data,
    x="Matrix_Size",
    y="Time (secs)",
    hue="N_Threads",
    row="Machine",
    col="Algorithm",
    palette="plasma",
    kind="line",
)
# Save figure
plt.savefig(path.join(out_folder, "size-time.png"))
# Program end

BIN
img/distribution.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 190 KiB

BIN
img/efficiency.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

BIN
img/size-threads-time.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

BIN
img/size-time.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 139 KiB

BIN
img/speedup.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

BIN
img/threads-time.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

52
launcher.py Normal file
View file

@ -0,0 +1,52 @@
from argparse import ArgumentParser
from os import popen
from pandas import DataFrame
from tqdm import trange
def single_experiment(matrix_size, threads):
    """Run both matrix multiplication executables once and collect their output.

    Args:
        matrix_size: Matrix dimension, passed as the first argument of each
            executable.
        threads: Number of threads, passed as the second argument.

    Returns:
        A list of rows. Each row holds the comma-separated fields of one
        output line (as strings) followed by the algorithm label.
    """
    data = []
    executables = ("MM1c", "MM1r")
    algorithms = ("row-column", "row-row")
    for executable, algorithm in zip(executables, algorithms):
        # The context manager closes the pipe (and waits for the child) as
        # soon as its output has been consumed.
        with popen(f"./{executable} {matrix_size} {threads} 0") as stream:
            for line in stream:
                line = line.strip()
                # Blank lines carry no measurement and would yield short rows
                if not line:
                    continue
                values = line.split(",")
                values.append(algorithm)
                data.append(values)
    return data
def all_experiments(matrix_sizes, threads, repetitions):
    """Run every size/thread-count combination `repetitions` times.

    Args:
        matrix_sizes: Iterable of matrix sizes to test.
        threads: Iterable of thread counts to test.
        repetitions: Number of runs per combination.

    Returns:
        The concatenated rows returned by single_experiment for every run.
    """
    results = []
    for size in matrix_sizes:
        for n_threads in threads:
            # One progress bar per size/thread-count combination
            progress = trange(
                repetitions,
                desc=f"Size: {size}, {n_threads} threads",
                unit="exec",
            )
            for _ in progress:
                results.extend(single_experiment(size, n_threads))
    return results
if __name__ == "__main__":
    # Command line: a single positional argument naming the output CSV
    cli = ArgumentParser(
        description="Run matrix multiplication experiments and save data as CSV.",
        epilog="The output of this program should be used with graphics.py",
    )
    cli.add_argument(
        "output_file",
        help="CSV file where the results will be saved",
    )
    output_file = cli.parse_args().output_file

    # Experiment grid: sizes 200..2000 (step 200), threads 2..20 (step 2),
    # 30 runs per combination
    sizes = range(200, 2001, 200)
    thread_counts = range(2, 21, 2)
    runs = 30

    rows = all_experiments(sizes, thread_counts, runs)
    header = ["Matrix_Size", "N_Threads", "Thread", "Time", "Algorithm"]
    DataFrame(rows, columns=header).to_csv(output_file, index=False)

3
requirements.txt Normal file
View file

@ -0,0 +1,3 @@
tqdm
pandas
seaborn