🎉 First commit
This commit is contained in:
commit
3b8d21dde2
17 changed files with 571 additions and 0 deletions
55
.gitignore
vendored
Normal file
55
.gitignore
vendored
Normal file
|
@ -0,0 +1,55 @@
|
|||
# Prerequisites
|
||||
*.d
|
||||
|
||||
# Object files
|
||||
*.o
|
||||
*.ko
|
||||
*.obj
|
||||
*.elf
|
||||
|
||||
# Linker output
|
||||
*.ilk
|
||||
*.map
|
||||
*.exp
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Libraries
|
||||
*.lib
|
||||
*.a
|
||||
*.la
|
||||
*.lo
|
||||
|
||||
# Shared objects (inc. Windows DLLs)
|
||||
*.dll
|
||||
*.so
|
||||
*.so.*
|
||||
*.dylib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
*.i*86
|
||||
*.x86_64
|
||||
*.hex
|
||||
|
||||
# Debug files
|
||||
*.dSYM/
|
||||
*.su
|
||||
*.idb
|
||||
*.pdb
|
||||
|
||||
# Kernel Module Compile Results
|
||||
*.mod*
|
||||
*.cmd
|
||||
.tmp_versions/
|
||||
modules.order
|
||||
Module.symvers
|
||||
Mkfile.old
|
||||
dkms.conf
|
||||
|
||||
# Experiments data
|
||||
*.csv
|
5
LICENSE
Normal file
5
LICENSE
Normal file
|
@ -0,0 +1,5 @@
|
|||
Copyright (c) 2024 Nicolas Rojas
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
86
MatMult/MM1c.c
Normal file
86
MatMult/MM1c.c
Normal file
|
@ -0,0 +1,86 @@
|
|||
/************************************************************************
|
||||
* Autor: J. Corredor
|
||||
* Fecha: Octubre 2023
|
||||
* Computación de Alto Rendimiento
|
||||
* Maestría en Inteligencia Artificial
|
||||
* Tema: Programa de Multiplicación de Matrices usando hilos OpenMP
|
||||
* -Algoritmo Clásico filasXcolumnas
|
||||
*************************************************************************/
|
||||
|
||||
#include "sample.h"
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Generic two-argument minimum; not used by the code shown in this file.
 * Both arguments may be evaluated twice, so avoid side effects in them. */
#ifndef MIN
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
/* Number of doubles reserved for the three matrices:
 * 3 * 64 Mi elements, i.e. room for three 8192 x 8192 matrices. */
#define DATA_SZ (1024 * 1024 * 64 * 3)
|
||||
|
||||
/* Single static pool; main() places a, b and c back to back inside it
 * (offsets 0, SZ*SZ and 2*SZ*SZ). */
static double MEM_CHUNK[DATA_SZ];
|
||||
|
||||
/*
 * Fill the three SZ x SZ operand matrices with deterministic test values.
 *
 * Element (j, k) is stored at index j + k * SZ and receives:
 *   a = 2.0 * (j + k),  b = 3.2 * (j - k),  c = 1.0
 *
 * SZ: matrix dimension; nothing is written when SZ <= 0.
 * a, b, c: buffers holding at least SZ * SZ doubles each.
 *
 * The original version never looped over k (and read it uninitialised),
 * which is undefined behaviour and left all but one column untouched.
 */
void Matrix_Init_col(int SZ, double *a, double *b, double *c) {
  int j, k;

  for (k = 0; k < SZ; k++) {
    for (j = 0; j < SZ; j++) {
      a[j + k * SZ] = 2.0 * (j + k);
      b[j + k * SZ] = 3.2 * (j - k);
      c[j + k * SZ] = 1.0;
    }
  }
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int N;
|
||||
|
||||
if (argc < 2) {
|
||||
printf("MM1c MatrixSize [Sample arguments ...]\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
N = (int)atof(argv[1]);
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (N > 1024 * 10) {
|
||||
printf("Unvalid MatrixSize\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
Sample_Init(argc, argv);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
int NTHR, THR, SZ;
|
||||
int i, j, k;
|
||||
double *a, *b, *c;
|
||||
|
||||
SZ = N;
|
||||
THR = Sample_PAR_install();
|
||||
NTHR = omp_get_num_threads();
|
||||
|
||||
a = MEM_CHUNK;
|
||||
b = a + SZ * SZ;
|
||||
c = b + SZ * SZ;
|
||||
|
||||
#pragma omp master
|
||||
Matrix_Init_col(SZ, a, b, c);
|
||||
|
||||
Sample_Start(THR);
|
||||
|
||||
#pragma omp for
|
||||
for (i = 0; i < SZ; i++)
|
||||
for (j = 0; j < SZ; j++) {
|
||||
double *pA, *pB, S;
|
||||
S = 0.0;
|
||||
pA = a + (i * SZ);
|
||||
pB = b + j;
|
||||
for (k = SZ; k > 0; k--, pA++, pB += SZ)
|
||||
S += (*pA * *pB);
|
||||
c[i * SZ + j] = S;
|
||||
}
|
||||
|
||||
Sample_Stop(THR);
|
||||
}
|
||||
|
||||
Sample_End(&N);
|
||||
}
|
88
MatMult/MM1r.c
Normal file
88
MatMult/MM1r.c
Normal file
|
@ -0,0 +1,88 @@
|
|||
/************************************************************************
|
||||
* Autor: N Rojas
|
||||
* Fecha: Noviembre 2023
|
||||
* Computación de Alto Rendimiento
|
||||
* Maestría en Inteligencia Artificial
|
||||
* Tema: Programa de Multiplicación de Matrices usando hilos OpenMP
|
||||
* -Algoritmo filasXfilas
|
||||
*************************************************************************/
|
||||
|
||||
#include "sample.h"
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Generic two-argument minimum; not used by the code shown in this file.
 * Both arguments may be evaluated twice, so avoid side effects in them. */
#ifndef MIN
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
/* Number of doubles reserved for the three matrices:
 * 3 * 64 Mi elements, i.e. room for three 8192 x 8192 matrices. */
#define DATA_SZ (1024 * 1024 * 64 * 3)
|
||||
|
||||
/* Single static pool; main() places a, b and c back to back inside it
 * (offsets 0, SZ*SZ and 2*SZ*SZ). */
static double MEM_CHUNK[DATA_SZ];
|
||||
|
||||
/*
 * Fill the operand matrices with deterministic test values and clear the
 * result matrix.
 *
 * Element (j, k) is stored at index j + k * SZ and receives:
 *   a = 2.0 * (j + k),  b = 3.2 * (j - k),  c = 0.0
 *
 * c must start at zero because the rows-by-rows kernel in main()
 * accumulates into it with "+=".
 *
 * SZ: matrix dimension; nothing is written when SZ <= 0.
 * a, b, c: buffers holding at least SZ * SZ doubles each.
 *
 * The original version never looped over k (and read it uninitialised),
 * which is undefined behaviour and left all but one column untouched.
 */
void Matrix_Init_col(int SZ, double *a, double *b, double *c) {
  int j, k;

  for (k = 0; k < SZ; k++) {
    for (j = 0; j < SZ; j++) {
      a[j + k * SZ] = 2.0 * (j + k);
      b[j + k * SZ] = 3.2 * (j - k);
      c[j + k * SZ] = 0.0;
    }
  }
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int N;
|
||||
|
||||
if (argc < 2) {
|
||||
printf("MM1r MatrixSize [Sample arguments ...]\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
N = (int)atof(argv[1]);
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (N > 1024 * 10) {
|
||||
printf("Unvalid MatrixSize\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
Sample_Init(argc, argv);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
int NTHR, THR, SZ;
|
||||
int i, j, k;
|
||||
double *a, *b, *c;
|
||||
|
||||
SZ = N;
|
||||
THR = Sample_PAR_install();
|
||||
NTHR = omp_get_num_threads();
|
||||
|
||||
a = MEM_CHUNK;
|
||||
b = a + SZ * SZ;
|
||||
c = b + SZ * SZ;
|
||||
|
||||
#pragma omp master
|
||||
Matrix_Init_col(SZ, a, b, c);
|
||||
|
||||
Sample_Start(THR);
|
||||
|
||||
#pragma omp for
|
||||
for (i = 0; i < SZ; ++i) {
|
||||
double *pA, *pB, S;
|
||||
pA = a + (i * SZ);
|
||||
for (j = 0; j < SZ; ++j) {
|
||||
pB = b + (j * SZ);
|
||||
for (k = 0; k < SZ; ++k, ++pB) {
|
||||
S = *pA * *pB;
|
||||
c[i * SZ + k] += S;
|
||||
}
|
||||
++pA;
|
||||
}
|
||||
}
|
||||
|
||||
Sample_Stop(THR);
|
||||
}
|
||||
|
||||
Sample_End(&N);
|
||||
}
|
20
MatMult/Makefile
Normal file
20
MatMult/Makefile
Normal file
|
@ -0,0 +1,20 @@
|
|||
# Builds the two OpenMP matrix-multiplication benchmarks into $(BINDIR).
#
#   make          build both programs
#   make MM1c     build only the rows-by-columns program
#   make MM1r     build only the rows-by-rows program
#   make clean    remove the built programs

GCC = gcc
# Compiler options: OpenMP support and aggressive optimisation
oT = -fopenmp -O3

# NOTE: despite its name this variable carries the libraries to link
CFLAGS = -lm

# Timing module linked into every program
oL= Otime.c

BINDIR = ../
PROGS = $(BINDIR)MM1c $(BINDIR)MM1r

# These names never exist as files in this directory
.PHONY: all clean MM1c MM1r

all: MM1c MM1r

clean:
	$(RM) $(PROGS)

# Short aliases for the real binaries
MM1c: $(BINDIR)MM1c

MM1r: $(BINDIR)MM1r

# Rebuild a binary only when its source, the timing module or the shared
# header changed
$(BINDIR)%: %.c $(oL) sample.h
	$(GCC) $(oT) $(oL) $< -o $@ $(CFLAGS)
|
63
MatMult/Otime.c
Normal file
63
MatMult/Otime.c
Normal file
|
@ -0,0 +1,63 @@
|
|||
#include <errno.h>
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
/* Largest thread count accepted by Sample_Init(); also the capacity of the
 * per-thread timestamp arrays below. */
#define MAX_THREADS 20
|
||||
|
||||
/* Per-thread timestamps, indexed by the thread number passed to
 * Sample_Start() / Sample_Stop(). */
struct timeval start[MAX_THREADS];
|
||||
struct timeval stop[MAX_THREADS];
|
||||
|
||||
/* Thread count chosen in Sample_Init() and reported by Sample_End(). */
static int N_THREADS;
|
||||
|
||||
void Sample_Start(int THR) {
|
||||
#pragma omp barrier
|
||||
gettimeofday(start + THR, (void *)0);
|
||||
}
|
||||
|
||||
/* Record the stop timestamp of thread THR. */
void Sample_Stop(int THR) {
  struct timeval *slot = stop + THR;

  gettimeofday(slot, NULL);
}
|
||||
|
||||
void Sample_Init(int argc, char *argv[]) {
|
||||
|
||||
if (argc < 3) {
|
||||
printf("Sample parameters: NumberThreads \n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
N_THREADS = (int)atof(argv[1]);
|
||||
|
||||
if (!N_THREADS || N_THREADS > MAX_THREADS) {
|
||||
printf("Number of Threads is not valid\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
omp_set_num_threads(N_THREADS);
|
||||
}
|
||||
|
||||
/*
 * Return the OpenMP thread number of the calling thread; callers use it as
 * the THR index for Sample_Start() and Sample_Stop().
 *
 * Declared with (void) so the definition matches the prototype in sample.h
 * (an empty parameter list means "unspecified arguments" before C23).
 */
int Sample_PAR_install(void) {
  return omp_get_thread_num();
}
|
||||
|
||||
void Sample_End(const int *SZ) {
|
||||
int THR, i;
|
||||
|
||||
for (THR = 0; THR < N_THREADS; THR++) {
|
||||
printf("%1.0f,", (double)*SZ);
|
||||
printf("%1.0f,", (double)N_THREADS);
|
||||
printf("%1.0f,", (double)THR);
|
||||
stop[THR].tv_usec -= start[THR].tv_usec;
|
||||
if (stop[THR].tv_usec < 0) {
|
||||
stop[THR].tv_usec += 1000000;
|
||||
stop[THR].tv_sec--;
|
||||
}
|
||||
stop[THR].tv_sec -= start[THR].tv_sec;
|
||||
|
||||
printf("%1.0f\n", (double)(stop[THR].tv_sec * 1000000 + stop[THR].tv_usec));
|
||||
}
|
||||
}
|
5
MatMult/sample.h
Normal file
5
MatMult/sample.h
Normal file
|
@ -0,0 +1,5 @@
|
|||
/*
 * Per-thread wall-clock sampling helpers for the OpenMP benchmarks.
 *
 * Expected call order: Sample_Init() once, then inside the parallel region
 * Sample_PAR_install(), Sample_Start() and Sample_Stop() on every thread,
 * and finally Sample_End() once after the region.
 */
#ifndef SAMPLE_H
#define SAMPLE_H

/* Read the thread count from argv[1] and configure OpenMP; terminates the
 * process when the arguments are invalid. */
void Sample_Init(int argc, char *argv[]);

/* Wait at a barrier, then record the start timestamp of thread THR. */
void Sample_Start(int THR);

/* Record the stop timestamp of thread THR. */
void Sample_Stop(int THR);

/* Return the calling thread's number, to be used as THR. */
int Sample_PAR_install(void);

/* Print "size,threads,thread,microseconds" for every thread; *SZ is the
 * matrix size to report. */
void Sample_End(const int *SZ);

#endif /* SAMPLE_H */
|
60
README.md
Normal file
60
README.md
Normal file
|
@ -0,0 +1,60 @@
|
|||
# Parallel matrix multiplication
|
||||
|
||||
Performance tests for parallel matrix multiplication algorithms.
|
||||
|
||||
## Usage
|
||||
|
||||
1. Install the required Python libraries listed in `requirements.txt`. The following command uses `pip`, but `conda` or any other package manager works as well.
|
||||
|
||||
```shell
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. Build the matrix multiplication executables using `make`.
|
||||
|
||||
```shell
|
||||
cd MatMult
|
||||
make
|
||||
```
|
||||
|
||||
3. Run the experiments using the launcher.
|
||||
|
||||
```shell
|
||||
cd ..
|
||||
python launcher.py experiments.csv
|
||||
```
|
||||
|
||||
4. Generate the graphics, using the results of the experiments from every machine.
|
||||
|
||||
```shell
|
||||
python graphics.py experiments1.csv experiments2.csv experiments3.csv -o img/
|
||||
```
|
||||
|
||||
## Results
|
||||
|
||||
The experimentation results are summarized in the following figures.
|
||||
|
||||

|
||||

|
||||

|
||||

|
||||
|
||||
In this case, the experiments were run on 3 different machines:
|
||||
|
||||
- A ml.c5.9xlarge AWS instance, with 36 vCPUs and 72 GiB RAM.
|
||||
- A ml.m5.8xlarge AWS instance, with 32 vCPUs and 128 GiB RAM.
|
||||
- A ml.r5.8xlarge AWS instance, with 32 vCPUs and 256 GiB RAM.
|
||||
|
||||
Every combination of matrix size and number of threads was executed 30 times, in order to make the results statistically meaningful. The values in the graphics are the means of those 30 repetitions.
|
||||
|
||||
### Metrics
|
||||
|
||||
The following metrics summarize the experimental results, showing the algorithms' performance for different numbers of threads:
|
||||
|
||||
- Speed up: $$Sp = \frac{t_{serial}}{t_{parallel}}$$
|
||||
|
||||

|
||||
|
||||
- Efficiency: $$Ep = \frac{Sp}{p}$$
|
||||
|
||||

|
134
graphics.py
Normal file
134
graphics.py
Normal file
|
@ -0,0 +1,134 @@
|
|||
# Imports
from argparse import ArgumentParser
from os import makedirs, path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.colors import LogNorm

# Width (in inches) of the machine x algorithm grid figures
FIG_WIDTH = 12


def machine_label(csv_path):
    """Return the machine name encoded in a CSV path.

    The name is the file name without directory and extension, with "_"
    replaced by "." to agree with the AWS documentation
    (e.g. "results/ml_c5_9xlarge.csv" -> "ml.c5.9xlarge").
    """
    stem = path.splitext(path.basename(csv_path))[0]
    return stem.replace("_", ".")


def draw_time_heatmap(subset, axis):
    """Draw the mean time per (matrix size, threads) pair as a heatmap."""
    table = subset.pivot_table(
        values="Time (secs)",
        index="Matrix_Size",
        columns="N_Threads",
        aggfunc="mean",
    )
    sns.heatmap(table, norm=LogNorm(), ax=axis)


def draw_time_distribution(subset, axis):
    """Draw the distribution of the measured times as a violin plot."""
    sns.violinplot(data=subset, y="Time (secs)", inner="quart", ax=axis)


def save_grid_figure(frame, machines, algorithms, draw_panel, title, out_file):
    """Save a figure with one panel per machine (row) and algorithm (column).

    frame: measurements with "Machine" and "Algorithm" columns.
    draw_panel: callable(subset, axis) that draws one panel.
    title: figure title.
    out_file: path of the image to write.
    """
    # squeeze=False keeps a 2-D axes array even with one machine or algorithm
    fig, axes = plt.subplots(len(machines), len(algorithms), squeeze=False)
    fig.set_size_inches(FIG_WIDTH, 3 * FIG_WIDTH // 2)
    for row, machine in enumerate(machines):
        for col, algorithm in enumerate(algorithms):
            selected = (frame["Algorithm"] == algorithm) & (
                frame["Machine"] == machine
            )
            draw_panel(frame[selected], axes[row, col])
            axes[row, col].set_title(f"Machine={machine} | Algorithm={algorithm}")
    fig.suptitle(title, fontsize="xx-large", x=0.5, y=0.92)
    fig.savefig(out_file)
    # Release the figure so later plots start from a clean state
    plt.close(fig)


# Parse execution arguments
parser = ArgumentParser(description="Create graphics from the experiments data.")
parser.add_argument(
    "input_files",
    help="CSV files to be read and processed. Should be outputs of launcher.py, from different machines",
    nargs="+",
)
parser.add_argument(
    "-o", "--out", help="Folder where the images will be saved", required=True
)
args = parser.parse_args()
input_files = args.input_files
out_folder = args.out
# Create the output folder up front instead of failing on the first save
if out_folder:
    makedirs(out_folder, exist_ok=True)

# Read the data in every csv input and combine into single dataframe
frames = []
for input_file in input_files:
    partial_data = pd.read_csv(input_file)
    partial_data["Machine"] = machine_label(input_file)
    frames.append(partial_data)
data = pd.concat(frames, ignore_index=True)

# Transform time from microseconds to seconds
data["Time (secs)"] = data["Time"] * 10**-6
# Obtain list of unique machines and algorithms
machines = data["Machine"].unique()
algorithms = data["Algorithm"].unique()

# Time heatmap for every machine and algorithm combination
save_grid_figure(
    data,
    machines,
    algorithms,
    draw_time_heatmap,
    "Time (secs) for every threads-size combination",
    path.join(out_folder, "size-threads-time.png"),
)

# Time distribution for the largest matrix size and number of threads
max_size = data["Matrix_Size"].max()
max_threads = data["N_Threads"].max()
largest_runs = data[
    (data["Matrix_Size"] == max_size) & (data["N_Threads"] == max_threads)
]
save_grid_figure(
    largest_runs,
    machines,
    algorithms,
    draw_time_distribution,
    f"Time (secs) distribution for Matrix_Size={max_size}, N_Threads={max_threads}",
    path.join(out_folder, "distribution.png"),
)

# Plot threads vs time lineplots
sns.relplot(
    data=data,
    x="N_Threads",
    y="Time (secs)",
    hue="Matrix_Size",
    row="Machine",
    col="Algorithm",
    palette="plasma",
    kind="line",
)
# Save figure
plt.savefig(path.join(out_folder, "threads-time.png"))
# Release figure
plt.close("all")

# Plot matrix_size vs time lineplots
# N_Threads becomes a string so seaborn treats it as a categorical hue
data["N_Threads"] = data["N_Threads"].astype(str)
sns.relplot(
    data=data,
    x="Matrix_Size",
    y="Time (secs)",
    hue="N_Threads",
    row="Machine",
    col="Algorithm",
    palette="plasma",
    kind="line",
)
# Save figure
plt.savefig(path.join(out_folder, "size-time.png"))
plt.close("all")
# Program end
|
BIN
img/distribution.png
Normal file
BIN
img/distribution.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 190 KiB |
BIN
img/efficiency.png
Normal file
BIN
img/efficiency.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 30 KiB |
BIN
img/size-threads-time.png
Normal file
BIN
img/size-threads-time.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 117 KiB |
BIN
img/size-time.png
Normal file
BIN
img/size-time.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 139 KiB |
BIN
img/speedup.png
Normal file
BIN
img/speedup.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 35 KiB |
BIN
img/threads-time.png
Normal file
BIN
img/threads-time.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 124 KiB |
52
launcher.py
Normal file
52
launcher.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
from argparse import ArgumentParser
|
||||
from os import popen
|
||||
|
||||
from pandas import DataFrame
|
||||
from tqdm import trange
|
||||
|
||||
|
||||
def single_experiment(matrix_size, threads):
    """Run both executables once and collect their timing rows.

    Each executable is started from the current directory as
    ``./<executable> <matrix_size> <threads> 0`` and is expected to print
    comma-separated lines with four fields.

    Args:
        matrix_size: Matrix dimension passed to the executable.
        threads: Number of threads passed to the executable.

    Returns:
        A list of rows; each row is the list of the four printed fields
        (as strings) followed by the algorithm name.

    Raises:
        RuntimeError: If an executable exits with a non-zero status or prints
            a line that does not have four fields. Without this check, error
            messages from the executables would be stored as measurements.
    """
    data = []
    executables = ("MM1c", "MM1r")
    algorithms = ("row-column", "row-row")
    for executable, algorithm in zip(executables, algorithms):
        command = f"./{executable} {matrix_size} {threads} 0"
        stream = popen(command)
        try:
            lines = stream.readlines()
        finally:
            # close() releases the pipe and returns None on a zero exit status
            status = stream.close()
        if status is not None:
            output = "".join(lines).strip()
            raise RuntimeError(
                f"'{command}' failed with status {status}: {output}"
            )
        for line in lines:
            stripped = line.strip()
            if not stripped:
                continue
            values = stripped.split(",")
            if len(values) != 4:
                raise RuntimeError(
                    f"'{command}' printed an unexpected line: {stripped!r}"
                )
            values.append(algorithm)
            data.append(values)
    return data
|
||||
|
||||
|
||||
def all_experiments(matrix_sizes, threads, repetitions):
    """Run every (matrix size, thread count) combination several times.

    Args:
        matrix_sizes: Iterable of matrix dimensions to test.
        threads: Iterable of thread counts to test.
        repetitions: How many times each combination is executed.

    Returns:
        The concatenated rows returned by ``single_experiment`` for every run.
    """
    results = []
    for size in matrix_sizes:
        for n_threads in threads:
            # One progress bar per combination
            progress = trange(
                repetitions,
                desc=f"Size: {size}, {n_threads} threads",
                unit="exec",
            )
            for _ in progress:
                results += single_experiment(size, n_threads)
    return results
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command line: output CSV path plus an optional repetition count
    parser = ArgumentParser(
        description="Run matrix multiplication experiments and save data as CSV.",
        epilog="The output of this program should be used with graphics.py",
    )
    parser.add_argument(
        "output_file",
        help="CSV file where the results will be saved",
    )
    parser.add_argument(
        "-r",
        "--repetitions",
        type=int,
        default=30,
        help="Number of executions per size-threads combination (default: 30)",
    )
    args = parser.parse_args()

    # Experiment grid: sizes 200..2000 and 2..20 threads, in steps of 200 and 2
    matrix_sizes = range(200, 2001, 200)
    threads = range(2, 21, 2)
    repetitions = args.repetitions

    data = all_experiments(matrix_sizes, threads, repetitions)
    # Column order matches the fields printed by the executables, followed by
    # the algorithm name appended in single_experiment
    columns = ["Matrix_Size", "N_Threads", "Thread", "Time", "Algorithm"]
    data = DataFrame(data, columns=columns)
    data.to_csv(args.output_file, index=False)
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
tqdm
|
||||
pandas
|
||||
seaborn
|
Loading…
Add table
Add a link
Reference in a new issue