🎉 First commit

2025-05-29 20:26:11 -05:00 · 2025-05-29 20:26:11 -05:00 · 3b8d21dde2
commit 3b8d21dde2
17 changed files with 571 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,55 @@
+# Prerequisites
+*.d
+
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Linker output
+*.ilk
+*.map
+*.exp
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
+*.su
+*.idb
+*.pdb
+
+# Kernel Module Compile Results
+*.mod*
+*.cmd
+.tmp_versions/
+modules.order
+Module.symvers
+Mkfile.old
+dkms.conf
+
+# Experiments data
+*.csv
--- a/5
+++ b/5
@ -0,0 +1,5 @@
+Copyright (c) 2024 Nicolas Rojas
+
+Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--- a/MatMult/MM1c.c
+++ b/MatMult/MM1c.c
@ -0,0 +1,86 @@
+/************************************************************************
+ * Autor: J. Corredor
+ * Fecha: Octubre 2023
+ * Computación de Alto Rendimiento
+ * Maestría en Inteligencia Artificial
+ * Tema: Programa de Multiplicación de Matrices usando hilos OpenMP
+ * -Algoritmo Clásico filasXcolumnas
+ *************************************************************************/
+
+#include "sample.h"
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef MIN
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#endif
+
+#define DATA_SZ (1024 * 1024 * 64 * 3)
+
+static double MEM_CHUNK[DATA_SZ];
+
+/**
+ * Fill three SZ x SZ matrices stored as flat arrays.
+ *
+ * For every index pair (j, k) the element at offset j + k * SZ receives
+ *   a: 2.0 * (j + k)
+ *   b: 3.2 * (j - k)
+ *   c: 1.0
+ *
+ * SZ: matrix dimension; a, b and c must each hold at least SZ * SZ doubles.
+ */
+void Matrix_Init_col(int SZ, double *a, double *b, double *c) {
+  int j, k;
+  /* The loop over k was missing, so k was read uninitialised (undefined
+   * behaviour) and at most a single row was ever written. */
+  for (k = 0; k < SZ; k++) {
+    for (j = 0; j < SZ; j++) {
+      a[j + k * SZ] = 2.0 * (j + k);
+      b[j + k * SZ] = 3.2 * (j - k);
+      c[j + k * SZ] = 1.0;
+    }
+  }
+}
+
+/**
+ * Usage: MM1c MatrixSize [Sample arguments ...]
+ *
+ * Multiplies two N x N matrices with the classic rows-by-columns algorithm,
+ * sharing the outer loop among the OpenMP threads. Timing is delegated to the
+ * Sample_* helpers declared in sample.h.
+ *
+ * Returns -1 when the matrix size is missing or invalid, 0 otherwise.
+ */
+int main(int argc, char **argv) {
+  int N;
+  double requested;
+
+  if (argc < 2) {
+    printf("MM1c MatrixSize [Sample arguments ...]\n");
+    return -1;
+  }
+
+  requested = atof(argv[1]);
+  /* Drop the program name: Sample_Init receives the remaining arguments. */
+  argc--;
+  argv++;
+
+  /* Validate on the double: converting an out-of-range value to int is
+   * undefined, and a size below 1 leaves nothing to multiply. */
+  if (!(requested >= 1.0 && requested <= 1024 * 10)) {
+    printf("Unvalid MatrixSize\n");
+    return -1;
+  }
+  N = (int)requested;
+
+  /* a, b and c are all carved out of MEM_CHUNK, so 3 * N * N doubles must
+   * fit; the 10240 cap alone lets the matrices run past the end of the
+   * array. */
+  if (3LL * N * N > DATA_SZ) {
+    printf("Unvalid MatrixSize\n");
+    return -1;
+  }
+
+  Sample_Init(argc, argv);
+
+#pragma omp parallel
+  {
+    int THR, SZ;
+    int i, j, k;
+    double *a, *b, *c;
+
+    SZ = N;
+    THR = Sample_PAR_install();
+
+    /* Lay the three SZ x SZ matrices out back to back inside MEM_CHUNK. */
+    a = MEM_CHUNK;
+    b = a + SZ * SZ;
+    c = b + SZ * SZ;
+
+    /* Only the master thread initialises the data; `omp master` has no
+     * implied barrier, the one at the top of Sample_Start provides it. */
+#pragma omp master
+    Matrix_Init_col(SZ, a, b, c);
+
+    Sample_Start(THR);
+
+#pragma omp for
+    for (i = 0; i < SZ; i++)
+      for (j = 0; j < SZ; j++) {
+        double *pA, *pB, S;
+        S = 0.0;
+        pA = a + (i * SZ); /* walks row i of a, one element per step */
+        pB = b + j;        /* walks column j of b, SZ elements per step */
+        for (k = SZ; k > 0; k--, pA++, pB += SZ)
+          S += (*pA * *pB);
+        c[i * SZ + j] = S;
+      }
+
+    Sample_Stop(THR);
+  }
+
+  Sample_End(&N);
+  return 0;
+}
--- a/MatMult/MM1r.c
+++ b/MatMult/MM1r.c
@ -0,0 +1,88 @@
+/************************************************************************
+ * Autor: N Rojas
+ * Fecha: Noviembre 2023
+ * Computación de Alto Rendimiento
+ * Maestría en Inteligencia Artificial
+ * Tema: Programa de Multiplicación de Matrices usando hilos OpenMP
+ * -Algoritmo filasXfilas
+ *************************************************************************/
+
+#include "sample.h"
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef MIN
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#endif
+
+#define DATA_SZ (1024 * 1024 * 64 * 3)
+
+static double MEM_CHUNK[DATA_SZ];
+
+/**
+ * Fill three SZ x SZ matrices stored as flat arrays.
+ *
+ * For every index pair (j, k) the element at offset j + k * SZ receives
+ *   a: 2.0 * (j + k)
+ *   b: 3.2 * (j - k)
+ *   c: 0.0
+ *
+ * SZ: matrix dimension; a, b and c must each hold at least SZ * SZ doubles.
+ */
+void Matrix_Init_col(int SZ, double *a, double *b, double *c) {
+  int j, k;
+  /* The loop over k was missing, so k was read uninitialised (undefined
+   * behaviour) and at most a single row was ever written. */
+  for (k = 0; k < SZ; k++) {
+    for (j = 0; j < SZ; j++) {
+      a[j + k * SZ] = 2.0 * (j + k);
+      b[j + k * SZ] = 3.2 * (j - k);
+      c[j + k * SZ] = 0.0;
+    }
+  }
+}
+
+/**
+ * Usage: MM1r MatrixSize [Sample arguments ...]
+ *
+ * Multiplies two N x N matrices with the rows-by-rows algorithm, sharing the
+ * outer loop among the OpenMP threads. Timing is delegated to the Sample_*
+ * helpers declared in sample.h.
+ *
+ * Returns -1 when the matrix size is missing or invalid, 0 otherwise.
+ */
+int main(int argc, char **argv) {
+  int N;
+  double requested;
+
+  if (argc < 2) {
+    printf("MM1r MatrixSize [Sample arguments ...]\n");
+    return -1;
+  }
+
+  requested = atof(argv[1]);
+  /* Drop the program name: Sample_Init receives the remaining arguments. */
+  argc--;
+  argv++;
+
+  /* Validate on the double: converting an out-of-range value to int is
+   * undefined, and a size below 1 leaves nothing to multiply. */
+  if (!(requested >= 1.0 && requested <= 1024 * 10)) {
+    printf("Unvalid MatrixSize\n");
+    return -1;
+  }
+  N = (int)requested;
+
+  /* a, b and c are all carved out of MEM_CHUNK, so 3 * N * N doubles must
+   * fit; the 10240 cap alone lets the matrices run past the end of the
+   * array. */
+  if (3LL * N * N > DATA_SZ) {
+    printf("Unvalid MatrixSize\n");
+    return -1;
+  }
+
+  Sample_Init(argc, argv);
+
+#pragma omp parallel
+  {
+    int THR, SZ;
+    int i, j, k;
+    double *a, *b, *c;
+
+    SZ = N;
+    THR = Sample_PAR_install();
+
+    /* Lay the three SZ x SZ matrices out back to back inside MEM_CHUNK. */
+    a = MEM_CHUNK;
+    b = a + SZ * SZ;
+    c = b + SZ * SZ;
+
+    /* Only the master thread initialises the data; `omp master` has no
+     * implied barrier, the one at the top of Sample_Start provides it. */
+#pragma omp master
+    Matrix_Init_col(SZ, a, b, c);
+
+    Sample_Start(THR);
+
+#pragma omp for
+    for (i = 0; i < SZ; ++i) {
+      double *pA, *pB, S;
+      pA = a + (i * SZ); /* points at a[i][j] as j advances */
+      for (j = 0; j < SZ; ++j) {
+        pB = b + (j * SZ); /* walks row j of b */
+        /* Accumulate a[i][j] * b[j][k] into c[i][k]; c must start zeroed. */
+        for (k = 0; k < SZ; ++k, ++pB) {
+          S = *pA * *pB;
+          c[i * SZ + k] += S;
+        }
+        ++pA;
+      }
+    }
+
+    Sample_Stop(THR);
+  }
+
+  Sample_End(&N);
+  return 0;
+}
--- a/MatMult/Makefile
+++ b/MatMult/Makefile
@ -0,0 +1,20 @@
+# Compiler and the flags handed to it before the sources
+GCC = gcc
+oT = -fopenmp -O3
+
+# Libraries appended at the end of the compile/link command
+CFLAGS = -lm
+
+# Timing helpers compiled into every program
+oL= Otime.c
+
+# Executables are written one directory up
+BINDIR = ../
+PROGS = $(BINDIR)MM1c $(BINDIR)MM1r
+
+# None of these targets names a file created in this directory (the binaries
+# land in BINDIR), so mark them phony: a stray file called "all", "clean",
+# "MM1c" or "MM1r" must not stop the rule from running.
+.PHONY: all clean MM1c MM1r
+
+all: MM1c MM1r
+
+clean:
+	$(RM) $(PROGS)
+
+# Both programs are built the same way from <target>.c plus the timing helpers
+MM1c MM1r:
+	$(GCC) $(oT) $(oL) $@.c -o $(BINDIR)$@ $(CFLAGS)
--- a/MatMult/Otime.c
+++ b/MatMult/Otime.c
@ -0,0 +1,63 @@
+#include <errno.h>
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+
+#define MAX_THREADS 20
+
+struct timeval start[MAX_THREADS];
+struct timeval stop[MAX_THREADS];
+
+static int N_THREADS;
+
+/* Wait at an OpenMP barrier, then store the current time as the start
+ * timestamp of thread THR. */
+void Sample_Start(int THR) {
+#pragma omp barrier
+  gettimeofday(&start[THR], NULL);
+}
+
+/* Store the current time as the stop timestamp of thread THR. */
+void Sample_Stop(int THR) {
+  gettimeofday(stop + THR, NULL);
+}
+
+/**
+ * Read the requested thread count from argv[1] and hand it to OpenMP.
+ *
+ * argc/argv: argument vector whose element 1 is the number of threads; at
+ *            least three entries are required.
+ *
+ * Prints a message and exits with status 1 when arguments are missing or the
+ * count lies outside 1..MAX_THREADS.
+ */
+void Sample_Init(int argc, char *argv[]) {
+  double requested;
+
+  if (argc < 3) {
+    printf("Sample parameters: NumberThreads \n");
+    exit(1);
+  }
+
+  requested = atof(argv[1]);
+
+  /* Check the range before the cast: a negative count used to slip through
+   * the old `!N_THREADS` test, and converting an out-of-range double to int
+   * is undefined. Fractions are truncated as before (20.5 -> 20). */
+  if (!(requested >= 1.0 && requested < MAX_THREADS + 1)) {
+    printf("Number of Threads is not valid\n");
+    exit(1);
+  }
+
+  N_THREADS = (int)requested;
+
+  omp_set_num_threads(N_THREADS);
+}
+
+/* Return the OpenMP thread number of the calling thread. */
+int Sample_PAR_install() { return omp_get_thread_num(); }
+
+/**
+ * Print one CSV line per configured thread:
+ *   matrix size, number of threads, thread index, elapsed microseconds
+ *
+ * SZ: pointer to the matrix size echoed in the first column.
+ *
+ * The elapsed time is stop[THR] - start[THR]. It is computed in double
+ * precision without modifying the stored timestamps, so the function can be
+ * called again and the seconds-to-microseconds product cannot overflow an
+ * integer type.
+ */
+void Sample_End(const int *SZ) {
+  int THR;
+
+  for (THR = 0; THR < N_THREADS; THR++) {
+    double elapsed_us =
+        (double)(stop[THR].tv_sec - start[THR].tv_sec) * 1000000.0 +
+        (double)(stop[THR].tv_usec - start[THR].tv_usec);
+
+    printf("%1.0f,", (double)*SZ);
+    printf("%1.0f,", (double)N_THREADS);
+    printf("%1.0f,", (double)THR);
+    printf("%1.0f\n", elapsed_us);
+  }
+}
--- a/MatMult/sample.h
+++ b/MatMult/sample.h
@ -0,0 +1,5 @@
+/* Timing helpers implemented in Otime.c. */
+
+/* Read the thread count from argv[1] and pass it to omp_set_num_threads();
+ * prints a message and exits when arguments are missing or out of range. */
+void Sample_Init(int argc, char *argv[]);
+/* Wait at an OpenMP barrier, then store the start time of thread THR. */
+void Sample_Start(int THR);
+/* Store the stop time of thread THR. */
+void Sample_Stop(int THR);
+/* Return the OpenMP thread number of the calling thread. */
+int Sample_PAR_install(void);
+/* Print "size,threads,thread,elapsed microseconds" for every thread. */
+void Sample_End(const int *SZ);
--- a/README.md
+++ b/README.md
@ -0,0 +1,60 @@
+# Parallel matrix multiplication
+
+Performance tests for parallel matrix multiplication algorithms.
+
+## Usage
+
+1. Install the required python libraries, found in `requirements.txt`. The following command shows how to do it using `pip`, but `conda` or any package manager can also be used.
+
+```shell
+pip install -r requirements.txt
+```
+
+2. Build the matrix multiplication executables using `make`.
+
+```shell
+cd MatMult
+make
+```
+
+3. Run the experiments using the launcher.
+
+```shell
+cd ..
+python launcher.py experiments.csv
+```
+
+4. Generate the graphics, using the results of the experiments from every machine.
+
+```shell
+python graphics.py experiments1.csv experiments2.csv experiments3.csv -o img/
+```
+
+## Results
+
+The experimentation results are summarized in the following figures.
+
+![Matrix size vs Time line plot](img/size-time.png)
+![Threads vs Time line plot](img/threads-time.png)
+![Matrix size vs Threads heatmap](img/size-threads-time.png)
+![Time distribution violin plot](img/distribution.png)
+
+In this case, the experiments were run on 3 different machines:
+
+- A ml.c5.9xlarge AWS instance, with 36 vCPUs and 72 GiB RAM.
+- A ml.m5.8xlarge AWS instance, with 32 vCPUs and 128 GiB RAM.
+- A ml.r5.8xlarge AWS instance, with 32 vCPUs and 128 GiB RAM.
+
+Every matrix size - number of threads combination was executed 30 times, in order to make the experiments statistically significant. The values in the graphics represent the mean values of those 30 repetitions.
+
+### Metrics
+
+The following metrics summarize the experimentation results, regarding the algorithms' performance for different numbers of threads:
+
+- Speed up: $$Sp = \frac{t_{serial}}{t_{parallel}}$$
+
+![SpeedUp](img/speedup.png)
+
+- Efficiency: $$Ep = \frac{Sp}{p}$$
+
+![Efficiency](img/efficiency.png)
--- a/graphics.py
+++ b/graphics.py
@ -0,0 +1,134 @@
+# Imports
+from argparse import ArgumentParser
+from os import path
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+from matplotlib.colors import LogNorm
+
+# Parse execution arguments
+parser = ArgumentParser(description="Create graphics from the experiments data.")
+parser.add_argument(
+    "input_files",
+    help="CSV files to be read and processed. Should be outputs of launcher.py, from different machines",
+    nargs="+",
+)
+parser.add_argument(
+    "-o", "--out", help="Folder where the images will be saved", required=True
+)
+args = parser.parse_args()
+input_files = args.input_files
+out_folder = args.out
+
+# Read the data in every csv input and combine into single dataframe
+data = []
+for input_file in input_files:
+    partial_data = pd.read_csv(input_file)
+    partial_data["Machine"] = input_file.split(".")[0]
+    data.append(partial_data)
+data = pd.concat(data, ignore_index=True)
+
+# Transform time from microseconds to seconds
+data["Time (secs)"] = data["Time"] * 10**-6
+# Fix machine names to agree with aws documentation
+data["Machine"] = data.apply(lambda x: x["Machine"].replace("_", "."), axis=1)
+# Obtain list of unique machines and algorithms
+machines = data["Machine"].unique()
+algorithms = data["Algorithm"].unique()
+num_machines = len(machines)
+num_algorithms = len(algorithms)
+
+# Create matplotlib figure to plot time heatmap
+fig, ax = plt.subplots(num_machines, num_algorithms)
+fig_width = 12
+fig.set_size_inches(fig_width, 3 * fig_width // 2)
+ax_idx = 0
+# Iterate over every machine and algorithm combination
+for machine in machines:
+    for algorithm in algorithms:
+        # Obtain axis position in figure
+        position = divmod(ax_idx, num_algorithms)
+        # Create time heatmap, with matrix vs threads dimensions
+        heatmap = data.query(f"Algorithm == '{algorithm}' and Machine == '{machine}'")
+        heatmap = heatmap.pivot_table(
+            values="Time (secs)",
+            index="Matrix_Size",
+            columns="N_Threads",
+            aggfunc="mean",
+        )
+        sns.heatmap(heatmap, norm=LogNorm(), ax=ax[*position])
+        ax[*position].set_title(f"Machine={machine} | Algorithm={algorithm}")
+        ax_idx += 1
+fig.suptitle(
+    "Time (secs) for every threads-size combination", fontsize="xx-large", x=0.5, y=0.92
+)
+# Save figure
+plt.savefig(path.join(out_folder, "size-threads-time.png"))
+# Clear figure
+plt.clf()
+plt.cla()
+
+# Define data subset with max number of threads and matrix size
+data_size2k_threads20 = data.query("Matrix_Size == 2000 and N_Threads == 20")
+# Create matplotlib figure to plot time heatmap
+fig, ax = plt.subplots(num_machines, num_algorithms)
+fig_width = 12
+fig.set_size_inches(fig_width, 3 * fig_width // 2)
+ax_idx = 0
+# Iterate over every machine and algorithm combination
+for machine in machines:
+    for algorithm in algorithms:
+        # Obtain axis position in figure
+        position = divmod(ax_idx, num_algorithms)
+        # Create time distribution plot as violinplot
+        dist = data_size2k_threads20.query(
+            f"Algorithm == '{algorithm}' and Machine == '{machine}'"
+        )
+        sns.violinplot(dist, y="Time (secs)", inner="quart", ax=ax[*position])
+        ax[*position].set_title(f"Machine={machine} | Algorithm={algorithm}")
+        ax_idx += 1
+fig.suptitle(
+    "Time (secs) distribution for Matrix_Size=2000, N_Threads=20",
+    fontsize="xx-large",
+    x=0.5,
+    y=0.92,
+)
+# Save figure
+plt.savefig(path.join(out_folder, "distribution.png"))
+# Clear figure
+plt.clf()
+plt.cla()
+
+# Plot threads vs time lineplots
+ax = sns.relplot(
+    data,
+    x="N_Threads",
+    y="Time (secs)",
+    hue="Matrix_Size",
+    row="Machine",
+    col="Algorithm",
+    palette="plasma",
+    kind="line",
+)
+# Save figure
+plt.savefig(path.join(out_folder, "threads-time.png"))
+# Clear figure
+plt.clf()
+plt.cla()
+
+# Plot matrix_size vs time lineplots
+data["N_Threads"] = data["N_Threads"].astype(str)
+ax = sns.relplot(
+    data,
+    x="Matrix_Size",
+    y="Time (secs)",
+    hue="N_Threads",
+    row="Machine",
+    col="Algorithm",
+    palette="plasma",
+    kind="line",
+)
+# Save figure
+plt.savefig(path.join(out_folder, "size-time.png"))
+# Program end
--- a/img/distribution.png
+++ b/img/distribution.png
--- a/img/efficiency.png
+++ b/img/efficiency.png
--- a/img/size-threads-time.png
+++ b/img/size-threads-time.png
--- a/img/size-time.png
+++ b/img/size-time.png
--- a/img/speedup.png
+++ b/img/speedup.png
--- a/img/threads-time.png
+++ b/img/threads-time.png
--- a/launcher.py
+++ b/launcher.py
@ -0,0 +1,52 @@
+from argparse import ArgumentParser
+from os import popen
+
+from pandas import DataFrame
+from tqdm import trange
+
+
+def single_experiment(matrix_size, threads):
+    data = []
+    executables = ("MM1c", "MM1r")
+    algorithms = ("row-column", "row-row")
+    for executable, algorithm in zip(executables, algorithms):
+        stream = popen(f"./{executable} {matrix_size} {threads} 0")
+        for line in stream.readlines():
+            values = line.strip().split(",")
+            values.append(algorithm)
+            data.append(values)
+    return data
+
+
+def all_experiments(matrix_sizes, threads, repetitions):
+    data = []
+    for matrix_size in matrix_sizes:
+        for thread in threads:
+            for _ in trange(
+                repetitions,
+                desc=f"Size: {matrix_size}, {thread} threads",
+                unit="exec",
+            ):
+                data.extend(single_experiment(matrix_size, thread))
+    return data
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser(
+        description="Run matrix multiplication experiments and save data as CSV.",
+        epilog="The output of this program should be used with graphics.py",
+    )
+    parser.add_argument(
+        "output_file",
+        help="CSV file where the results will be saved",
+    )
+    args = parser.parse_args()
+
+    matrix_sizes = range(200, 2001, 200)
+    threads = range(2, 21, 2)
+    repetitions = 30
+
+    data = all_experiments(matrix_sizes, threads, repetitions)
+    columns = ["Matrix_Size", "N_Threads", "Thread", "Time", "Algorithm"]
+    data = DataFrame(data, columns=columns)
+    data.to_csv(args.output_file, index=False)
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,3 @@
+tqdm
+pandas
+seaborn