chore: Restructure
# Conflicts: # config/measure.py # results/compression_results_auto_small.csv
This commit is contained in:
parent
dd938489ef
commit
2f869a8a7a
41 changed files with 8 additions and 21 deletions
132
results/make_graphs.py
Normal file
132
results/make_graphs.py
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
|
||||
# Context lengths benchmarked in the results CSV.  New lengths only need to be
# added here; every plot below iterates over this tuple.
CONTEXT_LENGTHS = (256, 128)


def _plot_execution_time(model_df, model_type, dataset_type):
    """Plot compression (solid) and decompression (dashed) time vs. file size.

    Mutates *model_df* by adding the derived MB/seconds columns, so callers
    should pass a copy.  Saves
    ``./graphs/{model_type}_{dataset_type}_execution_time.png``.
    """
    from matplotlib.lines import Line2D

    plt.figure()
    # Raw sizes are in bytes and times in nanoseconds -- convert once for
    # plotting.  (NOTE(review): units inferred from the /1e6 and /1e9 divisors
    # in the original script -- confirm against the benchmark writer.)
    model_df["original_file_size_mb"] = model_df["original_file_size"] / 1e6
    model_df["compression_time_s"] = model_df["compression_time"] / 1e9
    model_df["decompression_time_s"] = model_df["decompression_time"] / 1e9

    # One line per context length (hue); solid for compression, dashed for
    # decompression.  Seaborn's automatic legends are suppressed because we
    # build a custom one below.
    sns.lineplot(
        data=model_df,
        x="original_file_size_mb",
        y="compression_time_s",
        hue="context_length",
        palette="Set1",
        legend=False,
        linestyle="-",
    )
    sns.lineplot(
        data=model_df,
        x="original_file_size_mb",
        y="decompression_time_s",
        hue="context_length",
        palette="Set1",
        legend=False,
        linestyle="--",
    )
    plt.title(f"{model_type.capitalize()} compression and decompression time: {dataset_type}")
    plt.xlabel("file size [MB]")
    plt.ylabel("Time [s]")
    plt.yscale("log")
    # Proxy handles: passing bare label strings to plt.legend() would attach
    # them to whichever artists matplotlib finds first (the hue lines), which
    # mislabels the plot.  Explicit Line2D handles tie each linestyle to its
    # meaning instead.
    plt.legend(handles=[
        Line2D([0], [0], color="black", linestyle="-", label="compression"),
        Line2D([0], [0], color="black", linestyle="--", label="decompression"),
    ])
    plt.tight_layout()
    plt.savefig(f"./graphs/{model_type}_{dataset_type}_execution_time.png")
    plt.close()  # free the figure; leaving it open leaks memory across the loop


def _plot_compression_ratio(model_df, model_type, dataset_type):
    """Plot compressed vs. original file size (MB) for each context length.

    Saves ``./graphs/{model_type}_{dataset_type}_compression_ratio.png``.
    """
    plt.figure()
    for length in CONTEXT_LENGTHS:
        subset = model_df[model_df["context_length"] == length]
        plt.plot(
            subset["original_file_size"] / 1e6,
            subset["compressed_file_size"] / 1e6,
            label=str(length),
        )
    plt.title(f"{model_type.capitalize()} compressed file evolution: {dataset_type}")
    plt.xlabel("Original file size [MB]")
    plt.ylabel("Compressed file size [MB]")
    plt.legend()
    plt.savefig(f"./graphs/{model_type}_{dataset_type}_compression_ratio.png")
    plt.close()


def _plot_mse_loss(model_df, model_type):
    """Grouped horizontal bar chart of MSE loss per input file.

    One bar group per file, one bar per context length.  Saves
    ``./graphs/{model_type}_loss.png``.
    """
    plt.figure(figsize=(10, 4))
    bar_height = 0.25
    files = model_df["input_file_name"].unique()
    y = np.arange(len(files))
    # 256 above the group centre, 128 below -- same layout as the original.
    for length, offset in zip(CONTEXT_LENGTHS, (-bar_height / 2, bar_height / 2)):
        subset = model_df[model_df["context_length"] == length]
        # NOTE(review): assumes each subset's rows appear in the same order as
        # `files` -- confirm against how the benchmark writes the CSV.
        plt.barh(y + offset, subset["mse_loss"], height=bar_height, label=str(length))
    plt.yticks(y, files, rotation=45, ha="right")
    plt.title("MSE loss for different context lengths")
    plt.xlabel("MSE loss")
    plt.ylabel("Filename")
    plt.legend()
    plt.tight_layout()
    plt.savefig(f"./graphs/{model_type}_loss.png")
    plt.close()


def main():
    """Read compression_results.csv and write all graphs to ./graphs/."""
    from pathlib import Path

    # savefig does not create directories; fail early rather than per-figure.
    Path("./graphs").mkdir(exist_ok=True)

    # read in the csv
    df = pd.read_csv("compression_results.csv")

    for dataset_type in df["dataset_type"].unique():
        # Hoisted out of the model loop: this filter is invariant of model_type.
        dataset_df = df[df["dataset_type"] == dataset_type]
        for model_type in df["model_type"].unique():
            # .copy() so the derived columns added while plotting do not
            # trigger pandas' SettingWithCopy warnings on a view of df.
            model_df = dataset_df[dataset_df["model_type"] == model_type].copy()
            if model_df.empty:
                # Not every (dataset, model) pair exists in the CSV; skip
                # rather than emitting blank figures.
                continue
            _plot_execution_time(model_df, model_type, dataset_type)
            _plot_compression_ratio(model_df, model_type, dataset_type)

    # MSE loss is aggregated across datasets, so it loops over models only.
    for model_type in df["model_type"].unique():
        _plot_mse_loss(df[df["model_type"] == model_type], model_type)


if __name__ == "__main__":
    main()
|
||||
Reference in a new issue