# 2025ML-project-neural_compr.../make_graphs.py

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

RESULTS_CSV = "./results/compress/compression_results.csv"
GRAPHS_DIR = "./graphs"


def mean_seconds_by_context(model_df, column):
    """Return the mean of *column* per context length, divided by 1e9.

    The result is indexed by context length. The 1e9 divisor matches the
    "[s]" axis labels used by the time charts, so the CSV timings are
    presumably stored in nanoseconds -- TODO confirm against the writer.
    """
    return model_df.groupby("context_length")[column].mean() / 1e9


def loss_per_file(model_df, files, context_length):
    """Return the mean MSE loss per file for one context length.

    The result is reindexed to *files*, so position ``i`` always belongs to
    ``files[i]``; a file with no row for *context_length* yields NaN instead
    of silently shifting the remaining bars onto the wrong labels.
    """
    subset = model_df[model_df["context_length"] == context_length]
    return subset.groupby("input_file_name")["mse_loss"].mean().reindex(files)


def sizes_in_mb(model_df, context_length):
    """Return ``(original, compressed)`` file sizes divided by 1e6.

    Rows are restricted to *context_length* and sorted by original size so
    that a line plot of the pair runs left to right instead of zigzagging
    in CSV row order.
    """
    subset = model_df[model_df["context_length"] == context_length]
    subset = subset.sort_values("original_file_size")
    return (
        subset["original_file_size"] / 1e6,
        subset["compressed_file_size"] / 1e6,
    )


def plot_mean_time(model_df, column, title, ylabel, out_path):
    """Save a bar chart of the mean of *column* per context length."""
    grouped = mean_seconds_by_context(model_df, column)
    labels = grouped.index.astype(str)
    x = np.arange(len(labels))

    plt.figure()
    plt.bar(x, grouped.values, width=0.6)
    plt.title(title)
    plt.xticks(x, labels)
    plt.xlabel("Context length")
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.savefig(out_path)
    # Figures are created in a loop; close each one so they do not pile up.
    plt.close()


def plot_loss(model_df, title, out_path):
    """Save a grouped horizontal bar chart of MSE loss per input file."""
    files = model_df["input_file_name"].unique()
    # Descending order keeps the original layout: 256 first, then 128.
    context_lengths = sorted(model_df["context_length"].unique(), reverse=True)
    y = np.arange(len(files))
    # 0.25 is the original bar height; shrink only when many groups must fit.
    bar_height = min(0.25, 0.8 / max(len(context_lengths), 1))

    plt.figure(figsize=(10, 4))
    for i, context_length in enumerate(context_lengths):
        # Centre the group of bars around each file's tick position.
        offset = (i - (len(context_lengths) - 1) / 2) * bar_height
        losses = loss_per_file(model_df, files, context_length)
        plt.barh(
            y + offset,
            losses.to_numpy(),
            height=bar_height,
            label=str(context_length),
        )
    plt.yticks(y, files, rotation=45, ha="right")
    plt.title(title)
    plt.xlabel("MSE loss")
    plt.ylabel("Filename")
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path)
    plt.close()


def plot_compression_ratio(model_df, title, out_path):
    """Save a line chart of compressed vs. original size per context length."""
    context_lengths = sorted(model_df["context_length"].unique(), reverse=True)

    plt.figure()
    for context_length in context_lengths:
        original_mb, compressed_mb = sizes_in_mb(model_df, context_length)
        plt.plot(original_mb, compressed_mb, label=str(context_length))
    plt.title(title)
    plt.xlabel("Original file size [MB]")
    plt.ylabel("Compressed file size [MB]")
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path)
    plt.close()


def main():
    """Read the compression results CSV and write one set of charts per
    (dataset type, model type) combination that actually has rows."""
    df = pd.read_csv(RESULTS_CSV)
    # savefig does not create missing directories.
    os.makedirs(GRAPHS_DIR, exist_ok=True)

    # groupby yields only combinations present in the data, so no empty
    # charts are produced for model/dataset pairs that were never run.
    groups = df.groupby(["dataset_type", "model_type"], sort=False)
    for (dataset_type, model_type), model_df in groups:
        model_name = str(model_type).capitalize()
        prefix = f"{GRAPHS_DIR}/{model_type}_{dataset_type}"

        # execution time
        plot_mean_time(
            model_df,
            "compression_time",
            f"{model_name} mean compression time",
            "Mean compression time [s]",
            f"{prefix}_compression_time.png",
        )
        plot_mean_time(
            model_df,
            "decompression_time",
            f"{model_name} mean decompression time",
            "Mean decompression time [s]",
            f"{prefix}_decompression_time.png",
        )

        # loss
        plot_loss(
            model_df,
            f"{model_name} MSE loss for different context lengths",
            f"{prefix}_accuracy.png",
        )

        # compression ratio
        plot_compression_ratio(
            model_df,
            f"{model_name} compressed file evolution",
            f"{prefix}_compression_ratio.png",
        )


if __name__ == "__main__":
    main()