From d663f270e1a1dec1c1feade9cb18cc2e0a66c8bf Mon Sep 17 00:00:00 2001 From: Tibo De Peuter Date: Fri, 19 Dec 2025 17:53:41 +0100 Subject: [PATCH] feat: Improved plots --- results/make_graphs.py | 123 ++++++++++++++++++++++++++++++++--------- 1 file changed, 98 insertions(+), 25 deletions(-) diff --git a/results/make_graphs.py b/results/make_graphs.py index 3f328e5..592ac6d 100644 --- a/results/make_graphs.py +++ b/results/make_graphs.py @@ -1,3 +1,4 @@ +import glob import os import matplotlib.pyplot as plt @@ -67,7 +68,32 @@ def compression_ratios(df: pd.DataFrame, unique_labels, palette_dict) -> Figure: plt.yticks(rotation=45, ha="right") - ax.grid(True) + ax.grid(True, alpha=0.3) + + return plt.gcf() + + +def compression_ratio_v_compression_time(df: pd.DataFrame, unique_labels, palette_dict, markers_dict) -> Figure: + plt.figure() + + sns.scatterplot( + data=df, + x=RATE_COL, + y=COMPRESS_TIME_COL, + hue=LABEL_COL, + hue_order=unique_labels, + palette=palette_dict, + style=LABEL_COL, + style_order=unique_labels, + markers=markers_dict, + ) + + plt.legend(title='Compressor') + + plt.xlabel('Compression ratio') + plt.ylabel('Compression time (s)') + + plt.grid(True, alpha=0.3) return plt.gcf() @@ -120,25 +146,35 @@ def filesize_v_mse(df: pd.DataFrame) -> Figure: size = row[INPUT_SIZE_COL] return f"{filename} ({size:.4f} MB)" - df['input_filename_size'] = df.apply(filename_and_size, axis=1) + def size(row): + full_name = row['input_filename'] + # Strip prefix + size_name = full_name.lstrip('text').lstrip('genome').rstrip('txt').rstrip('fna') + size_name = f"*{size_name}*" + return size_name - fig, ax = plt.subplots() - sns.barplot( + df['input_filename_size'] = df.apply(filename_and_size, axis=1) + df['input_size'] = df.apply(size, axis=1) + + g = sns.catplot( data=df, - y='input_filename', + kind="bar", x=DISTORTION_COL, + y='input_size', + col='training_dataset', hue=CONTEXT_COL, - ax=ax, - palette='Set2' + palette='Set2', + height=5, + aspect=0.6 ) - plt.title('MSE for autoencoder') - plt.xlabel('MSE') - plt.ylabel('Filename') - plt.yticks(rotation=45, ha="right") - plt.legend(title='Context size') + g.set_axis_labels("MSE Loss", "Filename") + g.set_titles("Autoencoder trained on {col_name}") - plt.grid(True) + # plt.title('MSE for autoencoder') + # plt.yticks(rotation=45, ha="right") + # plt.legend(title='Context size') + g.tight_layout() return plt.gcf() @@ -164,7 +200,7 @@ def mse_losses(df: pd.DataFrame, unique_labels, palette_dict) -> Figure: plt.yticks(rotation=45, ha="right") - ax.grid(True) + ax.grid(True, alpha=0.3) return plt.gcf() @@ -238,8 +274,8 @@ def split_graph( f.text(0.5, 0, x_axis_label, ha='center', va='center') ax_left.set_ylabel(y_axis_label) - ax_left.grid(True) - ax_right.grid(True) + ax_left.grid(True, alpha=0.3) + ax_right.grid(True, alpha=0.3) plt.tight_layout() return f, ax_left, ax_right @@ -247,7 +283,7 @@ def split_graph( def generate( df: pd.DataFrame, unique_labels, palette_dict, markers_dict, - tgt_dir: str, dpi: int = 300 + tgt_dir: str ) -> None: """Generate all the plots""" # Make plots @@ -270,6 +306,11 @@ def generate( os.path.join(tgt_dir, 'compression_ratios.png'), bbox_inches='tight' ) + compression_ratio_v_compression_time(df, unique_labels, palette_dict, markers_dict).savefig( + os.path.join(tgt_dir, 'compression_ratio_v_compression_time.png'), + bbox_inches='tight' + ) + filesize_v_mse(df).savefig( os.path.join(tgt_dir, 'filesize_mse.png'), bbox_inches='tight' @@ -280,7 +321,7 @@ def generate( ) -def setup(tgt_dir): +def setup(tgt_dir, dpi = 300): # Create the targ directory if it does not exist os.makedirs(tgt_dir, exist_ok=True) @@ -288,7 +329,7 @@ def setup(tgt_dir): params = {'text.usetex': True, 'font.size': 11, 'font.family': 'serif', - 'figure.dpi': 300, + 'figure.dpi': dpi, } plt.rcParams.update(params) @@ -327,8 +368,8 @@ def main(): df = pd.read_csv("measurements.csv") tgt_dir = "figures" - setup(tgt_dir) - generate(*preprocessing(df), tgt_dir=tgt_dir, dpi=150) + setup(tgt_dir, 300) + generate(*preprocessing(df), tgt_dir=tgt_dir) def old_results(): @@ -395,7 +436,7 @@ def old_results(): # plt.title(f"{model_type.capitalize()} compressed file evolution: {dataset_type}") plt.xlabel("Original file size (MB)") plt.ylabel("Compressed file size (MB)") - plt.ylim(0, model_df["compressed_file_size"].max() / 1e6) + plt.ylim(0, df[df["model_type"] == model_type]["compressed_file_size"].max() / 1e6) plt.legend(title="Context size") plt.tight_layout() plt.savefig(f"./graphs/{model_type}_{dataset_type}_compression_ratio.png") @@ -428,7 +469,7 @@ def old_results(): linestyle=linestyle ) - plt.grid(True) + plt.grid(True, alpha=0.3) plt.legend() plt.title(f"(Log-linear) Extrapolated execution time for CNN") # plt.xscale('log') @@ -466,11 +507,43 @@ def old_results(): plt.xlabel("MSE loss") plt.ylabel("Filename") plt.legend() - plt.grid(True) + plt.grid(True, alpha=0.3) plt.tight_layout() plt.savefig(f"./graphs/{model_type}_loss.png") +def training_loss(df, loss) -> Figure: + plt.figure(figsize=(4, 3)) + + plt.plot(df['train_loss'], label="Training loss") + plt.plot(df['validation_loss'], label="Validation losses") + + plt.xlabel("Epoch") + plt.ylabel(loss) + + if loss == 'MSE Loss': + ylim = 0.01 + else: + ylim = 6 + plt.ylim(0, ylim) + + plt.legend() + plt.tight_layout() + + return plt.gcf() + + +def make_training_graphs(models_dir, loss): + for csv in glob.glob(models_dir + '/*.csv'): + df = pd.read_csv(csv) + training_loss(df, loss).savefig( + csv.replace('.csv', '.png'), + bbox_inches='tight', + ) + + if __name__ == "__main__": main() - old_results() + # old_results() + make_training_graphs('../models/autoencoder', 'MSE Loss') + make_training_graphs('../models/cnn', 'Cross Entropy Loss')