"""Plot compression benchmark results.

Reads ``./results/compress/compression_results.csv`` and writes PNG figures to
``./graphs``:

* per (model type, dataset type): compression/decompression time versus
  original file size, and compressed size versus original size;
* per model type: MSE loss per input file for each context length.
"""
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.lines import Line2D

RESULTS_CSV = "./results/compress/compression_results.csv"
GRAPHS_DIR = Path("./graphs")

# Context lengths compared in the per-length plots, in legend order.
CONTEXT_LENGTHS = (256, 128)


def _plot_execution_time(
    model_df: pd.DataFrame, model_type: str, dataset_type: str
) -> None:
    """Save the compression/decompression time plot for one model and dataset."""
    # Work on a derived frame so the caller's data is left untouched.  The
    # scaled columns feed the "[MB]" and "[s]" axes below.
    timing_df = model_df.assign(
        original_file_size_mb=model_df["original_file_size"] / 1e6,
        compression_time_s=model_df["compression_time"] / 1e9,
        decompression_time_s=model_df["decompression_time"] / 1e9,
    )

    fig, ax = plt.subplots()

    # compression (solid lines, one colour per context length)
    sns.lineplot(
        data=timing_df,
        x="original_file_size_mb",
        y="compression_time_s",
        hue="context_length",
        palette="Set1",
        markers=True,
        legend="brief",
        linestyle="-",
        ax=ax,
    )
    # decompression (dashed lines, same colours)
    sns.lineplot(
        data=timing_df,
        x="original_file_size_mb",
        y="decompression_time_s",
        hue="context_length",
        palette="Set1",
        markers=True,
        legend=False,
        linestyle="--",
        ax=ax,
    )

    ax.set_title(
        f"{model_type.capitalize()} compression and decompression time: {dataset_type}"
    )
    ax.set_xlabel("file size [MB]")
    ax.set_ylabel("Time [s]")
    ax.set_yscale("log")

    # Passing bare labels to legend() attaches them to the first two artists on
    # the axes, which are both *solid* compression lines.  Build explicit proxy
    # handles for the line styles instead, and keep the colour (context
    # length) entries that seaborn registered for the first plot.
    hue_handles, hue_labels = ax.get_legend_handles_labels()
    style_handles = [
        Line2D([], [], color="black", linestyle=linestyle)
        for linestyle in ("-", "--")
    ]
    style_labels = [
        f"{style}, {c_type}"
        for style, c_type in zip(
            ["Solid", "Dashed"], ["compression", "decompression"]
        )
    ]
    ax.legend(
        list(hue_handles) + style_handles,
        [f"context length {label}" for label in hue_labels] + style_labels,
    )

    fig.tight_layout()
    fig.savefig(GRAPHS_DIR / f"{model_type}_{dataset_type}_execution_time.png")
    # Figures are created in a loop; close them so they do not pile up.
    plt.close(fig)


def _plot_compressed_size(
    model_df: pd.DataFrame, model_type: str, dataset_type: str
) -> None:
    """Save the compressed-size versus original-size plot for one model and dataset."""
    fig, ax = plt.subplots()

    for length in CONTEXT_LENGTHS:
        # Sort by x so the line is drawn left to right regardless of the row
        # order in the CSV.
        subset = model_df[model_df["context_length"] == length].sort_values(
            "original_file_size"
        )
        ax.plot(
            subset["original_file_size"] / 1e6,
            subset["compressed_file_size"] / 1e6,
            label=str(length),
        )

    ax.set_title(
        f"{model_type.capitalize()} compressed file evolution: {dataset_type}"
    )
    ax.set_xlabel("Original file size [MB]")
    ax.set_ylabel("Compressed file size [MB]")
    ax.legend()
    fig.tight_layout()
    fig.savefig(GRAPHS_DIR / f"{model_type}_{dataset_type}_compression_ratio.png")
    plt.close(fig)


def _plot_mse_loss(model_df: pd.DataFrame, model_type: str) -> None:
    """Save the grouped horizontal bar chart of MSE loss per input file."""
    fig, ax = plt.subplots(figsize=(10, 4))

    bar_height = 0.25
    files = model_df["input_file_name"].unique()
    positions = np.arange(len(files))

    for index, length in enumerate(CONTEXT_LENGTHS):
        # Align the losses with `files` explicitly: a file missing for this
        # context length becomes NaN (no bar) and repeated rows for the same
        # file are averaged, so every bar lines up with its tick label.
        losses = (
            model_df[model_df["context_length"] == length]
            .groupby("input_file_name")["mse_loss"]
            .mean()
            .reindex(files)
        )
        # Centre the group of bars on the tick position.
        offset = (index - (len(CONTEXT_LENGTHS) - 1) / 2) * bar_height
        ax.barh(
            positions + offset,
            losses.to_numpy(),
            height=bar_height,
            label=str(length),
        )

    ax.set_yticks(positions)
    ax.set_yticklabels(files, rotation=45, ha="right")
    ax.set_title("MSE loss for different context lengths")
    ax.set_xlabel("MSE loss")
    ax.set_ylabel("Filename")
    ax.legend()
    fig.tight_layout()
    fig.savefig(GRAPHS_DIR / f"{model_type}_loss.png")
    plt.close(fig)


def main() -> None:
    """Read the results CSV and generate every figure."""
    # read in the csv
    df = pd.read_csv(RESULTS_CSV)

    # savefig does not create missing directories.
    GRAPHS_DIR.mkdir(parents=True, exist_ok=True)

    for dataset_type in df["dataset_type"].unique():
        dataset_df = df[df["dataset_type"] == dataset_type]
        for model_type in df["model_type"].unique():
            model_df = dataset_df[dataset_df["model_type"] == model_type]
            if model_df.empty:
                # This model was not run on this dataset; nothing to plot.
                continue

            _plot_execution_time(model_df, model_type, dataset_type)
            _plot_compressed_size(model_df, model_type, dataset_type)

            # Disabled experiment, kept for reference.
            # NOTE(review): the fitted slopes b1/b2 are never used in the
            # plotted curves and y is not converted to seconds although the
            # axis label says "[s]" -- fix both before re-enabling.
            #
            # if model_type == "cnn":
            #     import numpy as np
            #
            #     plt.figure()
            #     for length, linestyle in [(128, '-'), (256, '--')]:
            #         # extrapolate execution time to larger files
            #         x = model_df[model_df["context_length"] == length]["original_file_size"] / 1e6
            #         y = model_df[model_df["context_length"] == length]["compression_time"]
            #         y_decom = model_df[model_df["context_length"] == length]["decompression_time"]
            #
            #         b1, loga1 = np.polyfit(x, np.log(y), 1)
            #         b2, loga2 = np.polyfit(x, np.log(y_decom), 1)
            #
            #         x_comp = np.linspace(0, 40, 1000)
            #         x_decomp = np.linspace(0, 40, 1000)
            #         a1 = np.exp(loga1)
            #         a2 = np.exp(loga2)
            #
            #         plt.plot(
            #             x_comp, a1 * np.exp(x_comp),
            #             label=f"{length} compression",
            #             linestyle=linestyle
            #         )
            #         plt.plot(
            #             x_decomp, a2 * np.exp(x_decomp),
            #             label=f"{length} decompression",
            #             linestyle=linestyle
            #         )
            #
            #     plt.legend()
            #     plt.title(f"Extrapolated execution time for CNN compression and decompression")
            #     plt.xlabel("File size [MB]")
            #     plt.ylabel("Time [s]")
            #     plt.tight_layout()
            #     plt.savefig(f"./graphs/{model_type}_{dataset_type}_extrapolated_execution_time.png")

    for model_type in df["model_type"].unique():
        _plot_mse_loss(df[df["model_type"] == model_type], model_type)


if __name__ == "__main__":
    main()