feat: Graphs checkpoint
This commit is contained in:
parent
15062d8884
commit
b62f06018d
1 changed file with 201 additions and 138 deletions
|
|
@ -3,7 +3,6 @@ import os
|
|||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import scipy
|
||||
import seaborn as sns
|
||||
from matplotlib.figure import Figure
|
||||
|
||||
|
|
@ -18,21 +17,18 @@ RATE_COL = 'compression_ratio'
|
|||
DISTORTION_COL = 'mse_loss'
|
||||
|
||||
|
||||
def original_v_compressed_filesize(
|
||||
df: pd.DataFrame,
|
||||
unique_labels: list[str],
|
||||
palette_dict,
|
||||
markers_dict
|
||||
) -> Figure:
|
||||
def original_v_compressed_filesize(df: pd.DataFrame,
|
||||
unique_labels: list[str], palette_dict, markers_dict
|
||||
) -> Figure:
|
||||
"""The "rate" graph"""
|
||||
plt.figure()
|
||||
|
||||
break_point = 0.1
|
||||
|
||||
ax_small, ax_large = split_graph(df, INPUT_SIZE_COL, 'Input size (MB)',
|
||||
OUTPUT_SIZE_COL, 'Compressed size (log, MB)',
|
||||
break_point, 'Compressor', 'upper left', LABEL_COL,
|
||||
unique_labels, palette_dict, markers_dict)
|
||||
_, ax_small, ax_large = split_graph(df, INPUT_SIZE_COL, 'Input size (MB)',
|
||||
OUTPUT_SIZE_COL, 'Compressed size (log, MB)',
|
||||
break_point, 'Compressor', 'upper left', LABEL_COL,
|
||||
unique_labels, palette_dict, markers_dict)
|
||||
|
||||
# Add Baseline (y=x)
|
||||
df_small, df_large = df[df[INPUT_SIZE_COL] < break_point], df[df[INPUT_SIZE_COL] > break_point]
|
||||
|
|
@ -50,51 +46,136 @@ def original_v_compressed_filesize(
|
|||
return plt.gcf()
|
||||
|
||||
|
||||
def filesize_v_compression_time(
|
||||
df: pd.DataFrame,
|
||||
unique_labels: list[str],
|
||||
palette_dict,
|
||||
markers_dict
|
||||
) -> Figure:
|
||||
def compression_ratios(df: pd.DataFrame, unique_labels, palette_dict) -> Figure:
    """The "rate" graph: horizontal box plot of compression ratio per compressor.

    Args:
        df: Results table; the ``RATE_COL`` and ``LABEL_COL`` columns are read.
        unique_labels: Compressor labels, passed to seaborn as ``hue_order``
            so colours are assigned in a fixed order.
        palette_dict: Passed to seaborn as ``palette``.

    Returns:
        The figure that holds the box plot.
    """
    # plt.subplots() already creates the figure. A preceding plt.figure()
    # would only leave an extra, empty figure open that is never closed.
    fig, ax = plt.subplots()

    sns.boxplot(
        data=df,
        x=RATE_COL,
        y=LABEL_COL,
        hue=LABEL_COL,
        hue_order=unique_labels,
        palette=palette_dict,
        ax=ax,
        fill=False,
    )

    ax.set_xlabel('Compression ratio')
    ax.set_ylabel('Compressor')

    # Tilt the compressor names on the y axis.
    plt.setp(ax.get_yticklabels(), rotation=45, ha="right")

    ax.grid(True)

    return fig
|
||||
|
||||
|
||||
def filesize_v_compression_time(df: pd.DataFrame,
                                unique_labels: list[str], palette_dict, markers_dict
                                ) -> Figure:
    """The "execution time" graph: compression runtime against input size.

    Args:
        df: Results table; ``INPUT_SIZE_COL``, ``COMPRESS_TIME_COL`` and
            ``LABEL_COL`` are handed to ``split_graph`` as x, y and hue.
        unique_labels: Compressor labels, forwarded to ``split_graph``.
        palette_dict: Colour mapping, forwarded to ``split_graph``.
        markers_dict: Marker mapping, forwarded to ``split_graph``.

    Returns:
        The figure created by ``split_graph``.
    """
    # split_graph creates its own figure with plt.subplots(), so no
    # plt.figure() is opened here (it would stay behind as an empty figure),
    # and split_graph is called exactly once.
    f, _, _ = split_graph(df, INPUT_SIZE_COL, 'Input size (MB)',
                          COMPRESS_TIME_COL, 'Runtime (log, s)',
                          0.1, 'Compressor', 'center left', LABEL_COL,
                          unique_labels, palette_dict, markers_dict)

    # Figure-level title, centred at the top edge of the figure.
    f.text(0.5, 1, 'Compression runtime for different filesizes using each compressor', va='center', ha='center')

    # Applied to the current axes; split_graph builds its panels with
    # sharey=True.
    plt.yscale('log')

    return f
|
||||
|
||||
|
||||
def filesize_v_decompression_time(df: pd.DataFrame,
                                  unique_labels: list[str], palette_dict, markers_dict
                                  ) -> Figure:
    """The "execution time" graph: decompression runtime against input size.

    Args:
        df: Results table; ``INPUT_SIZE_COL``, ``DECOMPRESS_TIME_COL`` and
            ``LABEL_COL`` are handed to ``split_graph`` as x, y and hue.
        unique_labels: Compressor labels, forwarded to ``split_graph``.
        palette_dict: Colour mapping, forwarded to ``split_graph``.
        markers_dict: Marker mapping, forwarded to ``split_graph``.

    Returns:
        The figure created by ``split_graph``.
    """
    # split_graph creates its own figure with plt.subplots(), so no
    # plt.figure() is opened here (it would stay behind as an empty figure),
    # and split_graph is called exactly once.
    f, _, _ = split_graph(df, INPUT_SIZE_COL, 'Input size (MB)',
                          DECOMPRESS_TIME_COL, 'Runtime (log, s)',
                          0.1, 'Compressor', 'center left', LABEL_COL,
                          unique_labels, palette_dict, markers_dict)

    # Figure-level title, centred at the top edge of the figure.
    f.text(0.5, 1, 'Decompression runtime for different filesizes using each compressor', va='center', ha='center')

    # Applied to the current axes; split_graph builds its panels with
    # sharey=True.
    plt.yscale('log')

    return f
|
||||
|
||||
|
||||
def filesize_v_mse(df: pd.DataFrame) -> Figure:
    """The "distortion" graph: MSE per input file for the autoencoder.

    Rows whose ``DISTORTION_COL`` is zero and rows whose ``ALGORITHM_COL`` is
    not ``'Autoencoder'`` are dropped. The remaining rows are sorted by
    ``INPUT_SIZE_COL`` and drawn as horizontal bars, one per
    ``input_filename``, grouped by ``CONTEXT_COL``.

    Args:
        df: Results table. It is not modified; filtering and sorting produce
            new frames.

    Returns:
        The figure that holds the bar plot.
    """
    keep = (df[DISTORTION_COL] != 0) & (df[ALGORITHM_COL] == 'Autoencoder')
    # Sort into a new frame: sorting a filtered selection in place makes
    # pandas emit SettingWithCopyWarning.
    df = df[keep].sort_values(by=INPUT_SIZE_COL)

    # NOTE(review): a "<filename> (<size> MB)" label column used to be built
    # here but was never plotted, so it is no longer computed. Build it and
    # pass it as ``y`` if size-annotated labels are wanted.

    # plt.subplots() already creates the figure. A preceding plt.figure()
    # would only leave an extra, empty figure open that is never closed.
    fig, ax = plt.subplots()
    sns.barplot(
        data=df,
        y='input_filename',
        x=DISTORTION_COL,
        hue=CONTEXT_COL,
        ax=ax,
        palette='Set2',
    )

    ax.set_title('MSE for autoencoder')
    ax.set_xlabel('MSE')
    ax.set_ylabel('Filename')
    # Tilt the file names on the y axis.
    plt.setp(ax.get_yticklabels(), rotation=45, ha="right")
    ax.legend(title='Context size')

    ax.grid(True)

    return fig
|
||||
|
||||
|
||||
def mse_losses(df: pd.DataFrame, unique_labels, palette_dict) -> Figure:
    """The "distortion" graph: horizontal box plot of MSE per compressor.

    Args:
        df: Results table; the ``DISTORTION_COL`` and ``LABEL_COL`` columns
            are read.
        unique_labels: Compressor labels, passed to seaborn as ``hue_order``
            so colours are assigned in a fixed order.
        palette_dict: Passed to seaborn as ``palette``.

    Returns:
        The figure that holds the box plot.
    """
    # plt.subplots() already creates the figure. A preceding plt.figure()
    # would only leave an extra, empty figure open that is never closed.
    fig, ax = plt.subplots()

    sns.boxplot(
        data=df,
        x=DISTORTION_COL,
        y=LABEL_COL,
        hue=LABEL_COL,
        hue_order=unique_labels,
        palette=palette_dict,
        ax=ax,
        fill=False,
    )

    ax.set_xlabel('MSE')
    ax.set_ylabel('Compressor')

    # Tilt the compressor names on the y axis.
    plt.setp(ax.get_yticklabels(), rotation=45, ha="right")

    ax.grid(True)

    return fig
|
||||
|
||||
|
||||
def split_graph(
|
||||
df, x, x_axis_label, y, y_axis_label,
|
||||
break_point, legend_title, legend_loc, hue, unique_labels, palette_dict, markers_dict
|
||||
) -> tuple:
|
||||
df = df.sort_values(by=x)
|
||||
|
||||
f, (ax_left, ax_right) = plt.subplots(1, 2, sharey=True, figsize=(10, 5))
|
||||
f, (ax_left, ax_right) = plt.subplots(1, 2, sharey=True, figsize=(8, 4))
|
||||
|
||||
df_left = df[df[x] < break_point]
|
||||
sns.scatterplot(
|
||||
|
|
@ -107,7 +188,8 @@ def split_graph(
|
|||
palette=palette_dict,
|
||||
style=hue,
|
||||
style_order=unique_labels,
|
||||
markers=markers_dict
|
||||
markers=markers_dict,
|
||||
# s=150
|
||||
)
|
||||
ax_left.set_xlabel('')
|
||||
|
||||
|
|
@ -122,7 +204,8 @@ def split_graph(
|
|||
palette=palette_dict,
|
||||
style=hue,
|
||||
style_order=unique_labels,
|
||||
markers=markers_dict
|
||||
markers=markers_dict,
|
||||
# s=150
|
||||
)
|
||||
ax_right.set_xlabel('')
|
||||
ax_right.set_ylabel('')
|
||||
|
|
@ -159,48 +242,7 @@ def split_graph(
|
|||
ax_right.grid(True)
|
||||
|
||||
plt.tight_layout()
|
||||
return ax_left, ax_right
|
||||
|
||||
|
||||
def compression_v_mse_scatter(df: pd.DataFrame) -> Figure:
    """The "distortion" graph: scatter of MSE against compression ratio.

    Both axes are switched to a logarithmic scale.

    Args:
        df: Results table; ``RATE_COL`` is plotted on x and
            ``DISTORTION_COL`` on y.

    Returns:
        The figure that holds the scatter plot.
    """
    figure = plt.figure()

    # With no ``ax`` given, seaborn draws on the current axes of the figure
    # opened above and returns them.
    axes = sns.scatterplot(data=df, x=RATE_COL, y=DISTORTION_COL)

    axes.set_xscale('log')
    axes.set_xlabel('Compression ratio (log)')

    # TODO This does not work properly

    axes.set_yscale('log')
    axes.set_ylabel('MSE (log)')

    return figure
|
||||
|
||||
|
||||
def compression_ratios(df: pd.DataFrame) -> Figure:
    """The "rate" graph: horizontal box plot of compression ratio per label.

    Args:
        df: Results table; the ``RATE_COL`` and ``LABEL_COL`` columns are read.

    Returns:
        The figure that holds the box plot.
    """
    # plt.subplots() already creates the figure. A preceding plt.figure()
    # would only leave an extra, empty figure open that is never closed.
    fig, ax = plt.subplots()

    sns.boxplot(
        data=df,
        x=RATE_COL,
        y=LABEL_COL,
        ax=ax,
    )

    ax.set_xlabel('Compression ratio')
    # The category names on the ticks already identify each box.
    ax.set_ylabel('')

    ax.grid(True)

    return fig
|
||||
return f, ax_left, ax_right
|
||||
|
||||
|
||||
def generate(
|
||||
|
|
@ -213,22 +255,29 @@ def generate(
|
|||
original_v_compressed_filesize(df, unique_labels, palette_dict, markers_dict).savefig(
|
||||
os.path.join(tgt_dir, 'original_v_compressed_filesize.png'),
|
||||
bbox_inches='tight',
|
||||
dpi=dpi
|
||||
)
|
||||
|
||||
filesize_v_compression_time(df, unique_labels, palette_dict, markers_dict).savefig(
|
||||
os.path.join(tgt_dir, 'filesize_v_compression_time.png'),
|
||||
bbox_inches='tight',
|
||||
dpi=dpi
|
||||
)
|
||||
filesize_v_decompression_time(df, unique_labels, palette_dict, markers_dict).savefig(
|
||||
os.path.join(tgt_dir, 'filesize_v_decompression_time.png'),
|
||||
bbox_inches='tight',
|
||||
dpi=dpi
|
||||
)
|
||||
|
||||
# compression_v_mse_scatter(df).savefig(os.path.join(tgt_dir, 'compression_v_mse.png'), bbox_inches='tight')
|
||||
compression_ratios(df).savefig(os.path.join(tgt_dir, 'compression_ratios.png'), bbox_inches='tight')
|
||||
compression_ratios(df, unique_labels, palette_dict).savefig(
|
||||
os.path.join(tgt_dir, 'compression_ratios.png'),
|
||||
bbox_inches='tight'
|
||||
)
|
||||
filesize_v_mse(df).savefig(
|
||||
os.path.join(tgt_dir, 'filesize_mse.png'),
|
||||
bbox_inches='tight'
|
||||
)
|
||||
mse_losses(df, unique_labels, palette_dict).savefig(
|
||||
os.path.join(tgt_dir, 'mse_losses.png'),
|
||||
bbox_inches='tight'
|
||||
)
|
||||
|
||||
|
||||
def setup(tgt_dir):
|
||||
|
|
@ -239,6 +288,7 @@ def setup(tgt_dir):
|
|||
params = {'text.usetex': True,
|
||||
'font.size': 11,
|
||||
'font.family': 'serif',
|
||||
'figure.dpi': 300,
|
||||
}
|
||||
plt.rcParams.update(params)
|
||||
|
||||
|
|
@ -266,8 +316,8 @@ def preprocessing(df: pd.DataFrame) -> tuple:
|
|||
n_labels = len(unique_labels)
|
||||
|
||||
# Create fixed palette and marker mapping
|
||||
palette_dict = dict(zip(unique_labels, sns.color_palette("tab10", n_labels)))
|
||||
markers_dict = dict(zip(unique_labels, ['x', '+', '1', '2', '3', '4']))
|
||||
palette_dict = dict(zip(unique_labels, sns.color_palette("Set2", n_labels)))
|
||||
markers_dict = dict(zip(unique_labels, ['o', '^', 'v', 's', 'D', 'H', 'X']))
|
||||
|
||||
return df, unique_labels, palette_dict, markers_dict
|
||||
|
||||
|
|
@ -281,20 +331,26 @@ def main():
|
|||
generate(*preprocessing(df), tgt_dir=tgt_dir, dpi=150)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
exit()
|
||||
|
||||
def old_results():
|
||||
# read in the csv
|
||||
df = pd.read_csv("compression_results.csv")
|
||||
|
||||
# Make compatible with new code
|
||||
df[INPUT_SIZE_COL] = df['original_file_size']
|
||||
df[OUTPUT_SIZE_COL] = df['compressed_file_size']
|
||||
df['compressor'] = df['model_type']
|
||||
df[CONTEXT_COL] = df['context_length']
|
||||
#
|
||||
|
||||
df, unique_labels, palette_dict, markers_dict = preprocessing(df)
|
||||
|
||||
for dataset_type in df["dataset_type"].unique():
|
||||
for model_type in df["model_type"].unique():
|
||||
dataset_df = df[df["dataset_type"] == dataset_type]
|
||||
model_df = dataset_df[dataset_df["model_type"] == model_type].copy()
|
||||
|
||||
# execution time
|
||||
plt.figure()
|
||||
plt.figure(figsize=(4, 3))
|
||||
model_df["original_file_size_mb"] = model_df["original_file_size"] / 1e6
|
||||
model_df["compression_time_s"] = model_df["compression_time"] / 1e9
|
||||
model_df["decompression_time_s"] = model_df["decompression_time"] / 1e9
|
||||
|
|
@ -304,7 +360,7 @@ if __name__ == "__main__":
|
|||
x="original_file_size_mb",
|
||||
y="compression_time_s",
|
||||
hue="context_length",
|
||||
palette="Set1",
|
||||
palette="Set2",
|
||||
markers=True,
|
||||
legend="brief",
|
||||
linestyle="-"
|
||||
|
|
@ -315,14 +371,14 @@ if __name__ == "__main__":
|
|||
x="original_file_size_mb",
|
||||
y="decompression_time_s",
|
||||
hue="context_length",
|
||||
palette="Set1",
|
||||
palette="Set2",
|
||||
markers=True,
|
||||
legend=False,
|
||||
linestyle="--"
|
||||
)
|
||||
plt.title(f"{model_type.capitalize()} compression and decompression time: {dataset_type}")
|
||||
plt.xlabel("file size [MB]")
|
||||
plt.ylabel("Time [s]")
|
||||
# plt.title(f"{model_type.capitalize()} compression and decompression time: {dataset_type}")
|
||||
plt.xlabel("File size (MB)")
|
||||
plt.ylabel("Time (log, s)")
|
||||
plt.yscale("log")
|
||||
plt.legend(
|
||||
[f"{style}, {c_type}" for style, c_type in zip(["Solid", "Dashed"], ["compression", "decompression"])])
|
||||
|
|
@ -330,56 +386,57 @@ if __name__ == "__main__":
|
|||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_execution_time.png")
|
||||
|
||||
# compression ratio
|
||||
plt.figure()
|
||||
plt.figure(figsize=(4, 3))
|
||||
c256 = model_df[model_df["context_length"] == 256]
|
||||
c128 = model_df[model_df["context_length"] == 128]
|
||||
|
||||
plt.plot(c256["original_file_size"] / 1e6, c256["compressed_file_size"] / 1e6, label="256")
|
||||
plt.plot(c128["original_file_size"] / 1e6, c128["compressed_file_size"] / 1e6, label="128")
|
||||
plt.title(f"{model_type.capitalize()} compressed file evolution: {dataset_type}")
|
||||
plt.xlabel("Original file size [MB]")
|
||||
plt.ylabel("Compressed file size [MB]")
|
||||
plt.legend()
|
||||
# plt.title(f"{model_type.capitalize()} compressed file evolution: {dataset_type}")
|
||||
plt.xlabel("Original file size (MB)")
|
||||
plt.ylabel("Compressed file size (MB)")
|
||||
plt.ylim(0, model_df["compressed_file_size"].max() / 1e6)
|
||||
plt.legend(title="Context size")
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_compression_ratio.png")
|
||||
|
||||
# if model_type == "cnn":
|
||||
# import numpy as np
|
||||
#
|
||||
# plt.figure()
|
||||
# for length, linestyle in [(128, '-'), (256, '--')]:
|
||||
# # extrapolate execution time to larger files
|
||||
# x = model_df[model_df["context_length"] == length]["original_file_size"] / 1e6
|
||||
# y = model_df[model_df["context_length"] == length]["compression_time"]
|
||||
# y_decom = model_df[model_df["context_length"] == length]["decompression_time"]
|
||||
#
|
||||
# b1, loga1 = np.polyfit(x, np.log(y), 1)
|
||||
# b2, loga2 = np.polyfit(x, np.log(y_decom), 1)
|
||||
#
|
||||
# x_comp = np.linspace(0, 40, 1000)
|
||||
# x_decomp = np.linspace(0, 40, 1000)
|
||||
# a1 = np.exp(loga1)
|
||||
# a2 = np.exp(loga2)
|
||||
#
|
||||
#
|
||||
# plt.plot(
|
||||
# x_comp, a1 * np.exp(x_comp),
|
||||
# label=f"{length} compression",
|
||||
# linestyle=linestyle
|
||||
# )
|
||||
# plt.plot(
|
||||
# x_decomp, a2 * np.exp(x_decomp),
|
||||
# label=f"{length} decompression",
|
||||
# linestyle=linestyle
|
||||
# )
|
||||
#
|
||||
#
|
||||
#
|
||||
# plt.legend()
|
||||
# plt.title(f"Extrapolated execution time for CNN compression and decompression")
|
||||
# plt.xlabel("File size [MB]")
|
||||
# plt.ylabel("Time [s]")
|
||||
# plt.tight_layout()
|
||||
# plt.savefig(f"./graphs/{model_type}_{dataset_type}_extrapolated_execution_time.png")
|
||||
if model_type == "cnn":
|
||||
|
||||
plt.figure()
|
||||
for length, linestyle in [(128, '-'), (256, '--')]:
|
||||
# extrapolate execution time to larger files
|
||||
x = model_df[model_df["context_length"] == length]["original_file_size"] / 1e6
|
||||
y = model_df[model_df["context_length"] == length]["compression_time"]
|
||||
y_decom = model_df[model_df["context_length"] == length]["decompression_time"]
|
||||
|
||||
b1, loga1 = np.polyfit(x, np.log(y), 1)
|
||||
b2, loga2 = np.polyfit(x, np.log(y_decom), 1)
|
||||
|
||||
x_comp = np.linspace(0, 40, 1000)
|
||||
x_decomp = np.linspace(0, 40, 1000)
|
||||
a1 = np.exp(loga1)
|
||||
a2 = np.exp(loga2)
|
||||
|
||||
plt.plot(
|
||||
x_comp, a1 * np.exp(x_comp),
|
||||
label=f"{length} compression",
|
||||
linestyle=linestyle
|
||||
)
|
||||
plt.plot(
|
||||
x_decomp, a2 * np.exp(x_decomp),
|
||||
label=f"{length} decompression",
|
||||
linestyle=linestyle
|
||||
)
|
||||
|
||||
plt.grid(True)
|
||||
plt.legend()
|
||||
plt.title(f"(Log-linear) Extrapolated execution time for CNN")
|
||||
# plt.xscale('log')
|
||||
plt.xlabel("File size (MB)")
|
||||
plt.yscale('log')
|
||||
plt.ylabel("Time (log, s)")
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_extrapolated_execution_time.png")
|
||||
|
||||
for model_type in df["model_type"].unique():
|
||||
model_df = df[df["model_type"] == model_type]
|
||||
|
|
@ -395,19 +452,25 @@ if __name__ == "__main__":
|
|||
y - bar_height / 2,
|
||||
c256["mse_loss"],
|
||||
height=bar_height,
|
||||
label="256"
|
||||
label="256",
|
||||
)
|
||||
|
||||
plt.barh(
|
||||
y + bar_height / 2,
|
||||
c128["mse_loss"],
|
||||
height=bar_height,
|
||||
label="128"
|
||||
label="128",
|
||||
)
|
||||
plt.yticks(y, files, rotation=45, ha="right")
|
||||
plt.title(f"MSE loss for different context lengths")
|
||||
plt.xlabel("MSE loss")
|
||||
plt.ylabel("Filename")
|
||||
plt.legend()
|
||||
plt.grid(True)
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_loss.png")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
old_results()
|
||||
|
|
|
|||
Reference in a new issue