fix: reworked several graphs; todo: extrapolation to show that the CNN will not scale well
|
Before Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 29 KiB |
|
Before Width: | Height: | Size: 31 KiB |
|
Before Width: | Height: | Size: 21 KiB |
|
Before Width: | Height: | Size: 23 KiB |
|
Before Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 35 KiB |
|
Before Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 40 KiB |
|
Before Width: | Height: | Size: 18 KiB |
|
Before Width: | Height: | Size: 18 KiB |
155
make_graphs.py
|
|
@ -1,5 +1,6 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -9,63 +10,40 @@ if __name__ == "__main__":
|
|||
for dataset_type in df["dataset_type"].unique():
|
||||
for model_type in df["model_type"].unique():
|
||||
dataset_df = df[df["dataset_type"] == dataset_type]
|
||||
model_df = dataset_df[dataset_df["model_type"] == model_type]
|
||||
model_df = dataset_df[dataset_df["model_type"] == model_type].copy()
|
||||
|
||||
# execution time
|
||||
plt.figure()
|
||||
grouped = model_df.groupby("context_length")["compression_time"].mean() / 1e9
|
||||
labels = grouped.index.astype(str) # "128", "256"
|
||||
x = np.arange(len(labels)) # [0, 1]
|
||||
|
||||
plt.bar(x, grouped.values, width=0.6)
|
||||
plt.title(f"{model_type.capitalize()} mean compression time")
|
||||
plt.xticks(x, labels)
|
||||
plt.xlabel("Context length")
|
||||
plt.ylabel("Mean compression time [s]")
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_compression_time.png")
|
||||
|
||||
plt.figure()
|
||||
grouped = model_df.groupby("context_length")["decompression_time"].mean() / 1e9
|
||||
labels = grouped.index.astype(str) # "128", "256"
|
||||
x = np.arange(len(labels)) # [0, 1]
|
||||
|
||||
plt.bar(x, grouped.values, width=0.6)
|
||||
plt.title(f"{model_type.capitalize()} mean decompression time")
|
||||
plt.xticks(x, labels)
|
||||
plt.xlabel("Context length")
|
||||
plt.ylabel("Mean decompression time [s]")
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_decompression_time.png")
|
||||
|
||||
# loss
|
||||
plt.figure(figsize=(10, 4))
|
||||
bar_height = 0.25
|
||||
files = model_df["input_file_name"].unique()
|
||||
y = np.arange(len(files))
|
||||
c256 = model_df[model_df["context_length"] == 256]
|
||||
c128 = model_df[model_df["context_length"] == 128]
|
||||
|
||||
plt.barh(
|
||||
y - bar_height / 2,
|
||||
c256["mse_loss"],
|
||||
height=bar_height,
|
||||
label="256"
|
||||
model_df["original_file_size_mb"] = model_df["original_file_size"] / 1e6
|
||||
# compression
|
||||
sns.lineplot(
|
||||
data=model_df,
|
||||
x="original_file_size_mb",
|
||||
y="compression_time",
|
||||
hue="context_length",
|
||||
palette="Set1",
|
||||
markers=True,
|
||||
legend="brief",
|
||||
linestyle="-"
|
||||
)
|
||||
|
||||
plt.barh(
|
||||
y + bar_height / 2,
|
||||
c128["mse_loss"],
|
||||
height=bar_height,
|
||||
label="128"
|
||||
# decompression
|
||||
sns.lineplot(
|
||||
data=model_df,
|
||||
x="original_file_size_mb",
|
||||
y="decompression_time",
|
||||
hue="context_length",
|
||||
palette="Set1",
|
||||
markers=True,
|
||||
legend=False,
|
||||
linestyle="--"
|
||||
)
|
||||
plt.yticks(y, files, rotation=45, ha="right")
|
||||
plt.title(f"{model_type.capitalize()} MSE loss for different context lengths")
|
||||
plt.xlabel("MSE loss")
|
||||
plt.ylabel("Filename")
|
||||
plt.legend()
|
||||
plt.title(f"{model_type.capitalize()} compression and decompression time: {dataset_type}")
|
||||
plt.xlabel("file size [MB]")
|
||||
plt.ylabel("Time [s]")
|
||||
plt.yscale("log")
|
||||
plt.legend([f"{style}, {c_type}" for style, c_type in zip(["Solid", "Dashed"], ["compression", "decompression"])])
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_accuracy.png")
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_execution_time.png")
|
||||
|
||||
# compression ratio
|
||||
plt.figure()
|
||||
|
|
@ -74,8 +52,79 @@ if __name__ == "__main__":
|
|||
|
||||
plt.plot(c256["original_file_size"] / 1e6, c256["compressed_file_size"] / 1e6, label="256")
|
||||
plt.plot(c128["original_file_size"] / 1e6, c128["compressed_file_size"] / 1e6, label="128")
|
||||
plt.title(f"{model_type.capitalize()} compressed file evolution")
|
||||
plt.title(f"{model_type.capitalize()} compressed file evolution: {dataset_type}")
|
||||
plt.xlabel("Original file size [MB]")
|
||||
plt.ylabel("Compressed file size [MB]")
|
||||
plt.legend()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_compression_ratio.png")
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_compression_ratio.png")
|
||||
|
||||
|
||||
# if model_type == "cnn":
|
||||
# import numpy as np
|
||||
#
|
||||
# plt.figure()
|
||||
# for length, linestyle in [(128, '-'), (256, '--')]:
|
||||
# # extrapolate execution time to larger files
|
||||
# x = model_df[model_df["context_length"] == length]["original_file_size"] / 1e6
|
||||
# y = model_df[model_df["context_length"] == length]["compression_time"]
|
||||
# y_decom = model_df[model_df["context_length"] == length]["decompression_time"]
|
||||
#
|
||||
# b1, loga1 = np.polyfit(x, np.log(y), 1)
|
||||
# b2, loga2 = np.polyfit(x, np.log(y_decom), 1)
|
||||
#
|
||||
# x_comp = np.linspace(0, 40, 1000)
|
||||
# x_decomp = np.linspace(0, 40, 1000)
|
||||
# a1 = np.exp(loga1)
|
||||
# a2 = np.exp(loga2)
|
||||
#
|
||||
#
|
||||
# plt.plot(
|
||||
# x_comp, a1 * np.exp(b1 * x_comp),
|
||||
# label=f"{length} compression",
|
||||
# linestyle=linestyle
|
||||
# )
|
||||
# plt.plot(
|
||||
# x_decomp, a2 * np.exp(b2 * x_decomp),
|
||||
# label=f"{length} decompression",
|
||||
# linestyle=linestyle
|
||||
# )
|
||||
#
|
||||
#
|
||||
#
|
||||
# plt.legend()
|
||||
# plt.title(f"Extrapolated execution time for CNN compression and decompression")
|
||||
# plt.xlabel("File size [MB]")
|
||||
# plt.ylabel("Time [s]")
|
||||
# plt.tight_layout()
|
||||
# plt.savefig(f"./graphs/{model_type}_{dataset_type}_extrapolated_execution_time.png")
|
||||
|
||||
for model_type in df["model_type"].unique():
|
||||
model_df = df[df["model_type"] == model_type]
|
||||
|
||||
plt.figure(figsize=(10, 4))
|
||||
bar_height = 0.25
|
||||
files = model_df["input_file_name"].unique()
|
||||
y = np.arange(len(files))
|
||||
c256 = model_df[model_df["context_length"] == 256]
|
||||
c128 = model_df[model_df["context_length"] == 128]
|
||||
|
||||
plt.barh(
|
||||
y - bar_height / 2,
|
||||
c256["mse_loss"],
|
||||
height=bar_height,
|
||||
label="256"
|
||||
)
|
||||
|
||||
plt.barh(
|
||||
y + bar_height / 2,
|
||||
c128["mse_loss"],
|
||||
height=bar_height,
|
||||
label="128"
|
||||
)
|
||||
plt.yticks(y, files, rotation=45, ha="right")
|
||||
plt.title(f"MSE loss for different context lengths")
|
||||
plt.xlabel("MSE loss")
|
||||
plt.ylabel("Filename")
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_loss.png")
|
||||
|
|
@ -9,6 +9,8 @@ dependencies = [
|
|||
"fsspec==2024.9.0",
|
||||
"lorem>=0.1.1",
|
||||
"arithmeticencodingpython",
|
||||
"pandas-stubs~=2.3.3",
|
||||
"seaborn>=0.13.2",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
|
|
|||
40
uv.lock
generated
|
|
@ -1613,6 +1613,19 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pandas-stubs"
|
||||
version = "2.3.3.251201"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "numpy" },
|
||||
{ name = "types-pytz" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ee/a6/491b2af2cb3ee232765a73fb273a44cc1ac33b154f7745b2df2ee1dc4d01/pandas_stubs-2.3.3.251201.tar.gz", hash = "sha256:7a980f4f08cff2a6d7e4c6d6d26f4c5fcdb82a6f6531489b2f75c81567fe4536", size = 107787, upload-time = "2025-12-01T18:29:22.403Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/68/78a3c253f146254b8e2c19f4a4768f272e12ef11001d9b45ec7b165db054/pandas_stubs-2.3.3.251201-py3-none-any.whl", hash = "sha256:eb5c9b6138bd8492fd74a47b09c9497341a278fcfbc8633ea4b35b230ebf4be5", size = 164638, upload-time = "2025-12-01T18:29:21.006Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pillow"
|
||||
version = "12.0.0"
|
||||
|
|
@ -1718,6 +1731,8 @@ dependencies = [
|
|||
{ name = "datasets" },
|
||||
{ name = "fsspec" },
|
||||
{ name = "lorem" },
|
||||
{ name = "pandas-stubs" },
|
||||
{ name = "seaborn" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
|
|
@ -1746,7 +1761,9 @@ requires-dist = [
|
|||
{ name = "matplotlib", marker = "extra == 'dev'", specifier = ">=3.10.7" },
|
||||
{ name = "memray", marker = "extra == 'dev'", specifier = ">=1.19.1" },
|
||||
{ name = "optuna", marker = "extra == 'dev'", specifier = "==4.5.0" },
|
||||
{ name = "pandas-stubs", specifier = "~=2.3.3" },
|
||||
{ name = "regex", marker = "extra == 'dataset'", specifier = ">=2025.11.3" },
|
||||
{ name = "seaborn", specifier = ">=0.13.2" },
|
||||
{ name = "torch", marker = "extra == 'dev'", specifier = "==2.9.0" },
|
||||
{ name = "torchdata", marker = "extra == 'dev'", specifier = "==0.7.1" },
|
||||
{ name = "torchvision", marker = "extra == 'dev'", specifier = "==0.24.0" },
|
||||
|
|
@ -2116,6 +2133,20 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "seaborn"
|
||||
version = "0.13.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "matplotlib" },
|
||||
{ name = "numpy" },
|
||||
{ name = "pandas" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/86/59/a451d7420a77ab0b98f7affa3a1d78a313d2f7281a57afb1a34bae8ab412/seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7", size = 1457696, upload-time = "2024-01-25T13:21:52.551Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "setuptools"
|
||||
version = "80.9.0"
|
||||
|
|
@ -2361,6 +2392,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/5e/dd/5cbf31f402f1cc0ab087c94d4669cfa55bd1e818688b910631e131d74e75/typer_slim-0.20.0-py3-none-any.whl", hash = "sha256:f42a9b7571a12b97dddf364745d29f12221865acef7a2680065f9bb29c7dc89d", size = 47087, upload-time = "2025-10-20T17:03:44.546Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-pytz"
|
||||
version = "2025.2.0.20251108"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/40/ff/c047ddc68c803b46470a357454ef76f4acd8c1088f5cc4891cdd909bfcf6/types_pytz-2025.2.0.20251108.tar.gz", hash = "sha256:fca87917836ae843f07129567b74c1929f1870610681b4c92cb86a3df5817bdb", size = 10961, upload-time = "2025-11-08T02:55:57.001Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl", hash = "sha256:0f1c9792cab4eb0e46c52f8845c8f77cf1e313cb3d68bf826aa867fe4717d91c", size = 10116, upload-time = "2025-11-08T02:55:56.194Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.15.0"
|
||||
|
|
|
|||