feat: graphs + models + updated finished graph code + data in csv
BIN
graphs/autoencoder_enwik9_accuracy.png
Normal file
|
After Width: | Height: | Size: 25 KiB |
BIN
graphs/autoencoder_enwik9_compression_ratio.png
Normal file
|
After Width: | Height: | Size: 30 KiB |
BIN
graphs/autoencoder_enwik9_compression_time.png
Normal file
|
After Width: | Height: | Size: 19 KiB |
BIN
graphs/autoencoder_enwik9_decompression_time.png
Normal file
|
After Width: | Height: | Size: 19 KiB |
BIN
graphs/autoencoder_genome_accuracy.png
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
graphs/autoencoder_genome_compression_ratio.png
Normal file
|
After Width: | Height: | Size: 31 KiB |
BIN
graphs/autoencoder_genome_compression_time.png
Normal file
|
After Width: | Height: | Size: 18 KiB |
BIN
graphs/autoencoder_genome_decompression_time.png
Normal file
|
After Width: | Height: | Size: 23 KiB |
BIN
graphs/cnn_enwik9_accuracy.png
Normal file
|
After Width: | Height: | Size: 27 KiB |
BIN
graphs/cnn_enwik9_compression_ratio.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
graphs/cnn_enwik9_compression_time.png
Normal file
|
After Width: | Height: | Size: 17 KiB |
BIN
graphs/cnn_enwik9_decompression_time.png
Normal file
|
After Width: | Height: | Size: 17 KiB |
BIN
graphs/cnn_genome_accuracy.png
Normal file
|
After Width: | Height: | Size: 30 KiB |
BIN
graphs/cnn_genome_compression_ratio.png
Normal file
|
After Width: | Height: | Size: 40 KiB |
BIN
graphs/cnn_genome_compression_time.png
Normal file
|
After Width: | Height: | Size: 18 KiB |
BIN
graphs/cnn_genome_decompression_time.png
Normal file
|
After Width: | Height: | Size: 18 KiB |
125
make_graphs.py
|
|
@ -6,73 +6,76 @@ if __name__ == "__main__":
|
|||
# read in the csv
|
||||
df = pd.read_csv("./results/compress/compression_results.csv")
|
||||
|
||||
for model_type in df["model_type"].unique():
|
||||
model_df = df[df["model_type"] == model_type]
|
||||
for dataset_type in df["dataset_type"].unique():
|
||||
for model_type in df["model_type"].unique():
|
||||
dataset_df = df[df["dataset_type"] == dataset_type]
|
||||
model_df = dataset_df[dataset_df["model_type"] == model_type]
|
||||
|
||||
# execution time
|
||||
plt.figure()
|
||||
grouped = model_df.groupby("context_length")["compression_time"].mean() / 1e9
|
||||
labels = grouped.index.astype(str) # "128", "256"
|
||||
x = np.arange(len(labels)) # [0, 1]
|
||||
# execution time
|
||||
plt.figure()
|
||||
grouped = model_df.groupby("context_length")["compression_time"].mean() / 1e9
|
||||
labels = grouped.index.astype(str) # "128", "256"
|
||||
x = np.arange(len(labels)) # [0, 1]
|
||||
|
||||
plt.bar(x, grouped.values, width=0.6)
|
||||
plt.title(f"{model_type} mean compression time")
|
||||
plt.xticks(x, labels)
|
||||
plt.xlabel("Context length")
|
||||
plt.ylabel("Mean compression time [s]")
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{}_compression_time.png")
|
||||
plt.bar(x, grouped.values, width=0.6)
|
||||
plt.title(f"{model_type.capitalize()} mean compression time")
|
||||
plt.xticks(x, labels)
|
||||
plt.xlabel("Context length")
|
||||
plt.ylabel("Mean compression time [s]")
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_compression_time.png")
|
||||
|
||||
plt.figure()
|
||||
grouped = model_df.groupby("context_length")["decompression_time"].mean() / 1e9
|
||||
labels = grouped.index.astype(str) # "128", "256"
|
||||
x = np.arange(len(labels)) # [0, 1]
|
||||
plt.figure()
|
||||
grouped = model_df.groupby("context_length")["decompression_time"].mean() / 1e9
|
||||
labels = grouped.index.astype(str) # "128", "256"
|
||||
x = np.arange(len(labels)) # [0, 1]
|
||||
|
||||
plt.bar(x, grouped.values, width=0.6)
|
||||
plt.title(f"{model_type} mean decompression time")
|
||||
plt.xticks(x, labels)
|
||||
plt.xlabel("Context length")
|
||||
plt.ylabel("Mean decompression time [s]")
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{}_decompression_time.png")
|
||||
plt.bar(x, grouped.values, width=0.6)
|
||||
plt.title(f"{model_type.capitalize()} mean decompression time")
|
||||
plt.xticks(x, labels)
|
||||
plt.xlabel("Context length")
|
||||
plt.ylabel("Mean decompression time [s]")
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_decompression_time.png")
|
||||
|
||||
# accuracy
|
||||
plt.figure()
|
||||
bar_height = 0.25
|
||||
files = model_df["input_file_name"].unique()
|
||||
y = np.arange(len(files))
|
||||
c256 = model_df[model_df["context_length"] == 256]
|
||||
c128 = model_df[model_df["context_length"] == 128]
|
||||
# accuracy
|
||||
plt.figure(figsize=(10, 4))
|
||||
bar_height = 0.25
|
||||
files = model_df["input_file_name"].unique()
|
||||
y = np.arange(len(files))
|
||||
c256 = model_df[model_df["context_length"] == 256]
|
||||
c128 = model_df[model_df["context_length"] == 128]
|
||||
|
||||
plt.barh(
|
||||
y - bar_height / 2,
|
||||
c256["match_percentage"] * 100,
|
||||
height=bar_height,
|
||||
label="256"
|
||||
)
|
||||
plt.barh(
|
||||
y - bar_height / 2,
|
||||
c256["match_percentage"] * 100,
|
||||
height=bar_height,
|
||||
label="256"
|
||||
)
|
||||
|
||||
plt.barh(
|
||||
y + bar_height / 2,
|
||||
c128["match_percentage"] * 100,
|
||||
height=bar_height,
|
||||
label="128"
|
||||
)
|
||||
plt.yticks(y, files)
|
||||
plt.title(f"{model_type} time for different context lengths")
|
||||
plt.xlabel("accuracy")
|
||||
plt.ylabel("Filename")
|
||||
plt.legend()
|
||||
plt.savefig(f"./graphs/{model_type}_{}_accuracy.png")
|
||||
plt.barh(
|
||||
y + bar_height / 2,
|
||||
c128["match_percentage"] * 100,
|
||||
height=bar_height,
|
||||
label="128"
|
||||
)
|
||||
plt.yticks(y, files, rotation=45, ha="right")
|
||||
plt.title(f"{model_type.capitalize()} accuracy for different context lengths")
|
||||
plt.xlabel("Accuracy")
|
||||
plt.ylabel("Filename")
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_accuracy.png")
|
||||
|
||||
# compression ratio
|
||||
plt.figure()
|
||||
c256 = model_df[model_df["context_length"] == 256]
|
||||
c128 = model_df[model_df["context_length"] == 128]
|
||||
# compression ratio
|
||||
plt.figure()
|
||||
c256 = model_df[model_df["context_length"] == 256]
|
||||
c128 = model_df[model_df["context_length"] == 128]
|
||||
|
||||
plt.plot(c256["original_file_size"] / 1_000_000, c256["compressed_file_size"] / 1_000_000, label="256")
|
||||
plt.plot(c128["original_file_size"] / 1_000_000, c128["compressed_file_size"] / 1_000_000, label="128")
|
||||
plt.title(f"{model_type} compressed file evolution")
|
||||
plt.xlabel("Original file size [MB]")
|
||||
plt.ylabel("Compressed file size [MB]")
|
||||
plt.legend()
|
||||
plt.savefig(f"./graphs/{model_type}_{}_compression_ratio.png")
|
||||
plt.plot(c256["original_file_size"] / 1e6, c256["compressed_file_size"] / 1e6, label="256")
|
||||
plt.plot(c128["original_file_size"] / 1e6, c128["compressed_file_size"] / 1e6, label="128")
|
||||
plt.title(f"{model_type.capitalize()} compressed file evolution")
|
||||
plt.xlabel("Original file size [MB]")
|
||||
plt.ylabel("Compressed file size [MB]")
|
||||
plt.legend()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_compression_ratio.png")
|
||||
18
measure.py
|
|
@ -1,5 +1,4 @@
|
|||
import os
|
||||
from argparse import ArgumentParser
|
||||
from contextlib import contextmanager
|
||||
|
||||
import torch
|
||||
|
|
@ -54,13 +53,15 @@ if __name__ == "__main__":
|
|||
]
|
||||
|
||||
files_enwik9 = [
|
||||
# "text.txt",
|
||||
# "txt_large.txt",
|
||||
# "txt_xlarge.txt"
|
||||
"text.txt",
|
||||
"text_large.txt",
|
||||
"text_xlarge.txt"
|
||||
]
|
||||
|
||||
files_enwik9_cnn = [
|
||||
|
||||
"text_small.txt",
|
||||
"text_xsmall.txt",
|
||||
"text_xxsmall.txt"
|
||||
]
|
||||
|
||||
models = [
|
||||
|
|
@ -69,7 +70,7 @@ if __name__ == "__main__":
|
|||
("cnn-genome-full-256.pt", 256, "cnn", files_genome_cnn),
|
||||
("cnn-genome-full-128.pt", 128, "cnn", files_genome_cnn),
|
||||
("auto-enwik9-full-256.pt", 256, "autoencoder", files_enwik9),
|
||||
("auto-enwik9-full-128", 128, "autoencoder", files_enwik9),
|
||||
("auto-enwik9-full-128.pt", 128, "autoencoder", files_enwik9),
|
||||
("cnn-enwik9-full-256.pt", 256, "cnn", files_enwik9_cnn),
|
||||
("cnn-enwik9-full-128.pt", 128, "cnn", files_enwik9_cnn),
|
||||
]
|
||||
|
|
@ -78,10 +79,11 @@ if __name__ == "__main__":
|
|||
with open("./results/compress/compression_results.csv", "w") as f:
|
||||
# write header
|
||||
f.write(
|
||||
"model_type,model_name,context_length,input_file_name,original_file_size,compressed_file_size,match_percentage,compression_time,decompression_time\n"
|
||||
"model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,match_percentage,compression_time,decompression_time\n"
|
||||
)
|
||||
|
||||
for model, context_length, model_name, files in models:
|
||||
dataset_type = "genome" if "genome" in model else "enwik9"
|
||||
for file in files:
|
||||
in_file = f"./data/compression_sets/{file}"
|
||||
model_path = f"./models/{model_name}/{model}"
|
||||
|
|
@ -119,5 +121,5 @@ if __name__ == "__main__":
|
|||
os.remove("./output/tmp.pt")
|
||||
|
||||
f.write(
|
||||
f"{model_name},{model},{context_length},{file},{og_file_len},{compressed_size},{accuracy},{compression_time},{decompression_time}\n"
|
||||
f"{model_name},{model},{context_length},{dataset_type},{file},{og_file_len},{compressed_size},{accuracy},{compression_time},{decompression_time}\n"
|
||||
)
|
||||
|
|
|
|||
BIN
models/autoencoder/auto-enwik9-128.pt
Normal file
BIN
models/autoencoder/auto-enwik9-256.pt
Normal file
BIN
models/autoencoder/auto-enwik9-full-128.pt
Normal file
BIN
models/autoencoder/auto-enwik9-full-256.pt
Normal file
BIN
models/autoencoder/auto-genome-128.pt
Normal file
BIN
models/autoencoder/auto-genome-256.pt
Normal file
BIN
models/autoencoder/auto-genome-full-128.pt
Normal file
BIN
models/autoencoder/auto-genome-full-256.pt
Normal file
BIN
models/cnn/cnn-enwik9-128.pt
Normal file
BIN
models/cnn/cnn-enwik9-256.pt
Normal file
BIN
models/cnn/cnn-enwik9-full-128.pt
Normal file
BIN
models/cnn/cnn-enwik9-full-256.pt
Normal file
BIN
models/cnn/cnn-genome-256.pt
Normal file
BIN
models/cnn/cnn-genome-full-128.pt
Normal file
BIN
models/cnn/cnn-genome-full-256.pt
Normal file
|
|
@ -1,18 +1,16 @@
|
|||
import contextlib
|
||||
import math
|
||||
import struct
|
||||
from collections import deque
|
||||
from decimal import Decimal
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from tqdm import tqdm
|
||||
import struct
|
||||
|
||||
from src.models import AutoEncoder
|
||||
from src.utils import reference_ae
|
||||
|
||||
NUMBITS = 64
|
||||
NUMBITS = 16
|
||||
|
||||
|
||||
def probs_to_freqs(probs, total_freq=8192):
|
||||
|
|
|
|||