fix: accuracy replaced by MSE loss, updated graphs

This commit is contained in:
RobinMeersman 2025-12-16 18:12:10 +01:00
parent 5bf45e47a5
commit 9cd37f156a
15 changed files with 38 additions and 38 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 29 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 21 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 23 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

After

Width:  |  Height:  |  Size: 30 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB

Before After
Before After

View file

@ -38,7 +38,7 @@ if __name__ == "__main__":
plt.tight_layout()
plt.savefig(f"./graphs/{model_type}_{dataset_type}_decompression_time.png")
# accuracy
# loss
plt.figure(figsize=(10, 4))
bar_height = 0.25
files = model_df["input_file_name"].unique()
@ -48,20 +48,20 @@ if __name__ == "__main__":
plt.barh(
y - bar_height / 2,
c256["match_percentage"] * 100,
c256["mse_loss"],
height=bar_height,
label="256"
)
plt.barh(
y + bar_height / 2,
c128["match_percentage"] * 100,
c128["mse_loss"],
height=bar_height,
label="128"
)
plt.yticks(y, files, rotation=45, ha="right")
plt.title(f"{model_type.capitalize()} accuracy for different context lengths")
plt.xlabel("Accuracy")
plt.title(f"{model_type.capitalize()} MSE loss for different context lengths")
plt.xlabel("MSE loss")
plt.ylabel("Filename")
plt.legend()
plt.tight_layout()

View file

@ -2,6 +2,7 @@ import os
from contextlib import contextmanager
import torch
import torch.nn.functional as F
import src.process as p
@ -26,17 +27,16 @@ def timer():
def compare_files(original, decompressed: str | torch.Tensor):
with open(original, "rb") as file:
original = file.read()
original = torch.tensor(list(original), dtype=torch.uint8).cpu()
original = torch.tensor(list(original), dtype=torch.uint8).cpu().float()
if type(decompressed) == "str":
with open(decompressed, "rb") as file:
decompressed = file.read()
decompressed = torch.tensor(list(decompressed), dtype=torch.uint8).cpu()
decompressed = torch.tensor(list(decompressed), dtype=torch.uint8).cpu().float()
# count bytes matching
count = torch.sum(original == decompressed[:original.shape[0]])
accuracy = count / original.shape[0]
return accuracy
loss = F.mse_loss(decompressed[:original.shape[0]], original)
return loss
if __name__ == "__main__":
@ -79,7 +79,7 @@ if __name__ == "__main__":
with open("./results/compress/compression_results.csv", "w") as f:
# write header
f.write(
"model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,match_percentage,compression_time,decompression_time\n"
"model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,mse_loss,compression_time,decompression_time\n"
)
for model, context_length, model_name, files in models:
@ -110,7 +110,7 @@ if __name__ == "__main__":
decompression_time = t()
accuracy = compare_files(in_file, decompressed.flatten().cpu())
mse_loss = compare_files(in_file, decompressed.flatten().cpu())
og_file_len = os.path.getsize(in_file)
if compressed is None:
@ -121,5 +121,5 @@ if __name__ == "__main__":
os.remove("./output/tmp.pt")
f.write(
f"{model_name},{model},{context_length},{dataset_type},{file},{og_file_len},{compressed_size},{accuracy},{compression_time},{decompression_time}\n"
f"{model_name},{model},{context_length},{dataset_type},{file},{og_file_len},{compressed_size},{mse_loss},{compression_time},{decompression_time}\n"
)

View file

@ -1,25 +1,25 @@
model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,match_percentage,compression_time,decompression_time
autoencoder,auto-genome-full-256.pt,256,genome,genome.fna,4699745,4259288,0.045625027269124985,644452283,28897895
autoencoder,auto-genome-full-256.pt,256,genome,genome_large.fna,23498433,21295512,0.04565964266657829,1963998714,8635843
autoencoder,auto-genome-full-256.pt,256,genome,genome_xlarge.fna,46996793,42591024,0.04573816433548927,3876085182,11520930
autoencoder,auto-genome-full-128.pt,128,genome,genome.fna,4699745,9399552,0.06625784933567047,390820600,5763825
autoencoder,auto-genome-full-128.pt,128,genome,genome_large.fna,23498433,46996992,0.06624297052621841,1958507860,11799390
autoencoder,auto-genome-full-128.pt,128,genome,genome_xlarge.fna,46996793,93993728,0.06629720330238342,3870420958,18796104
cnn,cnn-genome-full-256.pt,256,genome,genome_small.fna,1367,1743,1.0,994341526,890558285
cnn,cnn-genome-full-256.pt,256,genome,genome_xsmall.fna,1043,1343,1.0,677182893,679331692
cnn,cnn-genome-full-256.pt,256,genome,genome_xxsmall.fna,800,1038,1.0,523037713,526992909
cnn,cnn-genome-full-128.pt,128,genome,genome_small.fna,1367,1682,1.0,825656141,822958302
cnn,cnn-genome-full-128.pt,128,genome,genome_xsmall.fna,1043,1300,1.0,634440381,636023619
cnn,cnn-genome-full-128.pt,128,genome,genome_xxsmall.fna,800,1006,1.0,484945375,488047643
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text.txt,6488666,6184668,0.01631845347583294,539742390,7300344
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_large.txt,12977332,12369092,0.01635659858584404,1061523776,5894565
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_xlarge.txt,25954664,24738184,0.01636260747909546,2125073233,8342673
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text.txt,6488666,12774636,0.03268468379974365,546880556,20773102
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_large.txt,12977332,25549272,0.032631129026412964,1068791093,63009268
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_xlarge.txt,25954664,51098292,0.03263767808675766,2136859999,59107591
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_small.txt,1022,1561,1.0,675420011,669676566
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xsmall.txt,825,1268,1.0,538098125,541272812
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xxsmall.txt,492,790,1.0,324025733,328011609
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_small.txt,1022,1129,1.0,619907688,627584572
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xsmall.txt,825,882,1.0,503575405,505329493
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xxsmall.txt,492,571,1.0,307748207,311888322
model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,mse_loss,compression_time,decompression_time
autoencoder,auto-genome-full-256.pt,256,genome,genome.fna,4699745,4259288,83.62875366210938,636915773,27887947
autoencoder,auto-genome-full-256.pt,256,genome,genome_large.fna,23498433,21295512,83.59369659423828,1932602305,7778175
autoencoder,auto-genome-full-256.pt,256,genome,genome_xlarge.fna,46996793,42591024,83.58621215820312,3850901316,10996509
autoencoder,auto-genome-full-128.pt,128,genome,genome.fna,4699745,9399552,83.01229095458984,390656081,5804539
autoencoder,auto-genome-full-128.pt,128,genome,genome_large.fna,23498433,46996992,83.01190185546875,1932561312,10575739
autoencoder,auto-genome-full-128.pt,128,genome,genome_xlarge.fna,46996793,93993728,83.00253295898438,3873777067,18670984
cnn,cnn-genome-full-256.pt,256,genome,genome_small.fna,1367,1743,0.0,1029290599,890595665
cnn,cnn-genome-full-256.pt,256,genome,genome_xsmall.fna,1043,1343,0.0,686878467,683701323
cnn,cnn-genome-full-256.pt,256,genome,genome_xxsmall.fna,800,1038,0.0,531354486,527072394
cnn,cnn-genome-full-128.pt,128,genome,genome_small.fna,1367,1682,0.0,829554150,851934528
cnn,cnn-genome-full-128.pt,128,genome,genome_xsmall.fna,1043,1300,0.0,654742547,637221301
cnn,cnn-genome-full-128.pt,128,genome,genome_xxsmall.fna,800,1006,0.0,483840337,488870786
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text.txt,6488666,6184668,786.6799926757812,551986635,10536259
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_large.txt,12977332,12369092,786.6173706054688,1065897991,5763879
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_xlarge.txt,25954664,24738184,786.6337890625,2139223055,8369164
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text.txt,6488666,12774636,206.2792510986328,545577194,20624030
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_large.txt,12977332,25549272,206.24131774902344,1073396133,60871642
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_xlarge.txt,25954664,51098292,206.33023071289062,2145601924,59481825
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_small.txt,1022,1561,0.0,693378115,671294958
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xsmall.txt,825,1268,0.0,550333502,550062973
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xxsmall.txt,492,790,0.0,333745012,332073466
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_small.txt,1022,1129,0.0,629310179,621317553
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xsmall.txt,825,882,0.0,504538600,504907940
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xxsmall.txt,492,571,0.0,305443187,308964670

1 model_type model_name context_length dataset_type input_file_name original_file_size compressed_file_size match_percentage mse_loss compression_time decompression_time
2 autoencoder auto-genome-full-256.pt 256 genome genome.fna 4699745 4259288 0.045625027269124985 83.62875366210938 644452283 636915773 28897895 27887947
3 autoencoder auto-genome-full-256.pt 256 genome genome_large.fna 23498433 21295512 0.04565964266657829 83.59369659423828 1963998714 1932602305 8635843 7778175
4 autoencoder auto-genome-full-256.pt 256 genome genome_xlarge.fna 46996793 42591024 0.04573816433548927 83.58621215820312 3876085182 3850901316 11520930 10996509
5 autoencoder auto-genome-full-128.pt 128 genome genome.fna 4699745 9399552 0.06625784933567047 83.01229095458984 390820600 390656081 5763825 5804539
6 autoencoder auto-genome-full-128.pt 128 genome genome_large.fna 23498433 46996992 0.06624297052621841 83.01190185546875 1958507860 1932561312 11799390 10575739
7 autoencoder auto-genome-full-128.pt 128 genome genome_xlarge.fna 46996793 93993728 0.06629720330238342 83.00253295898438 3870420958 3873777067 18796104 18670984
8 cnn cnn-genome-full-256.pt 256 genome genome_small.fna 1367 1743 1.0 0.0 994341526 1029290599 890558285 890595665
9 cnn cnn-genome-full-256.pt 256 genome genome_xsmall.fna 1043 1343 1.0 0.0 677182893 686878467 679331692 683701323
10 cnn cnn-genome-full-256.pt 256 genome genome_xxsmall.fna 800 1038 1.0 0.0 523037713 531354486 526992909 527072394
11 cnn cnn-genome-full-128.pt 128 genome genome_small.fna 1367 1682 1.0 0.0 825656141 829554150 822958302 851934528
12 cnn cnn-genome-full-128.pt 128 genome genome_xsmall.fna 1043 1300 1.0 0.0 634440381 654742547 636023619 637221301
13 cnn cnn-genome-full-128.pt 128 genome genome_xxsmall.fna 800 1006 1.0 0.0 484945375 483840337 488047643 488870786
14 autoencoder auto-enwik9-full-256.pt 256 enwik9 text.txt 6488666 6184668 0.01631845347583294 786.6799926757812 539742390 551986635 7300344 10536259
15 autoencoder auto-enwik9-full-256.pt 256 enwik9 text_large.txt 12977332 12369092 0.01635659858584404 786.6173706054688 1061523776 1065897991 5894565 5763879
16 autoencoder auto-enwik9-full-256.pt 256 enwik9 text_xlarge.txt 25954664 24738184 0.01636260747909546 786.6337890625 2125073233 2139223055 8342673 8369164
17 autoencoder auto-enwik9-full-128.pt 128 enwik9 text.txt 6488666 12774636 0.03268468379974365 206.2792510986328 546880556 545577194 20773102 20624030
18 autoencoder auto-enwik9-full-128.pt 128 enwik9 text_large.txt 12977332 25549272 0.032631129026412964 206.24131774902344 1068791093 1073396133 63009268 60871642
19 autoencoder auto-enwik9-full-128.pt 128 enwik9 text_xlarge.txt 25954664 51098292 0.03263767808675766 206.33023071289062 2136859999 2145601924 59107591 59481825
20 cnn cnn-enwik9-full-256.pt 256 enwik9 text_small.txt 1022 1561 1.0 0.0 675420011 693378115 669676566 671294958
21 cnn cnn-enwik9-full-256.pt 256 enwik9 text_xsmall.txt 825 1268 1.0 0.0 538098125 550333502 541272812 550062973
22 cnn cnn-enwik9-full-256.pt 256 enwik9 text_xxsmall.txt 492 790 1.0 0.0 324025733 333745012 328011609 332073466
23 cnn cnn-enwik9-full-128.pt 128 enwik9 text_small.txt 1022 1129 1.0 0.0 619907688 629310179 627584572 621317553
24 cnn cnn-enwik9-full-128.pt 128 enwik9 text_xsmall.txt 825 882 1.0 0.0 503575405 504538600 505329493 504907940
25 cnn cnn-enwik9-full-128.pt 128 enwik9 text_xxsmall.txt 492 571 1.0 0.0 307748207 305443187 311888322 308964670