fix: accuracy replaced by MSE loss, updated graphs
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 28 KiB After Width: | Height: | Size: 29 KiB |
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 21 KiB |
|
Before Width: | Height: | Size: 23 KiB After Width: | Height: | Size: 23 KiB |
|
Before Width: | Height: | Size: 27 KiB After Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |
|
|
@ -38,7 +38,7 @@ if __name__ == "__main__":
|
|||
plt.tight_layout()
|
||||
plt.savefig(f"./graphs/{model_type}_{dataset_type}_decompression_time.png")
|
||||
|
||||
# accuracy
|
||||
# loss
|
||||
plt.figure(figsize=(10, 4))
|
||||
bar_height = 0.25
|
||||
files = model_df["input_file_name"].unique()
|
||||
|
|
@ -48,20 +48,20 @@ if __name__ == "__main__":
|
|||
|
||||
plt.barh(
|
||||
y - bar_height / 2,
|
||||
c256["match_percentage"] * 100,
|
||||
c256["mse_loss"],
|
||||
height=bar_height,
|
||||
label="256"
|
||||
)
|
||||
|
||||
plt.barh(
|
||||
y + bar_height / 2,
|
||||
c128["match_percentage"] * 100,
|
||||
c128["mse_loss"],
|
||||
height=bar_height,
|
||||
label="128"
|
||||
)
|
||||
plt.yticks(y, files, rotation=45, ha="right")
|
||||
plt.title(f"{model_type.capitalize()} accuracy for different context lengths")
|
||||
plt.xlabel("Accuracy")
|
||||
plt.title(f"{model_type.capitalize()} MSE loss for different context lengths")
|
||||
plt.xlabel("MSE loss")
|
||||
plt.ylabel("Filename")
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
|
|
|
|||
16
measure.py
|
|
@ -2,6 +2,7 @@ import os
|
|||
from contextlib import contextmanager
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
import src.process as p
|
||||
|
||||
|
|
@ -26,17 +27,16 @@ def timer():
|
|||
def compare_files(original, decompressed: str | torch.Tensor):
|
||||
with open(original, "rb") as file:
|
||||
original = file.read()
|
||||
original = torch.tensor(list(original), dtype=torch.uint8).cpu()
|
||||
original = torch.tensor(list(original), dtype=torch.uint8).cpu().float()
|
||||
|
||||
if type(decompressed) == "str":
|
||||
with open(decompressed, "rb") as file:
|
||||
decompressed = file.read()
|
||||
decompressed = torch.tensor(list(decompressed), dtype=torch.uint8).cpu()
|
||||
decompressed = torch.tensor(list(decompressed), dtype=torch.uint8).cpu().float()
|
||||
|
||||
# count bytes matching
|
||||
count = torch.sum(original == decompressed[:original.shape[0]])
|
||||
accuracy = count / original.shape[0]
|
||||
return accuracy
|
||||
loss = F.mse_loss(decompressed[:original.shape[0]], original)
|
||||
return loss
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -79,7 +79,7 @@ if __name__ == "__main__":
|
|||
with open("./results/compress/compression_results.csv", "w") as f:
|
||||
# write header
|
||||
f.write(
|
||||
"model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,match_percentage,compression_time,decompression_time\n"
|
||||
"model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,mse_loss,compression_time,decompression_time\n"
|
||||
)
|
||||
|
||||
for model, context_length, model_name, files in models:
|
||||
|
|
@ -110,7 +110,7 @@ if __name__ == "__main__":
|
|||
decompression_time = t()
|
||||
|
||||
|
||||
accuracy = compare_files(in_file, decompressed.flatten().cpu())
|
||||
mse_loss = compare_files(in_file, decompressed.flatten().cpu())
|
||||
|
||||
og_file_len = os.path.getsize(in_file)
|
||||
if compressed is None:
|
||||
|
|
@ -121,5 +121,5 @@ if __name__ == "__main__":
|
|||
os.remove("./output/tmp.pt")
|
||||
|
||||
f.write(
|
||||
f"{model_name},{model},{context_length},{dataset_type},{file},{og_file_len},{compressed_size},{accuracy},{compression_time},{decompression_time}\n"
|
||||
f"{model_name},{model},{context_length},{dataset_type},{file},{og_file_len},{compressed_size},{mse_loss},{compression_time},{decompression_time}\n"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,25 +1,25 @@
|
|||
model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,match_percentage,compression_time,decompression_time
|
||||
autoencoder,auto-genome-full-256.pt,256,genome,genome.fna,4699745,4259288,0.045625027269124985,644452283,28897895
|
||||
autoencoder,auto-genome-full-256.pt,256,genome,genome_large.fna,23498433,21295512,0.04565964266657829,1963998714,8635843
|
||||
autoencoder,auto-genome-full-256.pt,256,genome,genome_xlarge.fna,46996793,42591024,0.04573816433548927,3876085182,11520930
|
||||
autoencoder,auto-genome-full-128.pt,128,genome,genome.fna,4699745,9399552,0.06625784933567047,390820600,5763825
|
||||
autoencoder,auto-genome-full-128.pt,128,genome,genome_large.fna,23498433,46996992,0.06624297052621841,1958507860,11799390
|
||||
autoencoder,auto-genome-full-128.pt,128,genome,genome_xlarge.fna,46996793,93993728,0.06629720330238342,3870420958,18796104
|
||||
cnn,cnn-genome-full-256.pt,256,genome,genome_small.fna,1367,1743,1.0,994341526,890558285
|
||||
cnn,cnn-genome-full-256.pt,256,genome,genome_xsmall.fna,1043,1343,1.0,677182893,679331692
|
||||
cnn,cnn-genome-full-256.pt,256,genome,genome_xxsmall.fna,800,1038,1.0,523037713,526992909
|
||||
cnn,cnn-genome-full-128.pt,128,genome,genome_small.fna,1367,1682,1.0,825656141,822958302
|
||||
cnn,cnn-genome-full-128.pt,128,genome,genome_xsmall.fna,1043,1300,1.0,634440381,636023619
|
||||
cnn,cnn-genome-full-128.pt,128,genome,genome_xxsmall.fna,800,1006,1.0,484945375,488047643
|
||||
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text.txt,6488666,6184668,0.01631845347583294,539742390,7300344
|
||||
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_large.txt,12977332,12369092,0.01635659858584404,1061523776,5894565
|
||||
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_xlarge.txt,25954664,24738184,0.01636260747909546,2125073233,8342673
|
||||
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text.txt,6488666,12774636,0.03268468379974365,546880556,20773102
|
||||
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_large.txt,12977332,25549272,0.032631129026412964,1068791093,63009268
|
||||
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_xlarge.txt,25954664,51098292,0.03263767808675766,2136859999,59107591
|
||||
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_small.txt,1022,1561,1.0,675420011,669676566
|
||||
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xsmall.txt,825,1268,1.0,538098125,541272812
|
||||
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xxsmall.txt,492,790,1.0,324025733,328011609
|
||||
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_small.txt,1022,1129,1.0,619907688,627584572
|
||||
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xsmall.txt,825,882,1.0,503575405,505329493
|
||||
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xxsmall.txt,492,571,1.0,307748207,311888322
|
||||
model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,mse_loss,compression_time,decompression_time
|
||||
autoencoder,auto-genome-full-256.pt,256,genome,genome.fna,4699745,4259288,83.62875366210938,636915773,27887947
|
||||
autoencoder,auto-genome-full-256.pt,256,genome,genome_large.fna,23498433,21295512,83.59369659423828,1932602305,7778175
|
||||
autoencoder,auto-genome-full-256.pt,256,genome,genome_xlarge.fna,46996793,42591024,83.58621215820312,3850901316,10996509
|
||||
autoencoder,auto-genome-full-128.pt,128,genome,genome.fna,4699745,9399552,83.01229095458984,390656081,5804539
|
||||
autoencoder,auto-genome-full-128.pt,128,genome,genome_large.fna,23498433,46996992,83.01190185546875,1932561312,10575739
|
||||
autoencoder,auto-genome-full-128.pt,128,genome,genome_xlarge.fna,46996793,93993728,83.00253295898438,3873777067,18670984
|
||||
cnn,cnn-genome-full-256.pt,256,genome,genome_small.fna,1367,1743,0.0,1029290599,890595665
|
||||
cnn,cnn-genome-full-256.pt,256,genome,genome_xsmall.fna,1043,1343,0.0,686878467,683701323
|
||||
cnn,cnn-genome-full-256.pt,256,genome,genome_xxsmall.fna,800,1038,0.0,531354486,527072394
|
||||
cnn,cnn-genome-full-128.pt,128,genome,genome_small.fna,1367,1682,0.0,829554150,851934528
|
||||
cnn,cnn-genome-full-128.pt,128,genome,genome_xsmall.fna,1043,1300,0.0,654742547,637221301
|
||||
cnn,cnn-genome-full-128.pt,128,genome,genome_xxsmall.fna,800,1006,0.0,483840337,488870786
|
||||
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text.txt,6488666,6184668,786.6799926757812,551986635,10536259
|
||||
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_large.txt,12977332,12369092,786.6173706054688,1065897991,5763879
|
||||
autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_xlarge.txt,25954664,24738184,786.6337890625,2139223055,8369164
|
||||
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text.txt,6488666,12774636,206.2792510986328,545577194,20624030
|
||||
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_large.txt,12977332,25549272,206.24131774902344,1073396133,60871642
|
||||
autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_xlarge.txt,25954664,51098292,206.33023071289062,2145601924,59481825
|
||||
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_small.txt,1022,1561,0.0,693378115,671294958
|
||||
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xsmall.txt,825,1268,0.0,550333502,550062973
|
||||
cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xxsmall.txt,492,790,0.0,333745012,332073466
|
||||
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_small.txt,1022,1129,0.0,629310179,621317553
|
||||
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xsmall.txt,825,882,0.0,504538600,504907940
|
||||
cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xxsmall.txt,492,571,0.0,305443187,308964670
|
||||
|
|
|
|||
|