From cad44c3be480f8b21130d643988f647fa2ad4aa2 Mon Sep 17 00:00:00 2001 From: RobinMeersman Date: Thu, 18 Dec 2025 21:22:51 +0100 Subject: [PATCH] feat: measured autoencoder for very small files as well --- measure.py | 10 ++-- results/compress/compression_results.csv | 48 +++++++++---------- .../compression_results_auto_small.csv | 13 +++++ 3 files changed, 42 insertions(+), 29 deletions(-) create mode 100644 results/compress/compression_results_auto_small.csv diff --git a/measure.py b/measure.py index 8012c0e..1eb83e7 100644 --- a/measure.py +++ b/measure.py @@ -65,18 +65,18 @@ if __name__ == "__main__": ] models = [ - ("auto-genome-full-256.pt", 256, "autoencoder", files_genome), - ("auto-genome-full-128.pt", 128, "autoencoder", files_genome), + ("auto-genome-full-256.pt", 256, "autoencoder", files_genome + files_genome_cnn), + ("auto-genome-full-128.pt", 128, "autoencoder", files_genome + files_genome_cnn), ("cnn-genome-full-256.pt", 256, "cnn", files_genome_cnn), ("cnn-genome-full-128.pt", 128, "cnn", files_genome_cnn), - ("auto-enwik9-full-256.pt", 256, "autoencoder", files_enwik9), - ("auto-enwik9-full-128.pt", 128, "autoencoder", files_enwik9), + ("auto-enwik9-full-256.pt", 256, "autoencoder", files_enwik9 + files_enwik9_cnn), + ("auto-enwik9-full-128.pt", 128, "autoencoder", files_enwik9 + files_enwik9_cnn), ("cnn-enwik9-full-256.pt", 256, "cnn", files_enwik9_cnn), ("cnn-enwik9-full-128.pt", 128, "cnn", files_enwik9_cnn), ] device = "cuda" if torch.cuda.is_available() else "cpu" - with open("./results/compress/compression_results.csv", "w") as f: + with open("./results/compress/compression_results_auto_small.csv", "w") as f: # write header f.write( "model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,mse_loss,compression_time,decompression_time\n" diff --git a/results/compress/compression_results.csv b/results/compress/compression_results.csv index 8925538..ac73398 100644 --- a/results/compress/compression_results.csv +++ b/results/compress/compression_results.csv @@ -1,25 +1,25 @@ model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,mse_loss,compression_time,decompression_time -autoencoder,auto-genome-full-256.pt,256,genome,genome.fna,4699745,4259288,83.62875366210938,636915773,27887947 -autoencoder,auto-genome-full-256.pt,256,genome,genome_large.fna,23498433,21295512,83.59369659423828,1932602305,7778175 -autoencoder,auto-genome-full-256.pt,256,genome,genome_xlarge.fna,46996793,42591024,83.58621215820312,3850901316,10996509 -autoencoder,auto-genome-full-128.pt,128,genome,genome.fna,4699745,9399552,83.01229095458984,390656081,5804539 -autoencoder,auto-genome-full-128.pt,128,genome,genome_large.fna,23498433,46996992,83.01190185546875,1932561312,10575739 -autoencoder,auto-genome-full-128.pt,128,genome,genome_xlarge.fna,46996793,93993728,83.00253295898438,3873777067,18670984 -cnn,cnn-genome-full-256.pt,256,genome,genome_small.fna,1367,1743,0.0,1029290599,890595665 -cnn,cnn-genome-full-256.pt,256,genome,genome_xsmall.fna,1043,1343,0.0,686878467,683701323 -cnn,cnn-genome-full-256.pt,256,genome,genome_xxsmall.fna,800,1038,0.0,531354486,527072394 -cnn,cnn-genome-full-128.pt,128,genome,genome_small.fna,1367,1682,0.0,829554150,851934528 -cnn,cnn-genome-full-128.pt,128,genome,genome_xsmall.fna,1043,1300,0.0,654742547,637221301 -cnn,cnn-genome-full-128.pt,128,genome,genome_xxsmall.fna,800,1006,0.0,483840337,488870786 -autoencoder,auto-enwik9-full-256.pt,256,enwik9,text.txt,6488666,6184668,786.6799926757812,551986635,10536259 -autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_large.txt,12977332,12369092,786.6173706054688,1065897991,5763879 -autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_xlarge.txt,25954664,24738184,786.6337890625,2139223055,8369164 -autoencoder,auto-enwik9-full-128.pt,128,enwik9,text.txt,6488666,12774636,206.2792510986328,545577194,20624030 -autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_large.txt,12977332,25549272,206.24131774902344,1073396133,60871642 -autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_xlarge.txt,25954664,51098292,206.33023071289062,2145601924,59481825 -cnn,cnn-enwik9-full-256.pt,256,enwik9,text_small.txt,1022,1561,0.0,693378115,671294958 -cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xsmall.txt,825,1268,0.0,550333502,550062973 -cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xxsmall.txt,492,790,0.0,333745012,332073466 -cnn,cnn-enwik9-full-128.pt,128,enwik9,text_small.txt,1022,1129,0.0,629310179,621317553 -cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xsmall.txt,825,882,0.0,504538600,504907940 -cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xxsmall.txt,492,571,0.0,305443187,308964670 +autoencoder,auto-genome-full-256.pt,256,genome,genome.fna,4699745,4259288,83.62875366210938,806860910,45252425 +autoencoder,auto-genome-full-256.pt,256,genome,genome_large.fna,23498433,21295512,83.59369659423828,2029608027,7888492 +autoencoder,auto-genome-full-256.pt,256,genome,genome_xlarge.fna,46996793,42591024,83.58621215820312,3893661459,11828570 +autoencoder,auto-genome-full-128.pt,128,genome,genome.fna,4699745,9399552,83.01229095458984,398403410,6667159 +autoencoder,auto-genome-full-128.pt,128,genome,genome_large.fna,23498433,46996992,83.01190185546875,2039699733,18597914 +autoencoder,auto-genome-full-128.pt,128,genome,genome_xlarge.fna,46996793,93993728,83.00253295898438,3914176203,19137168 +cnn,cnn-genome-full-256.pt,256,genome,genome_small.fna,1367,1743,0.0,1066176734,933083718 +cnn,cnn-genome-full-256.pt,256,genome,genome_xsmall.fna,1043,1343,0.0,741888902,796896774 +cnn,cnn-genome-full-256.pt,256,genome,genome_xxsmall.fna,800,1038,0.0,542462371,712222705 +cnn,cnn-genome-full-128.pt,128,genome,genome_small.fna,1367,1682,0.0,967421587,1046043100 +cnn,cnn-genome-full-128.pt,128,genome,genome_xsmall.fna,1043,1300,0.0,632559455,629773982 +cnn,cnn-genome-full-128.pt,128,genome,genome_xxsmall.fna,800,1006,0.0,481589710,488065247 +autoencoder,auto-enwik9-full-256.pt,256,enwik9,text.txt,6488666,6184668,786.6799926757812,564278454,16857177 +autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_large.txt,12977332,12369092,786.6173706054688,1079811645,6431677 +autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_xlarge.txt,25954664,24738184,786.6337890625,2163648216,8789889 +autoencoder,auto-enwik9-full-128.pt,128,enwik9,text.txt,6488666,12774636,206.2792510986328,559830918,20974298 +autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_large.txt,12977332,25549272,206.24131774902344,1166762858,62279675 +autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_xlarge.txt,25954664,51098292,206.33023071289062,2161884920,54375265 +cnn,cnn-enwik9-full-256.pt,256,enwik9,text_small.txt,1022,1561,0.0,827390609,666215221 +cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xsmall.txt,825,1268,0.0,669703293,539488777 +cnn,cnn-enwik9-full-256.pt,256,enwik9,text_xxsmall.txt,492,790,0.0,327867710,327564434 +cnn,cnn-enwik9-full-128.pt,128,enwik9,text_small.txt,1022,1129,0.0,647518999,620040573 +cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xsmall.txt,825,882,0.0,500985855,613062076 +cnn,cnn-enwik9-full-128.pt,128,enwik9,text_xxsmall.txt,492,571,0.0,302816347,308070825 diff --git a/results/compress/compression_results_auto_small.csv b/results/compress/compression_results_auto_small.csv new file mode 100644 index 0000000..0cd438e --- /dev/null +++ b/results/compress/compression_results_auto_small.csv @@ -0,0 +1,13 @@ +model_type,model_name,context_length,dataset_type,input_file_name,original_file_size,compressed_file_size,mse_loss,compression_time,decompression_time +autoencoder,auto-genome-full-256.pt,256,genome,genome_small.fna,1367,1392,220.29188537597656,237708436,29378622 +autoencoder,auto-genome-full-256.pt,256,genome,genome_xsmall.fna,1043,1160,263.97027587890625,5293080,4488761 +autoencoder,auto-genome-full-256.pt,256,genome,genome_xxsmall.fna,800,928,319.5350036621094,4873340,4381310 +autoencoder,auto-genome-full-128.pt,128,genome,genome_small.fna,1367,2816,120.52304077148438,251659317,5284661 +autoencoder,auto-genome-full-128.pt,128,genome,genome_xsmall.fna,1043,2304,133.8379669189453,152590538,4432469 +autoencoder,auto-genome-full-128.pt,128,genome,genome_xxsmall.fna,800,1792,159.2862548828125,153622653,4532139 +autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_small.txt,1022,976,746.8033447265625,6699780,22065927 +autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_xsmall.txt,825,976,787.5345458984375,3715750,3665629 +autoencoder,auto-enwik9-full-256.pt,256,enwik9,text_xxsmall.txt,492,488,828.8658447265625,4892560,4176979 +autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_small.txt,1022,2016,231.42955017089844,5005880,4400159 +autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_xsmall.txt,825,1764,236.55636596679688,5024449,4180100 +autoencoder,auto-enwik9-full-128.pt,128,enwik9,text_xxsmall.txt,492,1008,237.20529174804688,4856779,4145850