From 1143acc415cf4f8dd29c3c02ce2c4b8b6f1c9536 Mon Sep 17 00:00:00 2001 From: Tibo De Peuter Date: Thu, 11 Dec 2025 22:45:46 +0100 Subject: [PATCH] chore: Replace firefox with 7zip (smaller) --- README.md | 14 +++++++++++--- config/local.sh | 13 ++++++++++++- config/sub.csv | 12 ++++++++---- config/urls.txt | 4 ++-- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 28058f6..e339dbc 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # neural compression +## Running locally + +``` +uv sync --all-extras +``` + Example usage: ```shell @@ -21,10 +27,12 @@ python benchmark.py --debug compress \ --input-file inputfile --output-file outputfile ``` -## Running locally +Testing compression: -``` -uv sync --all-extras +```shell +bash config/download_datasets.sh config/urls.txt /home/tdpeuter/data/ml-inputs +bash config/generate_csv.sh > config/sub.csv +bash config/local.sh ``` ## Running on the Ghent University HPC diff --git a/config/local.sh b/config/local.sh index 91f79d5..e20ddf7 100644 --- a/config/local.sh +++ b/config/local.sh @@ -9,6 +9,8 @@ ID="${JOBID}-${GIT_HASH}-${DATE}" STAT_FILE="results/${ID}/results.csv" MODELS=/home/tdpeuter/data/ml-models +mkdir -p "results/${ID}" + while read -r line; do IFS=',' read -r id input model dataset context <<< "$line" @@ -16,11 +18,20 @@ while read -r line; do continue fi + output="results/${ID}/$(basename "${input}").${id}.pt" + python main.py compress \ --model-load-path "${MODELS}/${dataset}/${context}/${model}-1024.pt" \ --input-file "${input}" \ - --output-file "results/${ID}/${input}.pt" & + --output-file "${output}" + + in_bytes="$(stat -c %s -- "${input}")" + out_bytes="$(stat -c %s -- "${output}")" + + printf "%d,%s,%s,%s,%d,%d,%d\n" "$id" "$input" "$model" "$dataset" "$context" "$in_bytes" "$out_bytes" >> "${STAT_FILE}" + exit_code="${?}" + if [ "${exit_code}" -eq 0 ]; then echo "DONE" fi diff --git a/config/sub.csv b/config/sub.csv index 98fdf7a..1794775 100644 --- a/config/sub.csv +++ b/config/sub.csv @@ -1,5 +1,9 @@ id,input,model,dataset,context_size -0,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,enwik9,64 -1,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,human_reference,64 -2,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,enwik9,64 -3,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,human_reference,64 +0,/home/tdpeuter/data/ml-inputs/7z2501-x64.exe,cnn,enwik9,64 +1,/home/tdpeuter/data/ml-inputs/7z2501-x64.exe,cnn,human_reference,64 +2,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,enwik9,64 +3,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,human_reference,64 +4,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,enwik9,64 +5,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,human_reference,64 +6,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna.gz,cnn,enwik9,64 +7,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna.gz,cnn,human_reference,64 diff --git a/config/urls.txt b/config/urls.txt index 417b877..eaf8ef9 100644 --- a/config/urls.txt +++ b/config/urls.txt @@ -1,2 +1,2 @@ -https://download.mozilla.org/?product=firefox-latest&os=win&lang=en-US -https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz \ No newline at end of file +https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz +https://www.7-zip.org/a/7z2501-x64.exe \ No newline at end of file