From 5de81819593ef830e986dde5651f5b5300f739d7 Mon Sep 17 00:00:00 2001 From: Tibo De Peuter Date: Thu, 11 Dec 2025 14:41:20 +0100 Subject: [PATCH 1/3] chore: Add Nix configs --- config/configuration.nix | 207 +++++++++++++++++++++++++++++++++++++++ config/flake.lock | 151 ++++++++++++++++++++++++++++ config/flake.nix | 66 +++++++++++++ 3 files changed, 424 insertions(+) create mode 100644 config/configuration.nix create mode 100644 config/flake.lock create mode 100644 config/flake.nix diff --git a/config/configuration.nix b/config/configuration.nix new file mode 100644 index 0000000..649767d --- /dev/null +++ b/config/configuration.nix @@ -0,0 +1,207 @@ +# Edit this configuration file to define what should be installed on +# your system. Help is available in the configuration.nix(5) man page, on +# https://search.nixos.org/options and in the NixOS manual (`nixos-help`). + +{ config, lib, pkgs, ... }: + +{ + imports = + [ # Include the results of the hardware scan. + ./hardware-configuration.nix + ]; + + # Use the systemd-boot EFI boot loader. + boot.loader = { + systemd-boot.enable = true; + efi = { + efiSysMountPoint = "/boot/efi"; + canTouchEfiVariables = true; + }; + }; + + networking.hostName = "MachineLearning"; # Define your hostname. + # Pick only one of the below networking options. + # networking.wireless.enable = true; # Enables wireless support via wpa_supplicant. + # networking.networkmanager.enable = true; # Easiest to use and most distros use this by default. + + # Set your time zone. + time.timeZone = "Europe/Brussels"; + + # Configure network proxy if necessary + # networking.proxy.default = "http://user:password@proxy:port/"; + # networking.proxy.noProxy = "127.0.0.1,localhost,internal.domain"; + + # Select internationalisation properties. + # i18n.defaultLocale = "en_US.UTF-8"; + # console = { + # font = "Lat2-Terminus16"; + # keyMap = "us"; + # useXkbConfig = true; # use xkb.options in tty. + # }; + + # Enable the X11 windowing system. 
+ services.xserver = { + #enable = true; + videoDrivers = [ + "nvidia" + ]; + }; + + # Configure keymap in X11 + # services.xserver.xkb.layout = "us"; + # services.xserver.xkb.options = "eurosign:e,caps:escape"; + + # Enable CUPS to print documents. + # services.printing.enable = true; + + # Enable sound. + # services.pulseaudio.enable = true; + # OR + # services.pipewire = { + # enable = true; + # pulse.enable = true; + # }; + + # Enable touchpad support (enabled default in most desktopManager). + # services.libinput.enable = true; + + # Define a user account. Don't forget to set a password with ‘passwd’. + # users.users.alice = { + # isNormalUser = true; + # extraGroups = [ "wheel" ]; # Enable ‘sudo’ for the user. + # packages = with pkgs; [ + # tree + # ]; + # }; + users.users = { + admin = { + description = "System Administrator"; + isNormalUser = true; + extraGroups = [ + config.users.groups.wheel.name # Enable 'sudo' for the user. + ]; + initialPassword = "ChangeMe"; + + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFdkZTYhBdUJ1YXx/2Iek0XC/jkbdxg37GORpXUgP2NO" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNSav7u6OxtxlAzq170/HuzE8cGvCULVGAiragtS5T6" + ]; + }; + + ml = { + description = "Machine Learning benchmarks"; + isNormalUser = true; + + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFdkZTYhBdUJ1YXx/2Iek0XC/jkbdxg37GORpXUgP2NO" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNSav7u6OxtxlAzq170/HuzE8cGvCULVGAiragtS5T6" + ]; + }; + }; + + # programs.firefox.enable = true; + + # List packages installed in system profile. + # You can use https://search.nixos.org/ to find more packages (and options). 
+ environment.systemPackages = with pkgs; [ + vim + curl + git + wget + tmux + ]; + + hardware = { + graphics = { + enable = true; + enable32Bit = true; + extraPackages = with pkgs; [ + intel-ocl + intel-compute-runtime + intel-graphics-compiler + opencl-clhpp + opencl-headers + ocl-icd + ]; + }; + nvidia = { + modesetting.enable = true; + powerManagement.enable = false; + powerManagement.finegrained = false; + open = false; + nvidiaSettings = false; + package = config.boot.kernelPackages.nvidiaPackages.stable; + +# prime = { +# nvidiaBusId = "PCI:1:0:0"; +# intelBusId = "PCI:0:2:0"; +# }; + }; + }; + + # Some programs need SUID wrappers, can be configured further or are + # started in user sessions. + # programs.mtr.enable = true; + # programs.gnupg.agent = { + # enable = true; + # enableSSHSupport = true; + # }; + + nix.settings = { + substituters = [ + "https://cache.nixos-cuda.org" + ]; + trusted-public-keys = [ + "cache.nixos-cuda.org:74DUi4Ye579gUqzH4ziL9IyiJBlDpMRn9MBN8oNan9M=" + ]; + experimental-features = [ + "nix-command" + "flakes" + ]; + }; + + nixpkgs.config.allowUnfree = true; + + # List services that you want to enable: + + # Enable the OpenSSH daemon. + services.openssh = { + enable = true; + settings = { + PasswordAuthentication = false; + PermitRootLogin = "no"; + }; + }; + + # Open ports in the firewall. + # networking.firewall.allowedTCPPorts = [ ... ]; + # networking.firewall.allowedUDPPorts = [ ... ]; + # Or disable the firewall altogether. + # networking.firewall.enable = false; + + # Copy the NixOS configuration file and link it from the resulting system + # (/run/current-system/configuration.nix). This is useful in case you + # accidentally delete configuration.nix. + # system.copySystemConfiguration = true; + + # This option defines the first version of NixOS you have installed on this particular machine, + # and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions. 
+ # + # Most users should NEVER change this value after the initial install, for any reason, + # even if you've upgraded your system to a new NixOS release. + # + # This value does NOT affect the Nixpkgs version your packages and OS are pulled from, + # so changing it will NOT upgrade your system - see https://nixos.org/manual/nixos/stable/#sec-upgrading for how + # to actually do that. + # + # This value being lower than the current NixOS release does NOT mean your system is + # out of date, out of support, or vulnerable. + # + # Do NOT change this value unless you have manually inspected all the changes it would make to your configuration, + # and migrated your data accordingly. + # + # For more information, see `man configuration.nix` or https://nixos.org/manual/nixos/stable/options#opt-system.stateVersion . + system.stateVersion = "25.05"; # Did you read the comment? + +} + diff --git a/config/flake.lock b/config/flake.lock new file mode 100644 index 0000000..16f7df5 --- /dev/null +++ b/config/flake.lock @@ -0,0 +1,151 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake-utils_2": { + "inputs": { + "systems": [ + "nix-jetbrains-plugins", + "systems" + ] + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nix-jetbrains-plugins": { + "inputs": { + "flake-utils": "flake-utils_2", + "nixpkgs": "nixpkgs", + "systems": "systems_2" + 
}, + "locked": { + "lastModified": 1765025946, + "narHash": "sha256-ZSeAc3h08Lv67gbUjDMK6GTrQgYsrNpFNJEavCPxN8I=", + "owner": "theCapypara", + "repo": "nix-jetbrains-plugins", + "rev": "b861755ca1f4f7633ffdddc5608c32632cecebc3", + "type": "github" + }, + "original": { + "owner": "theCapypara", + "repo": "nix-jetbrains-plugins", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1757745802, + "narHash": "sha256-hLEO2TPj55KcUFUU1vgtHE9UEIOjRcH/4QbmfHNF820=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "c23193b943c6c689d70ee98ce3128239ed9e32d1", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-unstable": { + "locked": { + "lastModified": 1765186076, + "narHash": "sha256-hM20uyap1a0M9d344I692r+ik4gTMyj60cQWO+hAYP8=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "addf7cf5f383a3101ecfba091b98d0a1263dc9b8", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1764939437, + "narHash": "sha256-4TLFHUwXraw9Df5mXC/vCrJgb50CRr3CzUzF0Mn3CII=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "00d2457e2f608b4be6fe8b470b0a36816324b0ae", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-25.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nix-jetbrains-plugins": "nix-jetbrains-plugins", + "nixpkgs": "nixpkgs_2", + "nixpkgs-unstable": "nixpkgs-unstable" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + }, + "systems_2": { + "locked": { + 
"lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/config/flake.nix b/config/flake.nix new file mode 100644 index 0000000..da326a6 --- /dev/null +++ b/config/flake.nix @@ -0,0 +1,66 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; + nixpkgs-unstable.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + nix-jetbrains-plugins.url = "github:theCapypara/nix-jetbrains-plugins"; + }; + + outputs = { self, nixpkgs, nixpkgs-unstable, flake-utils, nix-jetbrains-plugins }: + flake-utils.lib.eachDefaultSystem (system: let + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; + }; + pkgs-unstable = import nixpkgs-unstable { + inherit system; + config.allowUnfree = true; + }; + + python-packages = p: with p; [ + numpy + ]; + + pluginList = [ + "be.ugent.piedcler.dodona" + "com.github.copilot" + "com.google.tools.ij.aiplugin" + "IdeaVIM" + ]; + + mkShell = pkgs.mkShell.override { + stdenv = pkgs.stdenvAdapters.useMoldLinker pkgs.stdenv; + }; + in { + devShells.default = pkgs.mkShell { + packages = (with pkgs; [ + python311 + (python-packages python311Packages) + + # CUDA + git gitRepo gnupg autoconf curl + procps gnumake util-linux m4 gperf unzip + cudatoolkit linuxPackages.nvidia_x11 + libGLU libGL + xorg.libXi xorg.libXmu freeglut + xorg.libXext xorg.libX11 xorg.libXv xorg.libXrandr zlib + ncurses5 stdenv.cc binutils + ]) ++ (with pkgs-unstable; [ + uv + ]) ++ (with nix-jetbrains-plugins.lib."${system}"; [ + # Editor of your choice + #(buildIdeWithPlugins pkgs-unstable.jetbrains "pycharm-professional" pluginList) + ]); + + # CUDA + CUDA_PATH = pkgs.cudatoolkit; + # ImportError: 
libstdc++.so.6: cannot open shared object file: No such file or directory + LD_LIBRARY_PATH = "${pkgs.linuxPackages.nvidia_x11}/lib:${pkgs.ncurses5}/lib:${pkgs.libGL}/lib/:${pkgs.stdenv.cc.cc.lib}/lib/:${pkgs.glibc}/lib"; + EXTRA_LDFLAGS = "-L/lib -L${pkgs.linuxPackages.nvidia_x11}/lib"; + EXTRA_CCFLAGS = "-I/usr/include"; + + # Stop uv from downloading Python binaries automatically if needed. + UV_PYTHON_DOWNLOADS = "never"; + }; + }); +} From ff11c1deb38cb1d582ed1e54ca8ae64145af9492 Mon Sep 17 00:00:00 2001 From: Tibo De Peuter Date: Thu, 11 Dec 2025 22:21:47 +0100 Subject: [PATCH 2/3] Add testing configs --- README.md | 17 ++++- config/download_datasets.sh | 95 ++++++++++++++++++++++++++ config/generate_csv.sh | 106 +++++++++++++++++++++++++++++ config/local.sh | 27 ++++++++ config/{ => nix}/configuration.nix | 0 config/{ => nix}/flake.lock | 0 config/{ => nix}/flake.nix | 0 config/sub.csv | 5 ++ config/urls.txt | 2 + 9 files changed, 250 insertions(+), 2 deletions(-) create mode 100644 config/download_datasets.sh create mode 100644 config/generate_csv.sh create mode 100644 config/local.sh rename config/{ => nix}/configuration.nix (100%) rename config/{ => nix}/flake.lock (100%) rename config/{ => nix}/flake.nix (100%) create mode 100644 config/sub.csv create mode 100644 config/urls.txt diff --git a/README.md b/README.md index 2b0b5f7..28058f6 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,22 @@ Example usage: ```shell -python main.py --debug train --dataset enwik9 --data-root ~/data/datasets/ml --method optuna --model transformer --model-save-path ~/data/ml-models/test-transformer.pt +# Fetching +python main.py --debug train --method fetch \ + --dataset enwik9 --data-root /path/to/datasets -python benchmark.py --debug train --dataset enwik9 --data-root ~/data/datasets/ml --method optuna --model cnn --model-save-path ~/data/ml-models/test-cnn.pt +# Training +python main.py --debug train --method optuna \ + --dataset enwik9 --data-root /path/to/datasets \ + 
+  --model cnn --model-save-path /path/to/optuna-model
+python main.py --debug --results /path/to/results train --method full \
+  --dataset enwik9 --data-root /path/to/datasets \
+  --model-load-path /path/to/optuna-model --model-save-path /path/to/full-model
+
+# Compressing
+python benchmark.py --debug compress \
+  --model-load-path /path/to/full-model \
+  --input-file inputfile --output-file outputfile
 ```
 
 ## Running locally
diff --git a/config/download_datasets.sh b/config/download_datasets.sh
new file mode 100644
index 0000000..d76147d
--- /dev/null
+++ b/config/download_datasets.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+# Download all URLs (one per line) from a txt file into a destination directory.
+# This script is written by Copilot
+
+set -uo pipefail
+
+usage() {
+  echo "Usage: $0 <urls-file> <dest-dir>"
+  echo "Example: $0 urls.txt ~/Downloads/files"
+  exit 1
+}
+
+# ---- Args & prerequisites ----
+[[ $# -ne 2 ]] && usage
+
+URLS_FILE="$1"
+DEST_DIR="$2"
+
+if [[ ! -f "$URLS_FILE" ]]; then
+  echo "Error: URL list file not found: $URLS_FILE" >&2
+  exit 2
+fi
+
+mkdir -p "$DEST_DIR" || {
+  echo "Error: Cannot create/access destination directory: $DEST_DIR" >&2
+  exit 3
+}
+
+# Only wget is probed here, so the curl branch further down is currently never taken.
+DOWNLOADER=""
+if command -v wget >/dev/null 2>&1; then
+  DOWNLOADER="wget"
+else
+  echo "Error: 'wget' not found. Please install it." >&2
+  exit 4
+fi
+
+echo "Using downloader: $DOWNLOADER"
+echo "Reading URLs from: $URLS_FILE"
+echo "Saving to: $DEST_DIR"
+echo
+
+# ---- Download loop ----
+# Reads lines including the last one even if it lacks a trailing newline.
+while IFS= read -r url || [[ -n "$url" ]]; do
+  # Skip empty lines and comments
+  [[ -z "$url" ]] && continue
+  [[ "$url" =~ ^[[:space:]]*# ]] && continue
+
+  # Optional: strip leading/trailing whitespace
+  url="$(printf '%s' "$url" | awk '{$1=$1;print}')"
+
+  # Basic scheme check
+  if ! [[ "$url" =~ ^https?:// ]]; then
+    echo "Skipping (invalid URL scheme): $url" >&2
+    continue
+  fi
+
+  echo "→ Downloading: $url"
+
+  if [[ "$DOWNLOADER" == "curl" ]]; then
+    # -f fail on HTTP errors
+    # -L follow redirects
+    # -C - resume if possible
+    # --retry 3 retry transient failures
+    # -OJ save using server-provided filename (Content-Disposition) if present
+    # (cd to dest so curl -O/-OJ writes there)
+    (
+      cd "$DEST_DIR" && \
+      curl -fL -C - --retry 3 --remote-header-name -OJ "$url"
+    ) || {
+      echo " ⚠️ Failed: $url" >&2
+    }
+  else
+    # wget:
+    # --content-disposition: respect server-provided filename
+    # --tries=3, --timeout=10: retry/transient handling
+    # --directory-prefix: write to dest
+    # --no-clobber: skip file if it already exists
+    wget -q --content-disposition --tries=3 --timeout=10 \
+      --directory-prefix="$DEST_DIR" --no-clobber "$url" || {
+      echo " ⚠️ Failed: $url" >&2
+    }
+  fi
+
+  # Extract .gz files (-f: overwrite the output of an earlier run instead of leaving the .gz behind)
+  if [[ "$url" =~ \.gz$ ]]; then
+    filename="${url##*/}"
+    echo "Extracting: $filename"
+    gunzip -f "$DEST_DIR/${filename}"
+  fi
+done < "$URLS_FILE"
+
+echo
+echo "✅ Done. Files saved in: $DEST_DIR"
diff --git a/config/generate_csv.sh b/config/generate_csv.sh
new file mode 100644
index 0000000..1d4fae1
--- /dev/null
+++ b/config/generate_csv.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+# Generate a CSV that enumerates a test grid for your Python benchmarking script.
+# Columns: id,input,model,dataset,context_size
+#
+# Example:
+# ./generate_csv.sh > grid.csv
+# ./generate_csv.sh -o grid.csv
+#
+# You can customize the axes below (DATASETS, CONTEXTS) and the input directory
+# (INPUT_DIR) via env vars. Fields are emitted unquoted; see csv_escape below.
+
+set -euo pipefail
+
+OUT_FILE=""
+SHOW_HELP=false
+
+usage() {
+  cat <<'EOF'
+Usage:
+  generate_csv.sh [-o output.csv]
+
+Options:
+  -o FILE  Write CSV to FILE instead of stdout
+  -h       Show this help
+
+Customize the axes by editing the defaults in the script:
+  DATASETS, CONTEXTS (the model is fixed to "cnn")
+
+Examples:
+  ./generate_csv.sh > grid.csv
+  ./generate_csv.sh -o grid.csv
+
+Tip:
+  You can also override the axes and the input directory via env vars, e.g.:
+  INPUT_DIR=/path/to/inputs DATASETS="enwik9" CONTEXTS="64 128" ./generate_csv.sh > grid.csv
+EOF
+}
+
+# --- Parse flags ---
+while getopts ":o:h" opt; do
+  case "$opt" in
+    o) OUT_FILE="$OPTARG" ;;
+    h) SHOW_HELP=true ;;
+    \?) echo "Invalid option: -$OPTARG" >&2; usage; exit 2 ;;
+    :) echo "Option -$OPTARG requires an argument." >&2; exit 2 ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+$SHOW_HELP && { usage; exit 0; }
+
+# --- Axes (edit or override via env) ---
+# You can override these by exporting env vars before running, e.g.:
+#   export DATASETS="enwik9" CONTEXTS="64 128"
+# INPUT_DIR (the directory whose files are enumerated) can be overridden the same way.
+DATASETS=${DATASETS:-"enwik9 human_reference"}
+CONTEXTS=${CONTEXTS:-"64"}
+
+# Convert space-separated env vars to bash arrays
+# shellcheck disable=SC2206
+DATASETS_ARR=($DATASETS)
+CONTEXTS_ARR=($CONTEXTS)
+
+# --- CSV helpers ---
+csv_escape() {
+  # Double any embedded double quotes. The field is NOT wrapped in quotes: local.sh splits rows on plain commas.
+  local s="$1"
+  s=${s//\"/\"\"}
+  printf '%s' "$s"
+}
+
+emit() {
+  # Write to file or stdout
+  if [[ -n "$OUT_FILE" ]]; then
+    printf "%s\n" "$1" >> "$OUT_FILE"
+  else
+    printf "%s\n" "$1"
+  fi
+}
+
+# Prepare output
+if [[ -n "$OUT_FILE" ]]; then
+  : > "$OUT_FILE" # truncate/initialize
+fi
+
+# Header
+emit "id,input,model,dataset,context_size"
+
+# --- Generate rows (Cartesian product) ---
+id=0
+model="cnn"
+for file in "${INPUT_DIR:-/home/tdpeuter/data/ml-inputs}"/*; do
+  for dataset in "${DATASETS_ARR[@]}"; do
+    for ctx in "${CONTEXTS_ARR[@]}"; do
+      # Escape each text field; id and ctx are emitted as-is
+      row="${id},$(csv_escape "${file}"),$(csv_escape "${model}"),$(csv_escape "${dataset}"),$ctx"
+      emit "$row"
+      id=$((id+1))
+    done
+  done
+done
+
+# Done
+if [[ -n "$OUT_FILE" ]]; then
+  echo "CSV written to: $OUT_FILE"
+fi
diff --git a/config/local.sh b/config/local.sh
new file mode 100644
index 0000000..91f79d5
--- /dev/null
+++ b/config/local.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+INPUT_FILE="config/sub.csv"
+
+JOBID="$(date +%s | tail -c 9)"
+GIT_HASH="$(git rev-parse --short HEAD)"
+DATE="$(date "+%Y%m%d")"
+ID="${JOBID}-${GIT_HASH}-${DATE}"
+STAT_FILE="results/${ID}/results.csv"
+MODELS=/home/tdpeuter/data/ml-models
+
+while read -r line; do
+  IFS=',' read -r id input model dataset context <<< "$line"
+
+  if [[ "${id}" == "id" ]]; then
+    continue
+  fi
+
+  python main.py compress \
+    --model-load-path "${MODELS}/${dataset}/${context}/${model}-1024.pt" \
+    --input-file "${input}" \
+    --output-file "results/${ID}/${input}.pt" &
+  exit_code="${?}"
+  if [ "${exit_code}" -eq 0 ]; then
+    echo "DONE"
+  fi
+done < "${INPUT_FILE}"
diff --git a/config/configuration.nix b/config/nix/configuration.nix
similarity index 100%
rename from config/configuration.nix
rename to config/nix/configuration.nix
diff --git a/config/flake.lock b/config/nix/flake.lock
similarity index 100%
rename from config/flake.lock
rename to config/nix/flake.lock
diff --git a/config/flake.nix b/config/nix/flake.nix
similarity index 100%
rename from config/flake.nix
rename to config/nix/flake.nix
diff --git a/config/sub.csv b/config/sub.csv
new file mode 100644
index 0000000..98fdf7a
--- /dev/null
+++ b/config/sub.csv
@@ -0,0 +1,5 @@
+id,input,model,dataset,context_size
+0,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,enwik9,64
+1,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,human_reference,64
+2,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,enwik9,64
+3,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,human_reference,64
diff --git a/config/urls.txt b/config/urls.txt
new file mode 100644
index 0000000..417b877
--- /dev/null
+++ b/config/urls.txt
@@ -0,0 +1,2 @@
+https://download.mozilla.org/?product=firefox-latest&os=win&lang=en-US
+https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz
\ No newline at end of file
From 1143acc415cf4f8dd29c3c02ce2c4b8b6f1c9536 Mon Sep 17 00:00:00 2001
From: Tibo De Peuter
Date: Thu, 11 Dec 2025 22:45:46 +0100
Subject: [PATCH 3/3] chore: Replace firefox with 7zip (smaller)

---
 README.md       | 14 +++++++++++---
 config/local.sh | 13 ++++++++++++-
 config/sub.csv  | 12 ++++++++----
 config/urls.txt |  4 ++--
 4 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 28058f6..e339dbc 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,11 @@
 # neural compression
 
+## Running locally
+
+```
+uv sync --all-extras
+```
+
 Example usage:
 
 ```shell
@@ -21,10 +27,12 @@ python benchmark.py --debug compress \
   --input-file inputfile --output-file outputfile
 ```
 
-## Running locally
+Testing compression:
 
-```
-uv sync --all-extras
+```shell
+bash config/download_datasets.sh config/urls.txt /home/tdpeuter/data/ml-inputs
+bash config/generate_csv.sh > config/sub.csv
+bash config/local.sh
 ```
 
 ## Running on the Ghent University HPC
diff --git a/config/local.sh b/config/local.sh
index 91f79d5..e20ddf7 100644
--- a/config/local.sh
+++ b/config/local.sh
@@ -9,6 +9,8 @@ ID="${JOBID}-${GIT_HASH}-${DATE}"
 STAT_FILE="results/${ID}/results.csv"
 MODELS=/home/tdpeuter/data/ml-models
 
+mkdir -p "results/${ID}"
+
 while read -r line; do
   IFS=',' read -r id input model dataset context <<< "$line"
 
@@ -16,11 +18,20 @@ while read -r line; do
     continue
   fi
 
+  output="results/${ID}/$(basename "${input}").${id}.pt"
+
   python main.py compress \
     --model-load-path "${MODELS}/${dataset}/${context}/${model}-1024.pt" \
     --input-file "${input}" \
-    --output-file "results/${ID}/${input}.pt" &
+    --output-file "${output}" || { echo "FAILED: ${input}" >&2; continue; }
+
+  in_bytes="$(stat -c %s -- "${input}")"
+  out_bytes="$(stat -c %s -- "${output}")"
+
+  printf "%d,%s,%s,%s,%d,%d,%d\n" "$id" "$input" "$model" "$dataset" "$context" "$in_bytes" "$out_bytes" >> "${STAT_FILE}"
+  # Failed compressions were skipped above; $? here is the status of the stats append.
   exit_code="${?}"
+
   if [ "${exit_code}" -eq 0 ]; then
     echo "DONE"
   fi
diff --git a/config/sub.csv b/config/sub.csv
index 98fdf7a..1794775 100644
--- a/config/sub.csv
+++ b/config/sub.csv
@@ -1,5 +1,9 @@
 id,input,model,dataset,context_size
-0,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,enwik9,64
-1,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,human_reference,64
-2,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,enwik9,64
-3,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,human_reference,64
+0,/home/tdpeuter/data/ml-inputs/7z2501-x64.exe,cnn,enwik9,64
+1,/home/tdpeuter/data/ml-inputs/7z2501-x64.exe,cnn,human_reference,64
+2,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,enwik9,64
+3,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,human_reference,64
+4,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,enwik9,64
+5,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,human_reference,64
+6,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna.gz,cnn,enwik9,64
+7,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna.gz,cnn,human_reference,64
diff --git a/config/urls.txt b/config/urls.txt
index 417b877..eaf8ef9 100644
--- a/config/urls.txt
+++ b/config/urls.txt
@@ -1,2 +1,2 @@
-https://download.mozilla.org/?product=firefox-latest&os=win&lang=en-US
-https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz
\ No newline at end of file
+https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz
+https://www.7-zip.org/a/7z2501-x64.exe
\ No newline at end of file