Merge branch 'config' into process
This commit is contained in:
commit
7b9202e64f
9 changed files with 703 additions and 8 deletions
37
README.md
37
README.md
|
|
@ -1,19 +1,40 @@
|
||||||
# neural compression
|
# neural compression
|
||||||
|
|
||||||
Example usage:
|
|
||||||
|
|
||||||
```shell
|
|
||||||
python main.py --debug train --dataset enwik9 --data-root ~/data/datasets/ml --method optuna --model transformer --model-save-path ~/data/ml-models/test-transformer.pt
|
|
||||||
|
|
||||||
python benchmark.py --debug train --dataset enwik9 --data-root ~/data/datasets/ml --method optuna --model cnn --model-save-path ~/data/ml-models/test-cnn.pt
|
|
||||||
```
|
|
||||||
|
|
||||||
## Running locally
|
## Running locally
|
||||||
|
|
||||||
```
|
```
|
||||||
uv sync --all-extras
|
uv sync --all-extras
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Example usage:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Fetching
|
||||||
|
python main.py --debug train --method fetch \
|
||||||
|
--dataset enwik9 --data-root /path/to/datasets
|
||||||
|
|
||||||
|
# Training
|
||||||
|
python main.py --debug train --method optuna \
|
||||||
|
--dataset enwik9 --data-root /path/to/datasets \
|
||||||
|
--model cnn --model-save-path /path/to/optuna-model
|
||||||
|
python main.py --debug --results /path/to/results train --method full \
|
||||||
|
--dataset enwik9 --data-root /path/to/datasets \
|
||||||
|
--model-load-path /path/to/optuna-model --model-save-path /path/to/full-model
|
||||||
|
|
||||||
|
# Compressing
|
||||||
|
python benchmark.py --debug compress \
|
||||||
|
--model-load-path /path/to/full-model \
|
||||||
|
--input-file inputfile --output-file outputfile
|
||||||
|
```
|
||||||
|
|
||||||
|
Testing compression:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
bash config/download_datasets.sh config/urls.txt /home/tdpeuter/data/ml-inputs
|
||||||
|
bash config/generate_csv.sh > config/sub.csv
|
||||||
|
bash config/local.sh
|
||||||
|
```
|
||||||
|
|
||||||
## Running on the Ghent University HPC
|
## Running on the Ghent University HPC
|
||||||
|
|
||||||
See the [Infrastructure docs](https://docs.hpc.ugent.be/infrastructure/#gpu-clusters) for more information about the clusters.
|
See the [Infrastructure docs](https://docs.hpc.ugent.be/infrastructure/#gpu-clusters) for more information about the clusters.
|
||||||
|
|
|
||||||
95
config/download_datasets.sh
Normal file
95
config/download_datasets.sh
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# Download all URLs (one per line) from a txt file into a destination directory.
|
||||||
|
# This script is written by Copilot
|
||||||
|
|
||||||
|
set -uo pipefail
|
||||||
|
|
||||||
|
# Print a two-line usage summary (invocation form plus an example) to stdout
# and terminate the script with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <urls.txt> <destination_dir>" \
    "Example: $0 urls.txt ~/Downloads/files"
  exit 1
}
|
||||||
|
|
||||||
|
# ---- Args & prerequisites ----
|
||||||
|
[[ $# -ne 2 ]] && usage
|
||||||
|
|
||||||
|
URLS_FILE="$1"
|
||||||
|
DEST_DIR="$2"
|
||||||
|
|
||||||
|
if [[ ! -f "$URLS_FILE" ]]; then
|
||||||
|
echo "Error: URL list file not found: $URLS_FILE" >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
|
||||||
|
mkdir -p "$DEST_DIR" || {
|
||||||
|
echo "Error: Cannot create/access destination directory: $DEST_DIR" >&2
|
||||||
|
exit 3
|
||||||
|
}
|
||||||
|
|
||||||
|
# Select the download tool. Only wget is probed here, so DOWNLOADER is either
# "wget" or the script stops with exit status 4; the curl branch in the
# download loop is therefore never selected.
DOWNLOADER=""
if command -v wget >/dev/null 2>&1; then
  DOWNLOADER="wget"
else
  echo "Error: 'wget' not found. Please install it." >&2
  exit 4
fi
|
||||||
|
|
||||||
|
echo "Using downloader: $DOWNLOADER"
|
||||||
|
echo "Reading URLs from: $URLS_FILE"
|
||||||
|
echo "Saving to: $DEST_DIR"
|
||||||
|
echo
|
||||||
|
|
||||||
|
# ---- Download loop ----
# Processes $URLS_FILE line by line: each http(s) URL is downloaded into
# $DEST_DIR with $DOWNLOADER, and URLs ending in .gz are extracted afterwards.
# Reads lines including the last one even if it lacks a trailing newline.
while IFS= read -r url || [[ -n "$url" ]]; do
  # Strip leading/trailing whitespace first, so that whitespace-only lines
  # count as empty and indented comments are still recognised as comments.
  url="${url#"${url%%[![:space:]]*}"}"
  url="${url%"${url##*[![:space:]]}"}"

  # Skip empty lines and comments
  if [[ -z "$url" || "$url" == \#* ]]; then
    continue
  fi

  # Basic scheme check
  if ! [[ "$url" =~ ^https?:// ]]; then
    echo "Skipping (invalid URL scheme): $url" >&2
    continue
  fi

  echo "→ Downloading: $url"

  if [[ "$DOWNLOADER" == "curl" ]]; then
    # -f fail on HTTP errors
    # -L follow redirects
    # -C - resume if possible
    # --retry 3 retry transient failures
    # -OJ save using server-provided filename (Content-Disposition) if present
    # (cd to dest inside a subshell so curl -O/-OJ writes there without
    # changing the working directory of the loop)
    if ! (
      cd "$DEST_DIR" &&
        curl -fL -C - --retry 3 --remote-header-name -OJ "$url"
    ); then
      echo " ⚠️ Failed: $url" >&2
      # Nothing was downloaded, so there is nothing to extract.
      continue
    fi
  else
    # wget:
    # --content-disposition: respect server-provided filename
    # --tries=3, --timeout=10: retry/transient handling
    # --directory-prefix: write to dest
    # --no-clobber: skip file if it already exists
    if ! wget -q --content-disposition --tries=3 --timeout=10 \
      --directory-prefix="$DEST_DIR" --no-clobber "$url"; then
      echo " ⚠️ Failed: $url" >&2
      # Nothing was downloaded, so there is nothing to extract.
      continue
    fi
  fi

  # Extract .gz files. The local name is derived from the last URL path
  # segment, which may differ from a server-provided filename; check that the
  # file is really there before calling gunzip.
  if [[ "$url" =~ \.gz$ ]]; then
    filename="${url##*/}"
    if [[ ! -f "$DEST_DIR/${filename}" ]]; then
      echo " ⚠️ Cannot extract, file not found: $DEST_DIR/${filename}" >&2
      continue
    fi
    echo "Extracting: $filename"
    if ! gunzip -- "$DEST_DIR/${filename}"; then
      echo " ⚠️ Failed to extract: $filename" >&2
    fi
  fi
done < "$URLS_FILE"
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo "✅ Done. Files saved in: $DEST_DIR"
|
||||||
106
config/generate_csv.sh
Normal file
106
config/generate_csv.sh
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# Generate a CSV that enumerates a test grid for your Python benchmarking script.
|
||||||
|
# Columns: id,input,model,dataset,context_size
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#   ./generate_csv.sh > grid.csv
|
||||||
|
#   ./generate_csv.sh -o grid.csv
|
||||||
|
#
|
||||||
|
# You can customize the axes below (DATASETS, CONTEXTS) by editing their
# defaults or overriding them via environment variables. Embedded double
# quotes are doubled, but fields are not wrapped in quotes, so values must not
# contain commas.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
OUT_FILE=""
|
||||||
|
SHOW_HELP=false
|
||||||
|
|
||||||
|
# Print the help text to stdout. The heredoc delimiter is quoted, so the text
# is emitted literally (no variable or command expansion).
usage() {
  cat <<'EOF'
Usage:
  generate_csv.sh [-o output.csv]

Options:
  -o <file>   Write CSV to this file instead of stdout
  -h          Show this help

Customize the axes by editing the defaults in the script:
  DATASETS, CONTEXTS

Examples:
  ./generate_csv.sh > grid.csv
  ./generate_csv.sh -o grid.csv

Tip:
  You can also override the axes via env vars (space-separated), e.g.:
    DATASETS="enwik9 human_reference" CONTEXTS="64 128" ./generate_csv.sh > grid.csv
EOF
}
|
||||||
|
|
||||||
|
# --- Parse flags ---
# The leading ':' in the optstring selects silent error reporting: an unknown
# option is delivered as '?' and a missing argument as ':', with the offending
# flag letter available in OPTARG.
while getopts ":o:h" opt; do
  if [[ "$opt" == "o" ]]; then
    OUT_FILE="$OPTARG"
  elif [[ "$opt" == "h" ]]; then
    SHOW_HELP=true
  elif [[ "$opt" == "?" ]]; then
    echo "Invalid option: -$OPTARG" >&2
    usage
    exit 2
  elif [[ "$opt" == ":" ]]; then
    echo "Option -$OPTARG requires an argument." >&2
    exit 2
  fi
done
# Drop the parsed options so that only positional arguments remain.
shift $((OPTIND - 1))

# Help was requested: print it and stop successfully.
if "$SHOW_HELP"; then
  usage
  exit 0
fi
|
||||||
|
|
||||||
|
# --- Axes (edit or override via env) ---
# You can override these by exporting env vars before running, e.g.:
#   export DATASETS="enwik9 human_reference"
DATASETS=${DATASETS:-"enwik9 human_reference"}
CONTEXTS=${CONTEXTS:-"64"}

# Convert the whitespace-separated strings to bash arrays. `read -a` splits on
# IFS like an unquoted expansion does, but never performs pathname expansion,
# so a value such as "*" stays literal. With `-d ''` the whole value (including
# embedded newlines) is consumed; read then reports end-of-input with a
# non-zero status, which is expected here and therefore ignored.
read -r -d '' -a DATASETS_ARR <<< "$DATASETS" || true
read -r -d '' -a CONTEXTS_ARR <<< "$CONTEXTS" || true
|
||||||
|
|
||||||
|
# --- CSV helpers ---
|
||||||
|
# Print $1 with every embedded double quote doubled (" becomes ""), without a
# trailing newline. The value is NOT wrapped in surrounding quotes.
csv_escape() {
  local quote='"'
  printf '%s' "${1//$quote/$quote$quote}"
}
|
||||||
|
|
||||||
|
# Output one line ($1 followed by a newline): to stdout when OUT_FILE is
# empty, otherwise appended to the file named by OUT_FILE.
emit() {
  if [[ -z "$OUT_FILE" ]]; then
    printf "%s\n" "$1"
    return
  fi
  printf "%s\n" "$1" >> "$OUT_FILE"
}
|
||||||
|
|
||||||
|
# Prepare output
if [[ -n "$OUT_FILE" ]]; then
  : > "$OUT_FILE" # truncate/initialize
fi

# Header
emit "id,input,model,dataset,context_size"

# --- Generate rows (Cartesian product: input files x datasets x contexts) ---
# INPUT_DIR may be overridden via the environment; the default keeps the
# previously hard-coded location.
INPUT_DIR=${INPUT_DIR:-/home/tdpeuter/data/ml-inputs}
id=0
model="cnn"
for file in "$INPUT_DIR"/*; do
  # Without nullglob an unmatched glob stays the literal "<dir>/*"; skip it so
  # an empty or missing directory does not produce bogus rows.
  [[ -e "$file" ]] || continue
  for dataset in "${DATASETS_ARR[@]}"; do
    for ctx in "${CONTEXTS_ARR[@]}"; do
      # Embedded double quotes are doubled by csv_escape; the fields are not
      # wrapped in quotes.
      row="${id},$(csv_escape "${file}"),$(csv_escape "${model}"),$(csv_escape "${dataset}"),$ctx"
      emit "$row"
      id=$((id+1))
    done
  done
done

# Done
if [[ -n "$OUT_FILE" ]]; then
  echo "CSV written to: $OUT_FILE"
fi
|
||||||
38
config/local.sh
Normal file
38
config/local.sh
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Runs `python main.py compress` once per row of the CSV grid and records the
# input/output sizes of every successful run in results/<ID>/results.csv.

# Grid to process; columns: id,input,model,dataset,context_size.
INPUT_FILE="config/sub.csv"

# Run identifier: last 8 digits of the epoch time, short git hash, and date.
JOBID="$(date +%s | tail -c 9)"
GIT_HASH="$(git rev-parse --short HEAD)"
DATE="$(date "+%Y%m%d")"
ID="${JOBID}-${GIT_HASH}-${DATE}"
STAT_FILE="results/${ID}/results.csv"
MODELS=/home/tdpeuter/data/ml-models

mkdir -p "results/${ID}"

# The CSV is read on file descriptor 3 rather than stdin, so commands started
# inside the loop cannot accidentally consume the remaining rows. The
# `|| [[ -n ... ]]` part also processes a last line without trailing newline.
while read -r -u 3 line || [[ -n "${line}" ]]; do
  IFS=',' read -r id input model dataset context <<< "${line}"

  # Skip blank lines and the header row.
  if [[ -z "${id}" || "${id}" == "id" ]]; then
    continue
  fi

  output="results/${ID}/$(basename "${input}").${id}.pt"

  python main.py compress \
    --model-load-path "${MODELS}/${dataset}/${context}/${model}-1024.pt" \
    --input-file "${input}" \
    --output-file "${output}"
  # Capture the status immediately: any later command would overwrite $?.
  exit_code="${?}"

  if [ "${exit_code}" -ne 0 ]; then
    echo "FAILED (exit code ${exit_code}): ${input}" >&2
    continue
  fi

  # Only record sizes when both files can actually be inspected.
  if ! in_bytes="$(stat -c %s -- "${input}")" ||
    ! out_bytes="$(stat -c %s -- "${output}")"; then
    echo "FAILED (cannot stat input/output): ${input}" >&2
    continue
  fi

  printf "%d,%s,%s,%s,%d,%d,%d\n" "$id" "$input" "$model" "$dataset" "$context" "$in_bytes" "$out_bytes" >> "${STAT_FILE}"

  echo "DONE"
done 3< "${INPUT_FILE}"
|
||||||
207
config/nix/configuration.nix
Normal file
207
config/nix/configuration.nix
Normal file
|
|
@ -0,0 +1,207 @@
|
||||||
|
# Edit this configuration file to define what should be installed on
|
||||||
|
# your system. Help is available in the configuration.nix(5) man page, on
|
||||||
|
# https://search.nixos.org/options and in the NixOS manual (`nixos-help`).
|
||||||
|
|
||||||
|
{ config, lib, pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports =
|
||||||
|
[ # Include the results of the hardware scan.
|
||||||
|
./hardware-configuration.nix
|
||||||
|
];
|
||||||
|
|
||||||
|
# Use the systemd-boot EFI boot loader.
|
||||||
|
boot.loader = {
|
||||||
|
systemd-boot.enable = true;
|
||||||
|
efi = {
|
||||||
|
efiSysMountPoint = "/boot/efi";
|
||||||
|
canTouchEfiVariables = true;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.hostName = "MachineLearning"; # Define your hostname.
|
||||||
|
# Pick only one of the below networking options.
|
||||||
|
# networking.wireless.enable = true; # Enables wireless support via wpa_supplicant.
|
||||||
|
# networking.networkmanager.enable = true; # Easiest to use and most distros use this by default.
|
||||||
|
|
||||||
|
# Set your time zone.
|
||||||
|
time.timeZone = "Europe/Brussels";
|
||||||
|
|
||||||
|
# Configure network proxy if necessary
|
||||||
|
# networking.proxy.default = "http://user:password@proxy:port/";
|
||||||
|
# networking.proxy.noProxy = "127.0.0.1,localhost,internal.domain";
|
||||||
|
|
||||||
|
# Select internationalisation properties.
|
||||||
|
# i18n.defaultLocale = "en_US.UTF-8";
|
||||||
|
# console = {
|
||||||
|
# font = "Lat2-Terminus16";
|
||||||
|
# keyMap = "us";
|
||||||
|
# useXkbConfig = true; # use xkb.options in tty.
|
||||||
|
# };
|
||||||
|
|
||||||
|
# Enable the X11 windowing system.
|
||||||
|
services.xserver = {
|
||||||
|
#enable = true;
|
||||||
|
videoDrivers = [
|
||||||
|
"nvidia"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
# Configure keymap in X11
|
||||||
|
# services.xserver.xkb.layout = "us";
|
||||||
|
# services.xserver.xkb.options = "eurosign:e,caps:escape";
|
||||||
|
|
||||||
|
# Enable CUPS to print documents.
|
||||||
|
# services.printing.enable = true;
|
||||||
|
|
||||||
|
# Enable sound.
|
||||||
|
# services.pulseaudio.enable = true;
|
||||||
|
# OR
|
||||||
|
# services.pipewire = {
|
||||||
|
# enable = true;
|
||||||
|
# pulse.enable = true;
|
||||||
|
# };
|
||||||
|
|
||||||
|
# Enable touchpad support (enabled default in most desktopManager).
|
||||||
|
# services.libinput.enable = true;
|
||||||
|
|
||||||
|
# Define a user account. Don't forget to set a password with ‘passwd’.
|
||||||
|
# users.users.alice = {
|
||||||
|
# isNormalUser = true;
|
||||||
|
# extraGroups = [ "wheel" ]; # Enable ‘sudo’ for the user.
|
||||||
|
# packages = with pkgs; [
|
||||||
|
# tree
|
||||||
|
# ];
|
||||||
|
# };
|
||||||
|
users.users = {
|
||||||
|
admin = {
|
||||||
|
description = "System Administrator";
|
||||||
|
isNormalUser = true;
|
||||||
|
extraGroups = [
|
||||||
|
config.users.groups.wheel.name # Enable 'sudo' for the user.
|
||||||
|
];
|
||||||
|
initialPassword = "ChangeMe";
|
||||||
|
|
||||||
|
openssh.authorizedKeys.keys = [
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFdkZTYhBdUJ1YXx/2Iek0XC/jkbdxg37GORpXUgP2NO"
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNSav7u6OxtxlAzq170/HuzE8cGvCULVGAiragtS5T6"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
ml = {
|
||||||
|
description = "Machine Learning benchmarks";
|
||||||
|
isNormalUser = true;
|
||||||
|
|
||||||
|
openssh.authorizedKeys.keys = [
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFdkZTYhBdUJ1YXx/2Iek0XC/jkbdxg37GORpXUgP2NO"
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNSav7u6OxtxlAzq170/HuzE8cGvCULVGAiragtS5T6"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# programs.firefox.enable = true;
|
||||||
|
|
||||||
|
# List packages installed in system profile.
|
||||||
|
# You can use https://search.nixos.org/ to find more packages (and options).
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
vim
|
||||||
|
curl
|
||||||
|
git
|
||||||
|
wget
|
||||||
|
tmux
|
||||||
|
];
|
||||||
|
|
||||||
|
hardware = {
|
||||||
|
graphics = {
|
||||||
|
enable = true;
|
||||||
|
enable32Bit = true;
|
||||||
|
extraPackages = with pkgs; [
|
||||||
|
intel-ocl
|
||||||
|
intel-compute-runtime
|
||||||
|
intel-graphics-compiler
|
||||||
|
opencl-clhpp
|
||||||
|
opencl-headers
|
||||||
|
ocl-icd
|
||||||
|
];
|
||||||
|
};
|
||||||
|
nvidia = {
|
||||||
|
modesetting.enable = true;
|
||||||
|
powerManagement.enable = false;
|
||||||
|
powerManagement.finegrained = false;
|
||||||
|
open = false;
|
||||||
|
nvidiaSettings = false;
|
||||||
|
package = config.boot.kernelPackages.nvidiaPackages.stable;
|
||||||
|
|
||||||
|
# prime = {
|
||||||
|
# nvidiaBusId = "PCI:1:0:0";
|
||||||
|
# intelBusId = "PCI:0:2:0";
|
||||||
|
# };
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# Some programs need SUID wrappers, can be configured further or are
|
||||||
|
# started in user sessions.
|
||||||
|
# programs.mtr.enable = true;
|
||||||
|
# programs.gnupg.agent = {
|
||||||
|
# enable = true;
|
||||||
|
# enableSSHSupport = true;
|
||||||
|
# };
|
||||||
|
|
||||||
|
nix.settings = {
|
||||||
|
substituters = [
|
||||||
|
"https://cache.nixos-cuda.org"
|
||||||
|
];
|
||||||
|
trusted-public-keys = [
|
||||||
|
"cache.nixos-cuda.org:74DUi4Ye579gUqzH4ziL9IyiJBlDpMRn9MBN8oNan9M="
|
||||||
|
];
|
||||||
|
experimental-features = [
|
||||||
|
"nix-command"
|
||||||
|
"flakes"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
nixpkgs.config.allowUnfree = true;
|
||||||
|
|
||||||
|
# List services that you want to enable:
|
||||||
|
|
||||||
|
# Enable the OpenSSH daemon.
|
||||||
|
services.openssh = {
|
||||||
|
enable = true;
|
||||||
|
settings = {
|
||||||
|
PasswordAuthentication = false;
|
||||||
|
PermitRootLogin = "no";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# Open ports in the firewall.
|
||||||
|
# networking.firewall.allowedTCPPorts = [ ... ];
|
||||||
|
# networking.firewall.allowedUDPPorts = [ ... ];
|
||||||
|
# Or disable the firewall altogether.
|
||||||
|
# networking.firewall.enable = false;
|
||||||
|
|
||||||
|
# Copy the NixOS configuration file and link it from the resulting system
|
||||||
|
# (/run/current-system/configuration.nix). This is useful in case you
|
||||||
|
# accidentally delete configuration.nix.
|
||||||
|
# system.copySystemConfiguration = true;
|
||||||
|
|
||||||
|
# This option defines the first version of NixOS you have installed on this particular machine,
|
||||||
|
# and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions.
|
||||||
|
#
|
||||||
|
# Most users should NEVER change this value after the initial install, for any reason,
|
||||||
|
# even if you've upgraded your system to a new NixOS release.
|
||||||
|
#
|
||||||
|
# This value does NOT affect the Nixpkgs version your packages and OS are pulled from,
|
||||||
|
# so changing it will NOT upgrade your system - see https://nixos.org/manual/nixos/stable/#sec-upgrading for how
|
||||||
|
# to actually do that.
|
||||||
|
#
|
||||||
|
# This value being lower than the current NixOS release does NOT mean your system is
|
||||||
|
# out of date, out of support, or vulnerable.
|
||||||
|
#
|
||||||
|
# Do NOT change this value unless you have manually inspected all the changes it would make to your configuration,
|
||||||
|
# and migrated your data accordingly.
|
||||||
|
#
|
||||||
|
# For more information, see `man configuration.nix` or https://nixos.org/manual/nixos/stable/options#opt-system.stateVersion .
|
||||||
|
system.stateVersion = "25.05"; # Did you read the comment?
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
151
config/nix/flake.lock
generated
Normal file
151
config/nix/flake.lock
generated
Normal file
|
|
@ -0,0 +1,151 @@
|
||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"flake-utils": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1731533236,
|
||||||
|
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"flake-utils_2": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": [
|
||||||
|
"nix-jetbrains-plugins",
|
||||||
|
"systems"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1731533236,
|
||||||
|
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nix-jetbrains-plugins": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-utils": "flake-utils_2",
|
||||||
|
"nixpkgs": "nixpkgs",
|
||||||
|
"systems": "systems_2"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1765025946,
|
||||||
|
"narHash": "sha256-ZSeAc3h08Lv67gbUjDMK6GTrQgYsrNpFNJEavCPxN8I=",
|
||||||
|
"owner": "theCapypara",
|
||||||
|
"repo": "nix-jetbrains-plugins",
|
||||||
|
"rev": "b861755ca1f4f7633ffdddc5608c32632cecebc3",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "theCapypara",
|
||||||
|
"repo": "nix-jetbrains-plugins",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1757745802,
|
||||||
|
"narHash": "sha256-hLEO2TPj55KcUFUU1vgtHE9UEIOjRcH/4QbmfHNF820=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "c23193b943c6c689d70ee98ce3128239ed9e32d1",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs-unstable": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1765186076,
|
||||||
|
"narHash": "sha256-hM20uyap1a0M9d344I692r+ik4gTMyj60cQWO+hAYP8=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "addf7cf5f383a3101ecfba091b98d0a1263dc9b8",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs_2": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1764939437,
|
||||||
|
"narHash": "sha256-4TLFHUwXraw9Df5mXC/vCrJgb50CRr3CzUzF0Mn3CII=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "00d2457e2f608b4be6fe8b470b0a36816324b0ae",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixos-25.05",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-utils": "flake-utils",
|
||||||
|
"nix-jetbrains-plugins": "nix-jetbrains-plugins",
|
||||||
|
"nixpkgs": "nixpkgs_2",
|
||||||
|
"nixpkgs-unstable": "nixpkgs-unstable"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems_2": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
||||||
66
config/nix/flake.nix
Normal file
66
config/nix/flake.nix
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
{
|
||||||
|
inputs = {
|
||||||
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
|
||||||
|
nixpkgs-unstable.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
|
flake-utils.url = "github:numtide/flake-utils";
|
||||||
|
nix-jetbrains-plugins.url = "github:theCapypara/nix-jetbrains-plugins";
|
||||||
|
};
|
||||||
|
|
||||||
|
outputs = { self, nixpkgs, nixpkgs-unstable, flake-utils, nix-jetbrains-plugins }:
|
||||||
|
flake-utils.lib.eachDefaultSystem (system: let
|
||||||
|
pkgs = import nixpkgs {
|
||||||
|
inherit system;
|
||||||
|
config.allowUnfree = true;
|
||||||
|
};
|
||||||
|
pkgs-unstable = import nixpkgs-unstable {
|
||||||
|
inherit system;
|
||||||
|
config.allowUnfree = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
python-packages = p: with p; [
|
||||||
|
numpy
|
||||||
|
];
|
||||||
|
|
||||||
|
pluginList = [
|
||||||
|
"be.ugent.piedcler.dodona"
|
||||||
|
"com.github.copilot"
|
||||||
|
"com.google.tools.ij.aiplugin"
|
||||||
|
"IdeaVIM"
|
||||||
|
];
|
||||||
|
|
||||||
|
mkShell = pkgs.mkShell.override {
|
||||||
|
stdenv = pkgs.stdenvAdapters.useMoldLinker pkgs.stdenv;
|
||||||
|
};
|
||||||
|
in {
|
||||||
|
devShells.default = pkgs.mkShell {
|
||||||
|
packages = (with pkgs; [
|
||||||
|
python311
|
||||||
|
(python-packages python311Packages)
|
||||||
|
|
||||||
|
# CUDA
|
||||||
|
git gitRepo gnupg autoconf curl
|
||||||
|
procps gnumake util-linux m4 gperf unzip
|
||||||
|
cudatoolkit linuxPackages.nvidia_x11
|
||||||
|
libGLU libGL
|
||||||
|
xorg.libXi xorg.libXmu freeglut
|
||||||
|
xorg.libXext xorg.libX11 xorg.libXv xorg.libXrandr zlib
|
||||||
|
ncurses5 stdenv.cc binutils
|
||||||
|
]) ++ (with pkgs-unstable; [
|
||||||
|
uv
|
||||||
|
]) ++ (with nix-jetbrains-plugins.lib."${system}"; [
|
||||||
|
# Editor of your choice
|
||||||
|
#(buildIdeWithPlugins pkgs-unstable.jetbrains "pycharm-professional" pluginList)
|
||||||
|
]);
|
||||||
|
|
||||||
|
# CUDA
|
||||||
|
CUDA_PATH = pkgs.cudatoolkit;
|
||||||
|
# ImportError: libstdc++.so.6: cannot open shared object file: No such file or directory
|
||||||
|
LD_LIBRARY_PATH = "${pkgs.linuxPackages.nvidia_x11}/lib:${pkgs.ncurses5}/lib:${pkgs.libGL}/lib/:${pkgs.stdenv.cc.cc.lib}/lib/:${pkgs.glibc}/lib";
|
||||||
|
EXTRA_LDFLAGS = "-L/lib -L${pkgs.linuxPackages.nvidia_x11}/lib";
|
||||||
|
EXTRA_CCFLAGS = "-I/usr/include";
|
||||||
|
|
||||||
|
# Stop uv from downloading Python binaries automatically if needed.
|
||||||
|
UV_PYTHON_DOWNLOADS = "never";
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
9
config/sub.csv
Normal file
9
config/sub.csv
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
id,input,model,dataset,context_size
|
||||||
|
0,/home/tdpeuter/data/ml-inputs/7z2501-x64.exe,cnn,enwik9,64
|
||||||
|
1,/home/tdpeuter/data/ml-inputs/7z2501-x64.exe,cnn,human_reference,64
|
||||||
|
2,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,enwik9,64
|
||||||
|
3,/home/tdpeuter/data/ml-inputs/Firefox Setup 146.0.exe,cnn,human_reference,64
|
||||||
|
4,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,enwik9,64
|
||||||
|
5,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna,cnn,human_reference,64
|
||||||
|
6,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna.gz,cnn,enwik9,64
|
||||||
|
7,/home/tdpeuter/data/ml-inputs/GCF_000005845.2_ASM584v2_genomic.fna.gz,cnn,human_reference,64
|
||||||
|
2
config/urls.txt
Normal file
2
config/urls.txt
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz
|
||||||
|
https://www.7-zip.org/a/7z2501-x64.exe
|
||||||
Reference in a new issue