#!/usr/bin/env bash
# Generate a CSV that enumerates a test grid for your Python benchmarking script.
# Columns: id,input,model,dataset,context_size
#
# Example:
#   ./generate_grid_csv.sh > grid.csv
#   ./generate_grid_csv.sh -o grid.csv
#
# One row is written per (input file, model, dataset, context size)
# combination. The axes (MODELS, DATASETS, CONTEXTS) and the directory that is
# scanned for input files (INPUT_DIR) can be overridden via environment
# variables. Fields are CSV-quoted whenever their content requires it.

set -euo pipefail

OUT_FILE=""
SHOW_HELP=false

#######################################
# Print the help text.
# Outputs: writes the usage message to stdout
#######################################
usage() {
  cat <<'EOF'
Usage: generate_grid_csv.sh [-o output.csv]

Options:
  -o   Write CSV to this file instead of stdout
  -h   Show this help

Output columns: id,input,model,dataset,context_size
One row is written per (input file, model, dataset, context size) combination.

Environment overrides (lists are whitespace-separated):
  INPUT_DIR  Directory whose entries fill the "input" column
             (default: /home/tdpeuter/data/ml-inputs)
  MODELS     List of models         (default: cnn)
  DATASETS   List of datasets       (default: enwik9 human_reference)
  CONTEXTS   List of context sizes  (default: 64)

Examples:
  ./generate_grid_csv.sh > grid.csv
  ./generate_grid_csv.sh -o grid.csv

Tip: override the axes via env vars, e.g.:
  DATASETS="enwik9" CONTEXTS="64 128" ./generate_grid_csv.sh > grid.csv
EOF
}

# --- Parse flags ---
while getopts ":o:h" opt; do
  case "$opt" in
    o) OUT_FILE="$OPTARG" ;;
    h) SHOW_HELP=true ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      # Keep the help text out of stdout so it cannot end up inside a
      # redirected CSV file.
      usage >&2
      exit 2
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 2
      ;;
  esac
done
shift $((OPTIND - 1))

if [[ "$SHOW_HELP" == true ]]; then
  usage
  exit 0
fi

# --- Axes (override via env) ---
# Each variable falls back to its default when unset or empty.
INPUT_DIR=${INPUT_DIR:-/home/tdpeuter/data/ml-inputs}
MODELS=${MODELS:-"cnn"}
DATASETS=${DATASETS:-"enwik9 human_reference"}
CONTEXTS=${CONTEXTS:-"64"}

# Convert the whitespace-separated values to bash arrays. `read -a` splits the
# words without glob-expanding them, unlike an unquoted ARR=($VAR).
read -r -a MODELS_ARR <<< "$MODELS"
read -r -a DATASETS_ARR <<< "$DATASETS"
read -r -a CONTEXTS_ARR <<< "$CONTEXTS"

# --- CSV helpers ---

#######################################
# Print one CSV field, quoted only when its content requires it.
# A field containing a comma, double quote, CR or LF is wrapped in double
# quotes with embedded double quotes doubled; any other value is printed
# unchanged.
# Arguments: $1 - raw field value
# Outputs:   the encoded field on stdout, without a trailing newline
#######################################
csv_escape() {
  local s=$1
  case "$s" in
    *[,\"]* | *$'\n'* | *$'\r'*)
      s=${s//\"/\"\"}
      printf '"%s"' "$s"
      ;;
    *)
      printf '%s' "$s"
      ;;
  esac
}

#######################################
# Write one line to the output destination.
# Globals:   OUT_FILE (read) - appended to when non-empty, else stdout is used
# Arguments: $1 - the line to write (a newline is added)
#######################################
emit() {
  if [[ -n "$OUT_FILE" ]]; then
    printf '%s\n' "$1" >> "$OUT_FILE"
  else
    printf '%s\n' "$1"
  fi
}

#######################################
# Emit one CSV row per (file, model, dataset, context size) combination.
# Row ids start at 0 and increase by one per emitted row.
# Globals:   MODELS_ARR, DATASETS_ARR, CONTEXTS_ARR (read)
# Arguments: $1 - directory whose entries are used as input files
# Outputs:   rows via emit; a warning on stderr when the directory yields
#            no entries (no rows are written in that case)
#######################################
generate_rows() {
  local input_dir=$1
  local -a files
  local file model dataset ctx file_field model_field
  local id=0

  # With nullglob an unmatched pattern expands to nothing instead of staying
  # in place as a literal "<dir>/*" entry that would become a bogus row.
  shopt -s nullglob
  files=("$input_dir"/*)
  shopt -u nullglob

  if ((${#files[@]} == 0)); then
    printf 'Warning: no input files found in %s\n' "$input_dir" >&2
    return 0
  fi

  for file in "${files[@]}"; do
    # Encode loop-invariant fields once per outer iteration.
    file_field=$(csv_escape "$file")
    for model in "${MODELS_ARR[@]}"; do
      model_field=$(csv_escape "$model")
      for dataset in "${DATASETS_ARR[@]}"; do
        for ctx in "${CONTEXTS_ARR[@]}"; do
          emit "${id},${file_field},${model_field},$(csv_escape "$dataset"),$(csv_escape "$ctx")"
          id=$((id + 1))
        done
      done
    done
  done
}

# Prepare output
if [[ -n "$OUT_FILE" ]]; then
  : > "$OUT_FILE" # truncate/initialize
fi

# Header
emit "id,input,model,dataset,context_size"

# --- Generate rows (Cartesian product) ---
generate_rows "$INPUT_DIR"

# Done
if [[ -n "$OUT_FILE" ]]; then
  echo "CSV written to: $OUT_FILE"
fi