This repository has been archived on 2025-12-23. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
2025ML-project-neural_compr.../config/measure_gzip_lz4.sh
Tibo De Peuter 2f869a8a7a
chore: Restructure
# Conflicts:
#	config/measure.py
#	results/compression_results_auto_small.csv
2025-12-19 17:58:43 +01:00

66 lines
No EOL
2.1 KiB
Bash
Executable file

#!/bin/bash
#
# Benchmark gzip and lz4 on every regular file directly inside a dataset
# directory.
#
# Usage: measure_gzip_lz4.sh <dataset_directory>
#
# For each file and each compressor one CSV row is appended to
# compression_metrics_<YYYYmmdd_HHMMSS>.csv in the current working directory:
#   filename,original_size,compressor,compressed_size,compression_ratio,
#   compression_time,decompression_time
# Sizes are in bytes; times are wall-clock seconds as printed by bc.
#
# Requires: GNU date (%N), GNU stat (-c), bc, gzip, lz4.
#
# The dataset directory is only ever read. Compressed artifacts are written to
# a private temporary directory (honours $TMPDIR) that is removed on exit, and
# decompression output is streamed to /dev/null for both compressors so the
# original inputs are never rewritten and the two timings are comparable.

set -u

# Print the wall-clock seconds elapsed since the timestamp given in $1.
elapsed_since() {
  echo "$(date +%s.%N) - $1" | bc
}

# Print $1 as a single CSV field, quoting it only when it contains a comma,
# a double quote or a newline (inner quotes are doubled).
csv_escape() {
  local field=$1
  case "$field" in
    *\"* | *,* | *$'\n'*)
      field=${field//\"/\"\"}
      printf '"%s"' "$field"
      ;;
    *)
      printf '%s' "$field"
      ;;
  esac
}

# Compressor adapters: <name>_compress SRC DST and <name>_decompress SRC.
gzip_compress()   { gzip -c -- "$1" > "$2"; }
gzip_decompress() { gzip -d -c -- "$1" > /dev/null; }
lz4_compress()    { lz4 -f -q "$1" "$2"; }
lz4_decompress()  { lz4 -d -c -q "$1" > /dev/null; }

# Time one compressor on one file and append the result row to $output_file.
# Arguments: $1 compressor name (gzip|lz4), $2 path of the input file,
#            $3 name to report in the CSV, $4 size of the input in bytes.
# Returns:   0 when a row was written, 1 when the measurement was skipped.
measure() {
  local compressor=$1 file=$2 filename=$3 original_size=$4
  local packed="$work_dir/payload.$compressor"
  local start_time compression_time decompression_time
  local compressed_size compression_ratio

  start_time=$(date +%s.%N)
  if ! "${compressor}_compress" "$file" "$packed"; then
    echo "Warning: $compressor failed to compress '$file'; skipping" >&2
    rm -f -- "$packed"
    return 1
  fi
  compression_time=$(elapsed_since "$start_time")

  compressed_size=$(stat -c%s "$packed")
  # A zero/unknown compressed size would make bc divide by zero.
  if [ -z "$compressed_size" ] || [ "$compressed_size" -eq 0 ]; then
    echo "Warning: $compressor produced no output for '$file'; skipping" >&2
    rm -f -- "$packed"
    return 1
  fi
  compression_ratio=$(echo "scale=2; $original_size / $compressed_size" | bc -l)

  # Decompress and measure time
  start_time=$(date +%s.%N)
  if ! "${compressor}_decompress" "$packed"; then
    echo "Warning: $compressor failed to decompress '$file'; skipping" >&2
    rm -f -- "$packed"
    return 1
  fi
  decompression_time=$(elapsed_since "$start_time")

  # Write results
  printf '%s,%s,%s,%s,%s,%s,%s\n' \
    "$(csv_escape "$filename")" "$original_size" "$compressor" \
    "$compressed_size" "$compression_ratio" \
    "$compression_time" "$decompression_time" >> "$output_file"

  # Clean up
  rm -f -- "$packed"
}

# Check if a directory is provided
if [ $# -ne 1 ]; then
  echo "Usage: $0 <dataset_directory>"
  exit 1
fi

dataset_dir="$1"
if [ ! -d "$dataset_dir" ]; then
  echo "Error: '$dataset_dir' is not a directory" >&2
  exit 1
fi

for tool in gzip lz4 bc stat date; do
  if ! command -v "$tool" > /dev/null 2>&1; then
    echo "Error: required tool '$tool' not found in PATH" >&2
    exit 1
  fi
done

# Scratch space for compressed artifacts; never write into the dataset itself,
# where "<name>.gz" / "<name>.lz4" could collide with real dataset files.
if ! work_dir=$(mktemp -d); then
  echo "Error: could not create a temporary directory" >&2
  exit 1
fi
trap 'rm -rf -- "$work_dir"' EXIT

output_file="compression_metrics_$(date +%Y%m%d_%H%M%S).csv"

# Create output CSV file with header
echo "filename,original_size,compressor,compressed_size,compression_ratio,compression_time,decompression_time" > "$output_file"

# Process each file in the dataset
for file in "$dataset_dir"/*; do
  # Skip directories, broken symlinks and the unexpanded pattern of an empty
  # directory.
  [ -f "$file" ] || continue

  filename=${file##*/}
  # -L: report the size of the data that will actually be compressed when the
  # entry is a symlink to a regular file.
  if ! original_size=$(stat -L -c%s "$file"); then
    echo "Warning: cannot stat '$file'; skipping" >&2
    continue
  fi

  echo "Processing: $filename"
  for compressor in gzip lz4; do
    echo " Testing ${compressor}..."
    measure "$compressor" "$file" "$filename" "$original_size"
  done
done

echo "All tests completed. Results saved to $output_file"