#!/bin/bash
#
# Benchmark gzip and lz4 on every regular file directly inside a directory.
#
# Usage: <this script> <dataset_directory>
#
# For each file, each compressor is run once; one CSV row per
# (file, compressor) pair is appended to
# compression_metrics_<YYYYmmdd_HHMMSS>.csv in the current directory:
#
#   filename,original_size,compressor,compressed_size,
#   compression_ratio,compression_time,decompression_time
#
# Sizes are in bytes, times in seconds, ratio = original / compressed.
#
# The dataset directory is only ever read: compressed artefacts go to a
# private temporary directory (honours $TMPDIR) and decompressed output is
# discarded, so existing *.gz / *.lz4 files and the originals are never
# overwritten or deleted.
#
# Requires: gzip, lz4, bc, and GNU stat / GNU date (for -c%s and %N).

set -uo pipefail

# Scratch directory for compressed artefacts; set in main, removed on exit.
work_dir=""

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a non-fatal warning to stderr.
warn() {
  printf 'Warning: %s\n' "$*" >&2
}

# EXIT trap: remove the scratch directory if it was created.
cleanup() {
  if [[ -n "$work_dir" ]]; then
    rm -rf -- "$work_dir"
  fi
}

# Print the seconds elapsed since timestamp $1 (a `date +%s.%N` value).
elapsed_since() {
  echo "$(date +%s.%N) - $1" | bc
}

# Print $1 as a CSV field, quoting it only when it contains a comma,
# a double quote or a line break, so ordinary names are emitted unchanged.
csv_field() {
  local value=$1
  case "$value" in
    *'"'* | *,* | *$'\n'* | *$'\r'*)
      value=${value//\"/\"\"}
      printf '"%s"' "$value"
      ;;
    *)
      printf '%s' "$value"
      ;;
  esac
}

# Compress file $2 into $3 using compressor $1 (gzip | lz4).
compress_with() {
  case "$1" in
    gzip) gzip -c "$2" > "$3" ;;
    lz4) lz4 -f -q "$2" "$3" ;;
    *) return 2 ;;
  esac
}

# Decompress archive $2 with compressor $1, discarding the output.
# Nothing is written next to the source, so the original cannot be clobbered.
decompress_with() {
  case "$1" in
    gzip) gzip -d -c < "$2" > /dev/null ;;
    lz4) lz4 -d -q -c "$2" > /dev/null ;;
    *) return 2 ;;
  esac
}

# Benchmark one compressor on one file and append a CSV row.
# Arguments: $1 compressor, $2 file path, $3 display name,
#            $4 original size in bytes, $5 CSV output path
# Returns:   0 if a row was written, 1 if the measurement was skipped.
run_benchmark() {
  local compressor=$1 file=$2 filename=$3 original_size=$4 output_file=$5
  local packed="$work_dir/payload.$compressor"
  local start_time compression_time decompression_time
  local compressed_size compression_ratio

  start_time=$(date +%s.%N)
  if ! compress_with "$compressor" "$file" "$packed"; then
    warn "$compressor failed to compress '$file'; skipping"
    rm -f -- "$packed"
    return 1
  fi
  compression_time=$(elapsed_since "$start_time")

  if ! compressed_size=$(stat -c%s -- "$packed"); then
    warn "cannot determine compressed size of '$file' ($compressor); skipping"
    rm -f -- "$packed"
    return 1
  fi
  # Guard the division: bc would abort on a zero divisor.
  if (( compressed_size <= 0 )); then
    warn "$compressor produced an empty archive for '$file'; skipping"
    rm -f -- "$packed"
    return 1
  fi
  compression_ratio=$(echo "scale=2; $original_size / $compressed_size" | bc -l)

  start_time=$(date +%s.%N)
  if ! decompress_with "$compressor" "$packed"; then
    warn "$compressor failed to decompress archive of '$file'; skipping"
    rm -f -- "$packed"
    return 1
  fi
  decompression_time=$(elapsed_since "$start_time")

  printf '%s,%s,%s,%s,%s,%s,%s\n' \
    "$(csv_field "$filename")" "$original_size" "$compressor" \
    "$compressed_size" "$compression_ratio" \
    "$compression_time" "$decompression_time" >> "$output_file"

  rm -f -- "$packed"
}

main() {
  if [ $# -ne 1 ]; then
    echo "Usage: $0 <dataset_directory>" >&2
    exit 1
  fi

  local dataset_dir=$1
  [[ -d "$dataset_dir" ]] || die "Error: '$dataset_dir' is not a directory"

  # Keep paths from being parsed as options by gzip/lz4.
  if [[ "$dataset_dir" == -* ]]; then
    dataset_dir="./$dataset_dir"
  fi

  local tool
  for tool in gzip lz4 bc; do
    command -v "$tool" > /dev/null || die "Error: required tool '$tool' not found"
  done

  work_dir=$(mktemp -d) || die "Error: cannot create temporary directory"
  trap cleanup EXIT

  local output_file
  output_file="compression_metrics_$(date +%Y%m%d_%H%M%S).csv"

  # Create output CSV file with header
  echo "filename,original_size,compressor,compressed_size,compression_ratio,compression_time,decompression_time" \
    > "$output_file" || die "Error: cannot write '$output_file'"

  local file filename original_size compressor
  for file in "$dataset_dir"/*; do
    # Only regular files are benchmarked (also covers an unmatched glob).
    [[ -f "$file" ]] || continue
    # The results file itself may live in the dataset directory; skip it.
    if [[ "$file" -ef "$output_file" ]]; then
      continue
    fi

    filename=${file##*/}
    if ! original_size=$(stat -c%s -- "$file"); then
      warn "cannot stat '$file'; skipping"
      continue
    fi

    echo "Processing: $filename"
    for compressor in gzip lz4; do
      echo "  Testing $compressor..."
      # Failures are reported by run_benchmark; keep going with the rest.
      run_benchmark "$compressor" "$file" "$filename" "$original_size" "$output_file"
    done
  done

  echo "All tests completed. Results saved to $output_file"
}

main "$@"