chore: Restructure

# Conflicts:
#	config/measure.py
#	results/compression_results_auto_small.csv
This commit is contained in:
Tibo De Peuter 2025-12-18 11:18:39 +01:00
parent dd938489ef
commit 2f869a8a7a
Signed by: tdpeuter
GPG key ID: 38297DE43F75FFE2
41 changed files with 8 additions and 21 deletions

View file

@@ -1,114 +0,0 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
def _format_size(size_bytes):
    """Return *size_bytes* as a one-decimal string in B, KB, MB or GB (1024-based)."""
    for unit in ('B', 'KB', 'MB'):
        if size_bytes < 1024.0:
            return f"{size_bytes:.1f} {unit}"
        size_bytes /= 1024.0
    # Whatever is left after three divisions by 1024 is reported in GB,
    # however large it is.
    return f"{size_bytes:.1f} GB"


def _save_grouped_barplot(df, metric, title, ylabel, out_path, log_scale=False):
    """Draw one bar chart of *metric* per file, grouped by compressor, and save it."""
    plt.figure()
    sns.barplot(
        x='filename',
        y=metric,
        hue='compressor',
        data=df,
        palette='viridis',
    )
    plt.title(title)
    plt.xlabel('File')
    plt.ylabel(ylabel)
    if log_scale:
        plt.yscale('log')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()


def plot_compression_metrics(csv_path, output_dir="results/compress/plots"):
    """Generate visualizations for compression metrics.

    Reads the CSV at ``csv_path`` and writes four PNG files into
    ``output_dir`` (created if missing):

    * ``compression_ratio_comparison.png``
    * ``compression_time_comparison.png`` (log-scaled y axis)
    * ``decompression_time_comparison.png`` (log-scaled y axis)
    * ``ratio_vs_time.png`` (scatter, one series per compressor)

    The CSV must provide the columns ``filename``, ``compressor``,
    ``original_size``, ``compression_ratio``, ``compression_time`` and
    ``decompression_time``.

    As a side effect this sets the global seaborn style to ``whitegrid`` and
    the default matplotlib figure size to 12x6.

    Args:
        csv_path (str): Path to the CSV file with compression metrics
        output_dir (str): Directory to save the output plots
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Read the data
    df = pd.read_csv(csv_path)

    # Add a human-readable size column. None of the plots below read it; it
    # is kept so the frame (and the required input columns) stay unchanged.
    df['file_size'] = df['original_size'].apply(_format_size)

    # Set up the plotting style
    sns.set(style="whitegrid")
    plt.rcParams['figure.figsize'] = (12, 6)

    # 1. Compression Ratio Comparison
    _save_grouped_barplot(
        df,
        metric='compression_ratio',
        title='Compression Ratio by File and Compressor',
        ylabel='Compression Ratio (Higher is better)',
        out_path=os.path.join(output_dir, "compression_ratio_comparison.png"),
    )

    # 2. Compression Time Comparison
    _save_grouped_barplot(
        df,
        metric='compression_time',
        title='Compression Time by File and Compressor',
        ylabel='Compression Time (seconds) (log)',
        out_path=os.path.join(output_dir, "compression_time_comparison.png"),
        log_scale=True,
    )

    # 3. Decompression Time Comparison
    _save_grouped_barplot(
        df,
        metric='decompression_time',
        title='Decompression Time by File and Compressor',
        ylabel='Decompression Time (seconds) (log)',
        out_path=os.path.join(output_dir, "decompression_time_comparison.png"),
        log_scale=True,
    )

    # 4. Scatter plot: Compression Ratio vs Compression Time
    plt.figure(figsize=(10, 6))
    # One scatter call per compressor so each gets its own colour and
    # legend entry.
    for compressor in df['compressor'].unique():
        subset = df[df['compressor'] == compressor]
        plt.scatter(
            subset['compression_ratio'],
            subset['compression_time'],
            label=compressor.upper(),
            s=100,
            alpha=0.7,
        )
    plt.title('Compression Ratio vs Compression Time')
    plt.xlabel('Compression Ratio (Higher is better)')
    plt.ylabel('Compression Time (seconds)')
    plt.legend(title='Compressor')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "ratio_vs_time.png"), dpi=300, bbox_inches='tight')
    plt.close()

    print(f"Plots saved to {output_dir}/")
if __name__ == "__main__":
    # Script entry point: edit the path below to plot a different metrics CSV.
    plot_compression_metrics(
        csv_path="results/compress/compression_metrics_20251216_182603.csv",
    )