This repository has been archived on 2025-12-23. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
2025ML-project-neural_compr.../results/cpu_compression_graphs.py
Tibo De Peuter 2f869a8a7a
chore: Restructure
# Conflicts:
#	config/measure.py
#	results/compression_results_auto_small.csv
2025-12-19 17:58:43 +01:00

114 lines
No EOL
3.4 KiB
Python

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
def _format_size(size_bytes):
    """Format a byte count as a human-readable string.

    The value is divided by 1024 per step through B, KB and MB; anything
    that is still >= 1024 MB is reported in GB.

    Args:
        size_bytes (float): Size in bytes.

    Returns:
        str: The size with one decimal place and its unit, e.g. "1.5 KB".
    """
    for unit in ('B', 'KB', 'MB'):
        if size_bytes < 1024.0:
            return f"{size_bytes:.1f} {unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.1f} GB"


def _save_grouped_barplot(df, metric, title, ylabel, out_path, log_scale=False):
    """Save a bar chart of one metric per file, with one bar per compressor.

    Args:
        df (pandas.DataFrame): Data with 'filename', 'compressor' and
            the *metric* column.
        metric (str): Name of the column plotted on the y axis.
        title (str): Figure title.
        ylabel (str): Label of the y axis.
        out_path (str): Path of the PNG file to write.
        log_scale (bool): Use a logarithmic y axis when True.
    """
    fig = plt.figure()
    try:
        sns.barplot(
            x='filename',
            y=metric,
            hue='compressor',
            data=df,
            palette='viridis',
        )
        plt.title(title)
        plt.xlabel('File')
        plt.ylabel(ylabel)
        if log_scale:
            plt.yscale('log')
        # File names are long; tilt them so they do not overlap.
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.savefig(out_path, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)


def _save_ratio_vs_time_scatter(df, out_path):
    """Save a scatter plot of compression ratio against compression time.

    Each distinct value of the 'compressor' column gets its own series.

    Args:
        df (pandas.DataFrame): Data with 'compressor', 'compression_ratio'
            and 'compression_time' columns.
        out_path (str): Path of the PNG file to write.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        for compressor in df['compressor'].unique():
            subset = df[df['compressor'] == compressor]
            plt.scatter(
                subset['compression_ratio'],
                subset['compression_time'],
                # str() keeps non-string compressor values from crashing.
                label=str(compressor).upper(),
                s=100,
                alpha=0.7,
            )
        plt.title('Compression Ratio vs Compression Time')
        plt.xlabel('Compression Ratio (Higher is better)')
        plt.ylabel('Compression Time (seconds)')
        plt.legend(title='Compressor')
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig(out_path, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)


def plot_compression_metrics(csv_path, output_dir="results/compress/plots"):
    """Generate visualizations for compression metrics.

    Reads the CSV and writes four PNG files into *output_dir*:
    compression_ratio_comparison.png, compression_time_comparison.png,
    decompression_time_comparison.png and ratio_vs_time.png.

    The CSV is expected to contain the columns 'filename', 'compressor',
    'original_size', 'compression_ratio', 'compression_time' and
    'decompression_time'.

    Args:
        csv_path (str): Path to the CSV file with compression metrics
        output_dir (str): Directory to save the output plots

    Raises:
        FileNotFoundError: If *csv_path* does not exist (from pandas).
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Read the data
    df = pd.read_csv(csv_path)

    # Add a human-readable size column.
    # NOTE: 'file_size' is not read by any plot below; it is still computed
    # so the set of required CSV columns stays the same as before.
    df['file_size'] = df['original_size'].apply(_format_size)

    # Set up the plotting style.
    # NOTE: both calls change global matplotlib state, which stays in
    # effect after this function returns.
    sns.set(style="whitegrid")
    plt.rcParams['figure.figsize'] = (12, 6)

    # 1. Compression Ratio Comparison
    _save_grouped_barplot(
        df,
        metric='compression_ratio',
        title='Compression Ratio by File and Compressor',
        ylabel='Compression Ratio (Higher is better)',
        out_path=os.path.join(output_dir, "compression_ratio_comparison.png"),
    )

    # 2. Compression Time Comparison (log scale: times span magnitudes)
    _save_grouped_barplot(
        df,
        metric='compression_time',
        title='Compression Time by File and Compressor',
        ylabel='Compression Time (seconds) (log)',
        out_path=os.path.join(output_dir, "compression_time_comparison.png"),
        log_scale=True,
    )

    # 3. Decompression Time Comparison (log scale as well)
    _save_grouped_barplot(
        df,
        metric='decompression_time',
        title='Decompression Time by File and Compressor',
        ylabel='Decompression Time (seconds) (log)',
        out_path=os.path.join(output_dir, "decompression_time_comparison.png"),
        log_scale=True,
    )

    # 4. Scatter plot: Compression Ratio vs Compression Time
    _save_ratio_vs_time_scatter(
        df,
        out_path=os.path.join(output_dir, "ratio_vs_time.png"),
    )

    print(f"Plots saved to {output_dir}/")
if __name__ == "__main__":
    import sys

    # CSV file plotted when no path is given on the command line.
    default_csv_path = "compression_metrics_20251216_182603.csv"
    # An optional first command-line argument overrides the default, so the
    # script does not have to be edited to plot a different metrics file.
    csv_path = sys.argv[1] if len(sys.argv) > 1 else default_csv_path
    plot_compression_metrics(csv_path)