114 lines
No EOL
3.4 KiB
Python
114 lines
No EOL
3.4 KiB
Python
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
import os
|
|
|
|
|
|
def _format_size(size_bytes):
    """Return *size_bytes* as a one-decimal string in B, KB, MB or GB.

    The value is divided by 1024 until it drops below 1024 or the GB unit
    is reached; anything that is still >= 1024 MB is reported in GB.
    """
    for unit in ('B', 'KB', 'MB'):
        if size_bytes < 1024.0:
            return f"{size_bytes:.1f} {unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.1f} GB"


def _plot_metric_bars(df, metric, title, ylabel, filename, output_dir, log_scale=False):
    """Save a grouped bar chart of *metric* per file, one bar per compressor.

    Args:
        df: DataFrame with 'filename', 'compressor' and *metric* columns.
        metric (str): Name of the column plotted on the y axis.
        title (str): Figure title.
        ylabel (str): Label of the y axis.
        filename (str): Name of the PNG file written inside *output_dir*.
        output_dir (str): Directory the PNG is written to.
        log_scale (bool): Use a logarithmic y axis when True.
    """
    fig = plt.figure()
    try:
        sns.barplot(
            x='filename',
            y=metric,
            hue='compressor',
            data=df,
            palette='viridis',
        )
        plt.title(title)
        plt.xlabel('File')
        plt.ylabel(ylabel)
        if log_scale:
            plt.yscale('log')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, filename), dpi=300, bbox_inches='tight')
    finally:
        # Close even when plotting/saving fails so the figure does not stay
        # registered in pyplot's global figure list.
        plt.close(fig)


def _plot_ratio_vs_time(df, output_dir):
    """Save a scatter plot of compression ratio against compression time.

    Each distinct value of the 'compressor' column becomes its own series,
    labelled with the upper-cased compressor name.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        # sort=False keeps the series in order of first appearance in the CSV.
        for compressor, subset in df.groupby('compressor', sort=False):
            plt.scatter(
                subset['compression_ratio'],
                subset['compression_time'],
                # str() so non-string labels (e.g. numeric levels) do not crash.
                label=str(compressor).upper(),
                s=100,
                alpha=0.7,
            )

        plt.title('Compression Ratio vs Compression Time')
        plt.xlabel('Compression Ratio (Higher is better)')
        plt.ylabel('Compression Time (seconds)')
        plt.legend(title='Compressor')
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "ratio_vs_time.png"), dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)


def plot_compression_metrics(csv_path, output_dir="results/compress/plots"):
    """Generate visualizations for compression metrics.

    Reads the CSV at *csv_path* and writes four PNG files into *output_dir*
    (created if missing):

    * ``compression_ratio_comparison.png``
    * ``compression_time_comparison.png`` (log-scaled y axis)
    * ``decompression_time_comparison.png`` (log-scaled y axis)
    * ``ratio_vs_time.png``

    The CSV must provide the columns ``filename``, ``compressor``,
    ``compression_ratio``, ``compression_time``, ``decompression_time`` and
    ``original_size``; a missing column raises ``KeyError``.

    Args:
        csv_path (str): Path to the CSV file with compression metrics
        output_dir (str): Directory to save the output plots
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Read the data
    df = pd.read_csv(csv_path)

    # Add a human-readable size column. None of the plots below read it;
    # it is kept so the frame matches what the original code built.
    df['file_size'] = df['original_size'].apply(_format_size)

    # Set up the plotting style
    sns.set(style="whitegrid")
    plt.rcParams['figure.figsize'] = (12, 6)

    # 1. Compression Ratio Comparison
    _plot_metric_bars(
        df,
        metric='compression_ratio',
        title='Compression Ratio by File and Compressor',
        ylabel='Compression Ratio (Higher is better)',
        filename='compression_ratio_comparison.png',
        output_dir=output_dir,
    )

    # 2. Compression Time Comparison
    _plot_metric_bars(
        df,
        metric='compression_time',
        title='Compression Time by File and Compressor',
        ylabel='Compression Time (seconds) (log)',
        filename='compression_time_comparison.png',
        output_dir=output_dir,
        log_scale=True,
    )

    # 3. Decompression Time Comparison
    _plot_metric_bars(
        df,
        metric='decompression_time',
        title='Decompression Time by File and Compressor',
        ylabel='Decompression Time (seconds) (log)',
        filename='decompression_time_comparison.png',
        output_dir=output_dir,
        log_scale=True,
    )

    # 4. Scatter plot: Compression Ratio vs Compression Time
    _plot_ratio_vs_time(df, output_dir)

    print(f"Plots saved to {output_dir}/")
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # Fallback CSV used when no path is supplied on the command line.
    default_csv_path = "results/compress/compression_metrics_20251216_182603.csv"

    # Usage: python <this script> [path/to/metrics.csv]
    csv_path = sys.argv[1] if len(sys.argv) > 1 else default_csv_path
    plot_compression_metrics(csv_path)