import os
import sys

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Columns that the plots below read from the metrics CSV.
_REQUIRED_COLUMNS = (
    "filename",
    "compressor",
    "compression_ratio",
    "compression_time",
    "decompression_time",
)

_BAR_FIGSIZE = (12, 6)
_SCATTER_FIGSIZE = (10, 6)

# One entry per bar chart:
# (metric column, title, y-axis label, log-scale y-axis?, output file name)
_BAR_PLOTS = (
    (
        "compression_ratio",
        "Compression Ratio by File and Compressor",
        "Compression Ratio (Higher is better)",
        False,
        "compression_ratio_comparison.png",
    ),
    (
        "compression_time",
        "Compression Time by File and Compressor",
        "Compression Time (seconds) (log)",
        True,
        "compression_time_comparison.png",
    ),
    (
        "decompression_time",
        "Decompression Time by File and Compressor",
        "Decompression Time (seconds) (log)",
        True,
        "decompression_time_comparison.png",
    ),
)


def _format_size(size_bytes):
    """Return *size_bytes* as a human-readable string in B, KB, MB or GB."""
    for unit in ("B", "KB", "MB"):
        if size_bytes < 1024.0:
            return f"{size_bytes:.1f} {unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.1f} GB"


def _save_bar_plot(df, column, title, ylabel, log_scale, out_path):
    """Draw one grouped bar chart of *column* per file/compressor and save it."""
    fig = plt.figure(figsize=_BAR_FIGSIZE)
    try:
        sns.barplot(
            x="filename",
            y=column,
            hue="compressor",
            data=df,
            palette="viridis",
        )
        plt.title(title)
        plt.xlabel("File")
        plt.ylabel(ylabel)
        if log_scale:
            plt.yscale("log")
        plt.xticks(rotation=45, ha="right")
        plt.tight_layout()
        plt.savefig(out_path, dpi=300, bbox_inches="tight")
    finally:
        # Close even when drawing/saving fails so figures do not accumulate.
        plt.close(fig)


def _save_ratio_vs_time_scatter(df, out_path):
    """Draw compression ratio against compression time, one series per compressor."""
    fig = plt.figure(figsize=_SCATTER_FIGSIZE)
    try:
        # sort=False keeps the compressors in order of first appearance.
        for compressor, subset in df.groupby("compressor", sort=False):
            plt.scatter(
                subset["compression_ratio"],
                subset["compression_time"],
                # str() so non-string labels (e.g. numeric levels) do not crash.
                label=str(compressor).upper(),
                s=100,
                alpha=0.7,
            )
        plt.title("Compression Ratio vs Compression Time")
        plt.xlabel("Compression Ratio (Higher is better)")
        plt.ylabel("Compression Time (seconds)")
        plt.legend(title="Compressor")
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig(out_path, dpi=300, bbox_inches="tight")
    finally:
        plt.close(fig)


def plot_compression_metrics(csv_path, output_dir="results/compress/plots"):
    """Generate visualizations for compression metrics.

    Writes four PNG files into *output_dir*: one grouped bar chart each for
    compression ratio, compression time and decompression time, plus a
    scatter plot of compression ratio against compression time.

    Args:
        csv_path (str): Path to the CSV file with compression metrics. It
            must contain the columns ``filename``, ``compressor``,
            ``compression_ratio``, ``compression_time`` and
            ``decompression_time``.
        output_dir (str): Directory to save the output plots. Created if it
            does not exist.

    Raises:
        ValueError: If the CSV lacks any of the required columns.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    df = pd.read_csv(csv_path)

    # Fail before drawing anything instead of midway through the plots.
    missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise ValueError(
            f"{csv_path} is missing required column(s): {', '.join(missing)}"
        )

    # Human-readable size column. None of the plots below read it, so it is
    # only added when the source column exists rather than being required.
    if "original_size" in df.columns:
        df["file_size"] = df["original_size"].apply(_format_size)

    # Set up the plotting style (these settings are global to the process).
    sns.set(style="whitegrid")
    plt.rcParams["figure.figsize"] = _BAR_FIGSIZE

    # 1-3. Bar charts: ratio, compression time, decompression time
    for column, title, ylabel, log_scale, file_name in _BAR_PLOTS:
        _save_bar_plot(
            df,
            column,
            title,
            ylabel,
            log_scale,
            os.path.join(output_dir, file_name),
        )

    # 4. Scatter plot: Compression Ratio vs Compression Time
    _save_ratio_vs_time_scatter(df, os.path.join(output_dir, "ratio_vs_time.png"))

    print(f"Plots saved to {output_dir}/")


if __name__ == "__main__":
    # Pass the CSV path as the first command-line argument, or edit the
    # default below to point to your CSV file.
    default_csv_path = "results/compress/compression_metrics_20251216_182603.csv"
    csv_path = sys.argv[1] if len(sys.argv) > 1 else default_csv_path
    plot_compression_metrics(csv_path)