diff --git a/benchmarking/plot_finetuning_overheads.py b/benchmarking/plot_finetuning_overheads.py
new file mode 100644
index 0000000000..259b34e4b7
--- /dev/null
+++ b/benchmarking/plot_finetuning_overheads.py
@@ -0,0 +1,178 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import os
+
+# Read the CSV file
+def plot_fwd_overhead(filepath, num_tokens_per_batch):
+    # Load the CSV file
+    df = pd.read_csv(filepath)
+    
+    # Calculate step_time as difference between consecutive timestamps
+    # Convert from microseconds to milliseconds (divide by 1000)
+    df['step_time'] = df['timestamp'].diff() / 1000
+    
+    # Filter rows based on the specified conditions
+    filtered_df = df[
+        (df['num_decoding_tokens'] == 8) &
+        (df['num_prefilling_tokens'] == 0) &
+        (df['num_finetuning_fwd_tokens'] == 0) &
+        (df['num_finetuning_bwd_tokens'] == 0)
+    ]
+    
+    # Calculate statistics for step_time
+    avg_step_time = filtered_df['step_time'].mean()
+    std_step_time = filtered_df['step_time'].std()
+    
+    # print(f"Analysis Results:")
+    # print(f"Number of matching rows: {len(filtered_df)}")
+    # print(f"Average step time: {avg_step_time:.3f} milliseconds")
+    # print(f"Standard deviation of step time: {std_step_time:.3f} milliseconds")
+    print(f"Step time: {avg_step_time:.3f} ± {std_step_time:.3f} ms ({len(filtered_df)} entries)")
+
+    if num_tokens_per_batch ==128:
+        values_of_interest=[1,14,27,41,54,67,80,94,107,120]
+    elif num_tokens_per_batch == 256:
+        values_of_interest=[1,28,56,83,111,138,166,193,221,248]
+    elif num_tokens_per_batch == 512:
+        values_of_interest=[1,57,113,169,225,280,336,392,448,504]
+
+    # Second analysis: Variable finetuning tokens
+    filtered_df_2 = df[
+        (df['is_warmup_step'] == 0) &
+        (df['num_decoding_tokens'] == 8) &
+        (df['num_prefilling_tokens'] == 0) &
+        (df['num_finetuning_bwd_tokens'] == 0) &
+        (df['num_finetuning_fwd_tokens'].isin(values_of_interest))
+    ]
+    filtered_df_2 = filtered_df_2[['num_finetuning_fwd_tokens', 'step_time']]
+    # filtered_df_2 = filtered_df_2.groupby('num_finetuning_fwd_tokens').mean().reset_index()
+    # sort by num_finetuning_fwd_tokens
+    # filtered_df_2 = filtered_df_2.sort_values('num_finetuning_fwd_tokens')
+    # print(filtered_df_2)
+    # print(filtered_df_2[['num_finetuning_fwd_tokens', 'step_time']].head())
+    
+    # Create scatter plot
+    plt.figure(figsize=(10, 6))
+    sns.scatterplot(data=filtered_df_2, 
+                   x='num_finetuning_fwd_tokens', 
+                   y='step_time',
+                   alpha=0.6)
+    
+    plt.title('Step Time vs Number of Finetuning Forward Tokens\nMax Tokens per Batch: ' + str(num_tokens_per_batch))
+    plt.xlabel('Number of Finetuning Forward Tokens')
+    plt.ylabel('Step Time (milliseconds)')
+    
+    # Add trend line
+    avg_std_df = filtered_df_2.groupby('num_finetuning_fwd_tokens').agg(
+        avg_step_time=('step_time', 'mean'),
+        std_step_time=('step_time', 'std')
+    ).reset_index()
+
+    plt.errorbar(avg_std_df['num_finetuning_fwd_tokens'], 
+                 avg_std_df['avg_step_time'], 
+                 yerr=avg_std_df['std_step_time'], 
+                 fmt='-o', 
+                 color='red', 
+                 ecolor='gray', 
+                 elinewidth=2, 
+                 capsize=4)
+    
+    plt.grid(True, linestyle='--', alpha=0.7)
+    plt.tight_layout()
+
+    plt.savefig(f'./plots/fwd_overhead_{num_tokens_per_batch}.pdf', bbox_inches='tight')
+
+    # plt.show()
+    
+def plot_bwd_overhead(filepath, num_tokens_per_batch):
+    # Load the CSV file
+    df = pd.read_csv(filepath)
+    
+    # Calculate step_time as difference between consecutive timestamps
+    # Convert from microseconds to milliseconds (divide by 1000)
+    df['step_time'] = df['timestamp'].diff() / 1000
+    
+    # Filter rows based on the specified conditions
+    filtered_df = df[
+        (df['num_decoding_tokens'] == 8) &
+        (df['num_prefilling_tokens'] == 0) &
+        (df['num_finetuning_fwd_tokens'] == 0) &
+        (df['num_finetuning_bwd_tokens'] == 0)
+    ]
+    
+    # Calculate statistics for step_time
+    avg_step_time = filtered_df['step_time'].mean()
+    std_step_time = filtered_df['step_time'].std()
+    
+    # print(f"Analysis Results:")
+    # print(f"Number of matching rows: {len(filtered_df)}")
+    # print(f"Average step time: {avg_step_time:.3f} milliseconds")
+    # print(f"Standard deviation of step time: {std_step_time:.3f} milliseconds")
+    print(f"Step time: {avg_step_time:.3f} ± {std_step_time:.3f} ms ({len(filtered_df)} entries)")
+
+    values_of_interest=[1,10,19,27,36,45,54,62,71,80]
+
+    # Second analysis: Variable finetuning tokens
+    filtered_df_2 = df[
+        (df['is_warmup_step'] == 0) &
+        (df['num_decoding_tokens'] == 8) &
+        (df['num_prefilling_tokens'] == 0) &
+        (df['num_finetuning_fwd_tokens'] == 0) &
+        (df['num_finetuning_bwd_tokens'] == 1024) &
+        (df['num_bwd_layers'].isin(values_of_interest))
+    ]
+    filtered_df_2 = filtered_df_2[['num_bwd_layers', 'step_time']]
+    
+    # Create scatter plot
+    plt.figure(figsize=(10, 6))
+    sns.scatterplot(data=filtered_df_2, 
+                   x='num_bwd_layers', 
+                   y='step_time',
+                   alpha=0.6)
+    
+    plt.title('Step Time vs Number of BWD Finetuning Layers\nMax Tokens per Batch: ' + str(num_tokens_per_batch))
+    plt.xlabel('Number of BWD Finetuning Layers')
+    plt.ylabel('Step Time (milliseconds)')
+    
+    # Add trend line
+    avg_std_df = filtered_df_2.groupby('num_bwd_layers').agg(
+        avg_step_time=('step_time', 'mean'),
+        std_step_time=('step_time', 'std')
+    ).reset_index()
+
+    plt.errorbar(avg_std_df['num_bwd_layers'], 
+                 avg_std_df['avg_step_time'], 
+                 yerr=avg_std_df['std_step_time'], 
+                 fmt='-o', 
+                 color='red', 
+                 ecolor='gray', 
+                 elinewidth=2, 
+                 capsize=4)
+    
+    plt.grid(True, linestyle='--', alpha=0.7)
+    plt.tight_layout()
+
+    plt.savefig(f'./plots/bwd_overhead_{num_tokens_per_batch}.pdf', bbox_inches='tight')
+
+    # plt.show()
+
+if __name__ == "__main__":
+
+    # Change working directory to folder containing this script
+    abspath = os.path.abspath(__file__)
+    dname = os.path.dirname(abspath)
+    os.chdir(dname)
+
+    # Make plots directory if it doesn't exist
+    if not os.path.exists('./plots'):
+        os.makedirs('./plots')
+
+    tp_degree=8
+
+    for tokens_per_batch in [128, 256, 512]:
+        fp=f"../inference/output/overhead_test/step_profiling_meta-llama_llama-3.1-70b_tensor_parallelism_{tp_degree}_max_requests_per_batch_8_max_tokens_per_batch_{tokens_per_batch}_arrival_rate_0.000000_num_warmup_requests_10.csv"
+        
+        plot_fwd_overhead(fp, tokens_per_batch)
+        plot_bwd_overhead(fp, tokens_per_batch)
\ No newline at end of file