Skip to content

Instantly share code, notes, and snippets.

@billmetangmo
Created October 6, 2023 11:17
Show Gist options
  • Save billmetangmo/7baa9ad60652a8f34114b3afcd0c6ea1 to your computer and use it in GitHub Desktop.
Save billmetangmo/7baa9ad60652a8f34114b3afcd0c6ea1 to your computer and use it in GitHub Desktop.
Generate a histogram from GitLab pipeline data scraped with Bardeen
import pandas as pd
import datetime
import re
import matplotlib.pyplot as plt
# Read the CSV file into a DataFrame.
# NOTE(review): the path is hard-coded; point it at your own export.
CSV_PATH = "/mnt/data/06-10-2023_10-10.csv"
df = pd.read_csv(CSV_PATH)
# Function to convert status text to timedelta
# Units are tried in this order (first hit wins); each pattern accepts both the
# singular and the plural spelling ("1 day", "3 days").
_RELATIVE_AGE_PATTERNS = (
    ("minutes", re.compile(r"(\d+)\s+minutes?")),
    ("hours", re.compile(r"(\d+)\s+hours?")),
    ("days", re.compile(r"(\d+)\s+days?")),
    ("weeks", re.compile(r"(\d+)\s+weeks?")),
)


def convert_to_timedelta_updated_with_weeks(text):
    """Convert the relative age found in a status text into a timedelta.

    The text is searched for "<number> <unit>" where the unit is minute(s),
    hour(s), day(s) or week(s), checked in that order; the first unit that
    matches determines the result.

    Args:
        text: Status text such as "Passed 00:02:13 2 hours ago".

    Returns:
        A ``datetime.timedelta`` for the matched age, or a zero timedelta when
        no supported "<number> <unit>" pair is present or ``text`` is not a
        string (e.g. a NaN cell).
    """
    # Missing cells arrive as float NaN; treat them like unparseable text.
    if not isinstance(text, str):
        return datetime.timedelta()

    for unit, pattern in _RELATIVE_AGE_PATTERNS:
        match = pattern.search(text)
        if match:
            # The unit names double as timedelta keyword arguments.
            return datetime.timedelta(**{unit: int(match.group(1))})

    # NOTE(review): ages in any other unit (e.g. seconds, months, years) fall
    # through to zero, i.e. are treated as "now".
    return datetime.timedelta()
# Filter jobs with "Passed" status.
# na=False makes rows with a missing Status count as "not passed" instead of
# leaving NaN entries in the boolean mask, which cannot be used for indexing.
passed_jobs = df[df['Status'].str.contains('Passed', na=False)].copy()

# Calculate the ExecutionDate, ExecutionDateOnly and Stage columns.
# The status text carries a relative age; subtracting it from the current time
# gives an approximate execution timestamp. pd.to_timedelta pins the dtype so
# the subtraction and the .dt accessor below keep working on an empty selection.
time_ago = pd.to_timedelta(
    passed_jobs['Status'].apply(convert_to_timedelta_updated_with_weeks)
)
current_time = datetime.datetime.now()
passed_jobs['ExecutionDate'] = current_time - time_ago
# The aggregation below groups per calendar day, so keep the date part in its
# own column (this column was previously never created, causing a KeyError).
passed_jobs['ExecutionDateOnly'] = passed_jobs['ExecutionDate'].dt.date
passed_jobs['Stage'] = passed_jobs['Pipeline'].str.extract(r'Stage: (\w+)', expand=False)

# Extract and convert execution time (the HH:MM:SS token in the status text).
passed_jobs['ExecutionTime'] = pd.to_timedelta(
    passed_jobs['Status'].str.extract(r'(\d{2}:\d{2}:\d{2})', expand=False)
)
passed_jobs['ExecutionTimeSeconds'] = passed_jobs['ExecutionTime'].dt.total_seconds()

# Group data and calculate mean execution time per (day, stage).
mean_execution_times_seconds = passed_jobs.groupby(['ExecutionDateOnly', 'Stage'])['ExecutionTimeSeconds'].mean()
# One row per day, one column per stage; combinations with no data become 0.
mean_execution_times_seconds_unstacked = mean_execution_times_seconds.unstack()
mean_execution_times_seconds_filled = mean_execution_times_seconds_unstacked.fillna(0)
# Palette of ten hex colors handed to the bar plot.
colors = [
    '#E63946',
    '#F1FAEE',
    '#A8DADC',
    '#457B9D',
    '#1D3557',
    '#F4A261',
    '#2A9D8F',
    '#264653',
    '#E76F51',
    '#2B2D42',
]

# Draw the per-day / per-stage averages as a bar chart on a single axes.
fig, ax = plt.subplots(figsize=(15, 7))
mean_execution_times_seconds_filled.plot(ax=ax, color=colors, kind='bar')
ax.set(
    ylabel='Average Execution Time (seconds)',
    title='Average Execution Time by Day for Each Stage',
)

# Tilt the x tick labels, tighten the layout, then add the legend and display.
plt.xticks(rotation=45)
plt.tight_layout()
plt.legend(title="Stage")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment