Created
February 16, 2024 10:47
-
-
Save Digma/a2c67428b69113714325075e039dc475 to your computer and use it in GitHub Desktop.
pc-gamer-gaming-industry-layoff-plot-pandas-matplotlib.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import os

import numpy as np
import pandas as pd
# Data from: https://publish.obsidian.md/vg-layoffs/Archive/2024
# The data is stored in a directory with one CSV per year. Change the directory name accordingly.
path = './data/gaming-layoff/'

# os.path.join keeps the path concatenation OS independent
all_files = glob.glob(os.path.join(path, "*.csv"))
if not all_files:
    # Fail early with an explicit message; pd.concat on an empty sequence
    # would otherwise raise the much less helpful "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {path!r}")

# Load all the CSVs into a single dataframe with a fresh 0..n-1 index
df = pd.concat((pd.read_csv(f) for f in all_files), ignore_index=True)

# Lowercase the column names so the `date` / `headcount` lookups below do not
# depend on the capitalisation used in the CSV headers
df.columns = df.columns.str.lower()

# Convert the date column to datetime
df['date'] = pd.to_datetime(df['date'])

# Keep only the rows dated on or after 2023-01-01
df = df.query("date >= '2023-01-01'")

# Explode the dataset to get 1 row per layoff: every row label is repeated
# `headcount` times and only the date column is carried over
df_per_head = df[["date"]].loc[df.index.repeat(df.headcount)]

# Spread the dates out randomly over an interval of 7 days (0 to 24*7 hours).
# The unit is lowercase "h": the uppercase "H" alias is deprecated in pandas 2.2+.
df_per_head["date_randomized"] = df_per_head["date"] + pd.to_timedelta(
    np.random.rand(len(df_per_head)) * 24 * 7, unit="h"
)

# Add a random y value between 0 and 1
df_per_head["y"] = np.random.rand(len(df_per_head))
from datetime import datetime

import matplotlib
import matplotlib.pyplot as plt

# Render at a high resolution
matplotlib.rcParams["figure.dpi"] = 300

# Colour palette shared by the rest of the script
GREY10 = "#1a1a1a"
GREY30 = "#4d4d4d"
GREY40 = "#666666"
BACKGROUND_COLOR = "#eedccb"
BACKGROUND_AXES_COLOR = "#dbcebe"

# Create the plot: one tiny black dot per row of df_per_head
fig, ax = plt.subplots(figsize=(8, 3.5))
ax.scatter(
    df_per_head['date_randomized'],
    df_per_head['y'],
    s=0.4,
    linewidths=0.02,
    c='black',
)

# Background colours for the figure and for the plotting area
fig.patch.set_facecolor(BACKGROUND_COLOR)
ax.set_facecolor(BACKGROUND_AXES_COLOR)

# Axes limits: fixed date window on x, the full 0..1 band on y
ax.set_xlim(datetime(2023, 1, 1), datetime(2024, 1, 31))
ax.set_ylim(0, 1)

# Remove unneeded spines; only the bottom one stays visible
for side in ("left", "right", "top"):
    ax.spines[side].set_color("none")
ax.spines['bottom'].set_color(GREY10)

# Remove y axis ticks and labels
ax.yaxis.set_ticks([])
ax.yaxis.set_ticklabels([])
# The x axis is split into 2 parts: months and years
# 1. Month x-axis
import matplotlib.dates as dates
import matplotlib.ticker as ticker

# Major ticks sit on month boundaries and only draw the tick marks (labels hidden)
ax.xaxis.set_major_locator(dates.MonthLocator())
ax.xaxis.set_major_formatter(ticker.NullFormatter())

# Minor ticks sit on the 16th of each month and carry the abbreviated month name
ax.xaxis.set_minor_locator(dates.MonthLocator(bymonthday=16))
ax.xaxis.set_minor_formatter(dates.DateFormatter('%b'))

# Month x-axis: tick style (zero-length minor ticks leave only their labels)
ax.tick_params(axis='x', which='minor', length=0)
ax.tick_params(axis='x', which='major', pad=2, color=GREY30)

# Month x-axis: label style
for month_label in ax.get_xticklabels(minor=True):
    month_label.set(color=GREY30, fontsize=5, horizontalalignment='center')

# Month x-axis: add an arrow head just past the right end of the axis.
# The y-axis transform places x in axes coordinates and y in data coordinates.
ax.plot(
    1.002,
    0,
    "k>",
    transform=ax.get_yaxis_transform(),
    clip_on=False,
    markersize=3,
)
# 2. Year x-axis, drawn on a twin axes that shares the same date range
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())

# Hide every spine of the twin axes
for side in ("left", "right", "top", "bottom"):
    ax2.spines[side].set_color("none")

# Position the new axis, its labels and ticks below the existing (month) one
ax2.spines["bottom"].set_position(("axes", -0.05))
ax2.xaxis.set_ticks_position('bottom')
ax2.xaxis.set_label_position('bottom')

# Major ticks mark the year boundaries and only draw the tick marks (labels hidden)
ax2.xaxis.set_major_locator(dates.YearLocator())
ax2.xaxis.set_major_formatter(ticker.NullFormatter())

# Minor ticks sit in the 7th month of each year and carry the year label
ax2.xaxis.set_minor_locator(dates.YearLocator(month=7))
ax2.xaxis.set_minor_formatter(dates.DateFormatter('%Y'))

# Year x-axis: tick style (zero-length minor ticks leave only their labels)
ax2.tick_params(axis='x', which='minor', length=0)
# direction='in' is a trick to connect the tick line to the main axis
ax2.tick_params(
    axis='x',
    which='major',
    length=10,
    width=1.15,
    direction='in',
    color=GREY30,
)

# Year x-axis: label style
for year_label in ax2.get_xticklabels(minor=True):
    year_label.set(
        color=GREY30,
        fontsize=7,
        fontweight='bold',
        horizontalalignment='center',
    )
# Title and subtitle share the same left edge, colour and font
caption_x = 0.125
caption_style = {"color": GREY30, "fontname": "Arial"}

# Title
fig.text(
    caption_x,
    0.95,
    "GAMING INDUSTRY LAYOFFS JAN 2023 - 2024",
    fontsize=11,
    weight="bold",
    **caption_style,
)

# Subtitle
fig.text(
    caption_x,
    0.91,
    "Each dot represents one of the approximately 16,000 people affected since January 2023",
    fontsize=6,
    **caption_style,
)

plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment