Skip to content

Instantly share code, notes, and snippets.

@martimatix
Created July 23, 2024 22:48
Show Gist options
  • Save martimatix/532129539805444cf8a65ee99f5370ff to your computer and use it in GitHub Desktop.
Save martimatix/532129539805444cf8a65ee99f5370ff to your computer and use it in GitHub Desktop.
histogram
import pandas as pd
import matplotlib.pyplot as plt
import json
from io import StringIO
# Path of the JSON Lines input: one JSON object per line, each carrying at
# least a 'date' and a 'num_docs' field.
INPUT_PATH = 'your_file.json'


def load_records(path: str) -> pd.DataFrame:
    """Load a JSON Lines file into a DataFrame.

    The path is handed straight to pandas (no manual read / StringIO
    round-trip) and decoded explicitly as UTF-8 so the result does not
    depend on the platform's default encoding.
    """
    return pd.read_json(path, lines=True, encoding='utf-8')


def monthly_totals(df: pd.DataFrame) -> pd.DataFrame:
    """Sum 'num_docs' per calendar month of 'date'.

    Args:
        df: frame with a 'date' column (anything ``pd.to_datetime`` accepts)
            and a 'num_docs' column. The frame is not modified.

    Returns:
        A frame with columns 'date' (monthly ``Period``) and 'num_docs'
        (the monthly sum), ordered chronologically.

    Raises:
        KeyError: if 'date' or 'num_docs' is missing from ``df``.
    """
    missing = [col for col in ('date', 'num_docs') if col not in df.columns]
    if missing:
        raise KeyError(f"input is missing required column(s): {missing}")

    # Parse into a separate Series rather than overwriting df['date'], so the
    # caller's frame is left untouched.
    months = pd.to_datetime(df['date']).dt.to_period('M')

    # groupby sorts its keys by default, so the result is already in
    # chronological order and needs no extra sort_values() pass.
    return df.groupby(months)['num_docs'].sum().reset_index()


def plot_monthly_totals(
    monthly: pd.DataFrame,
    title: str = 'Monthly Document Count for 2023',
) -> None:
    """Draw a bar chart of the monthly totals and display it.

    Relies on the file-level ``matplotlib.pyplot`` import (``plt``).

    Args:
        monthly: frame with 'date' and 'num_docs' columns, as produced by
            ``monthly_totals``.
        title: chart title. NOTE(review): the default mentions 2023, but
            nothing here restricts the data to that year - pass a different
            title (or filter the data) if the input spans other years.
    """
    plt.figure(figsize=(12, 6))
    # Periods are rendered as 'YYYY-MM' strings so each month is a
    # categorical tick label.
    plt.bar(monthly['date'].astype(str), monthly['num_docs'])

    plt.title(title)
    plt.xlabel('Month')
    plt.ylabel('Number of Documents')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    plt.show()


def main() -> None:
    """Read INPUT_PATH, aggregate by month, and show the chart."""
    records = load_records(INPUT_PATH)
    plot_monthly_totals(monthly_totals(records))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment