Created
July 23, 2024 22:48
-
-
Save martimatix/532129539805444cf8a65ee99f5370ff to your computer and use it in GitHub Desktop.
histogram
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
import json | |
from io import StringIO | |
# Load line-delimited JSON records, total 'num_docs' per calendar month, and
# draw the monthly totals as a bar chart.


def load_records(path):
    """Read a JSON Lines file (one JSON object per line) into a DataFrame.

    Args:
        path: Location of the JSON Lines file.

    Returns:
        A DataFrame with one row per line of the file.
    """
    # JSON text is UTF-8; without an explicit encoding, open() falls back to
    # the platform locale and can mis-decode non-ASCII content.
    with open(path, 'r', encoding='utf-8') as file:
        # read_json accepts an open handle, so the file does not need to be
        # read into an intermediate string first.
        return pd.read_json(file, lines=True)


def monthly_document_totals(df):
    """Sum 'num_docs' per calendar month of the 'date' column.

    Args:
        df: DataFrame with a 'date' column parseable by ``pd.to_datetime``
            and a numeric 'num_docs' column. It is not modified.

    Returns:
        A DataFrame with columns 'date' (monthly period) and 'num_docs'
        (the sum for that month), ordered chronologically.
    """
    # Parse into a separate Series so the caller's frame is left untouched.
    months = pd.to_datetime(df['date']).dt.to_period('M')
    monthly_sum = df.groupby(months)['num_docs'].sum().reset_index()
    return monthly_sum.sort_values('date')


def plot_monthly_totals(monthly_sum, title='Monthly Document Count for 2023'):
    """Draw the monthly totals as a bar chart and display it.

    Args:
        monthly_sum: DataFrame as returned by ``monthly_document_totals``.
        title: Chart title; override it when the data is not from 2023.
    """
    plt.figure(figsize=(12, 6))
    # Periods are converted to 'YYYY-MM' strings so each month becomes one
    # categorical bar.
    plt.bar(monthly_sum['date'].astype(str), monthly_sum['num_docs'])

    # Customize the plot
    plt.title(title)
    plt.xlabel('Month')
    plt.ylabel('Number of Documents')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Show the plot
    plt.show()


def main(path='your_file.json'):
    """Load the records at *path*, aggregate them by month and plot them."""
    plot_monthly_totals(monthly_document_totals(load_records(path)))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment