Skip to content

Instantly share code, notes, and snippets.

@finmoorhouse
Last active November 5, 2023 17:37
Show Gist options
  • Save finmoorhouse/4fd8ddb50a6b9a7d9690f049992f89c9 to your computer and use it in GitHub Desktop.
Save finmoorhouse/4fd8ddb50a6b9a7d9690f049992f89c9 to your computer and use it in GitHub Desktop.
Export Goodreads reviews to markdown
import pandas as pd
import re
# --- Configuration ----------------------------------------------------------
# Path to the Goodreads library export CSV.
csv_file_path = 'goodreads_library_export.csv'
# Path of the markdown file to generate.
output_file_path = '/path/to/output.md'
# A review must be strictly longer than this many characters to be included.
min_review_length = 100

# --- Load and prepare the data ----------------------------------------------
df = pd.read_csv(csv_file_path)

# Keep only rows whose review is present and long enough.
# fillna('')/astype(str) keeps the length check working even when the whole
# 'My Review' column is empty (pandas then reads it as float NaN, on which the
# .str accessor would raise).
review_lengths = df['My Review'].fillna('').astype(str).str.len()
has_long_review = df['My Review'].notna() & (review_lengths > min_review_length)
# .copy() yields an independent frame, so the column assignments below never
# write into a slice of the original (the cause of SettingWithCopyWarning).
df = df[has_long_review].copy()

# Parse both date columns; missing or unparseable values become NaT.
# Plain column assignment replaces the column outright, so it takes the
# datetime dtype; `df.loc[:, col] = ...` may instead try to write into the
# existing (non-datetime) column in recent pandas versions.
for date_column in ('Date Read', 'Date Added'):
    df[date_column] = pd.to_datetime(df[date_column], errors='coerce')

# Sort key: the date the book was read, falling back to the date it was added.
# fillna is the vectorized equivalent of the per-row "read if not null else
# added" choice and also behaves on an empty frame.
df['Sort Date'] = df['Date Read'].fillna(df['Date Added'])

# Newest first; rows with no usable date at all (NaT) sort to the end.
df = df.sort_values(by='Sort Date', ascending=False)
# Goodreads link markup: "[b:Title|12345|...optional extra fields...]" for
# books and "[a:Name|678|...]" for authors. The long tags "book"/"author" are
# accepted too. Groups: (1) tag, (2) display name, (3) numeric Goodreads id.
# Compiled once at import time rather than on every call.
_GOODREADS_LINK_RE = re.compile(r'\[(book|author|b|a):(.+?)\|(\d+)(?:\|.*?)?\]')

# Maps each link tag to the path segment used in the Goodreads URL.
_GOODREADS_LINK_PATHS = {
    'b': 'book',
    'book': 'book',
    'a': 'author',
    'author': 'author',
}


def replace_goodreads_links(text):
    """Replace Goodreads book/author link codes in *text* with markdown links.

    A code such as ``[b:The Hobbit|5907|The Hobbit|...]`` becomes
    ``[The Hobbit](https://www.goodreads.com/book/show/5907)``; author codes
    (``[a:Name|id|...]``) link to ``/author/show/<id>`` instead. Any fields
    after the id are discarded. Text without link codes is returned unchanged.

    Args:
        text: The review text to process.

    Returns:
        The text with every recognised link code replaced.
    """
    def link_replacement(match):
        tag, name, code = match.groups()
        # The tag is always a key of the table: the regex only matches the
        # four tags listed there, so no branch can fall through to None.
        path = _GOODREADS_LINK_PATHS[tag]
        return f"[{name.strip()}](https://www.goodreads.com/{path}/show/{code})"

    return _GOODREADS_LINK_RE.sub(link_replacement, text)
# Front matter block written at the very top of the generated markdown file.
# Replace the placeholder line with your own front matter.
frontmatter = (
    "---\n"
    "(Your markdown frontmatter here)\n"
    "---\n"
)
# Introductory text written right after the front matter, before any review.
preamble = "\nYour preamble here.\n"
# Year of the most recently written "## <year>" heading (None before the first).
last_year = None

# Explicit UTF-8: the output contains non-ASCII characters (★, ☆, •, ↗) that a
# platform default encoding other than UTF-8 may be unable to encode.
with open(output_file_path, 'w', encoding='utf-8') as f:
    f.write(frontmatter)
    f.write(preamble)

    # One section per review, in the order the DataFrame is already sorted in.
    for _, row in df.iterrows():
        sort_date = row['Sort Date']
        # A row can lack both dates; NaT has no usable year and cannot be
        # formatted with strftime, so such rows go under an "Undated" heading.
        has_date = pd.notna(sort_date)
        current_year = sort_date.year if has_date else 'Undated'

        # Start a new year heading whenever the year changes.
        if current_year != last_year:
            if last_year is not None:  # Blank space between year groups, not before the first
                f.write("\n\n")
            f.write(f"## {current_year}\n\n")
            last_year = current_year

        # Book title as an H3 heading (years are the H2 level).
        f.write(f"### {row['Title']}\n\n")

        # Author, followed by the publication year when the export has one
        # (a missing year is NaN, which int() cannot convert).
        year_published = row['Year Published']
        if pd.notna(year_published):
            f.write(f"{row['Author']} ({int(year_published)}) • ")
        else:
            f.write(f"{row['Author']} • ")

        # Star rating out of five; a rating of 0 (or a missing one) is omitted.
        # int() guarantees a plain integer for the string repetition below.
        rating = row['My Rating']
        rating = int(rating) if pd.notna(rating) else 0
        if rating:
            f.write(f"{'★' * rating + '☆' * (5 - rating)} • ")

        # Abbreviated month and two-digit year, e.g. "Nov 23".
        if has_date:
            f.write(f"{sort_date.strftime('%b %y')} • ")

        # Link back to the book's Goodreads page.
        f.write(f"[Link to book ↗](https://www.goodreads.com/book/show/{row['Book Id']})")
        f.write("\n\n")

        # Review body: turn HTML line breaks into newlines, then convert
        # Goodreads link codes into markdown links.
        formatted_content = row['My Review'].replace("<br/>", "\n")
        formatted_content = replace_goodreads_links(formatted_content)
        f.write(f"{formatted_content}\n\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment