Skip to content

Instantly share code, notes, and snippets.

@rbatista191
Last active June 27, 2024 10:01
Show Gist options
  • Save rbatista191/e3385845b4a506cc06c119fb59c691b2 to your computer and use it in GitHub Desktop.
Save rbatista191/e3385845b4a506cc06c119fb59c691b2 to your computer and use it in GitHub Desktop.
SEO Meta Tag Optimizer for Markdown Files: This script optimizes SEO meta tags (title and description) for Markdown files in a specific folder using OpenAI's GPT-4 model. It processes Markdown files and updates their frontmatter with optimized title and description.
"""
SEO Meta Tag Optimizer for Markdown Files in a Specific Folder
This script optimizes SEO meta tags (title and description) for Markdown files
in a specific folder using OpenAI's GPT-4o model. It processes Markdown files
and updates their frontmatter with optimized title and description.
Features:
- Processes Markdown files in a specified folder
- Extracts and parses YAML frontmatter
- Generates SEO-optimized titles and descriptions using GPT-4o
- Updates frontmatter with new titles and descriptions
- Implements retry logic for API calls
- Provides detailed logging
Requirements:
- Python 3.6+ (the script uses f-strings)
- openai
- pyyaml
- python-dotenv
Usage:
1. Set up a .env file with your OpenAI API key: OPENAI_API_KEY=your_api_key_here
2. Specify the target folder path in the script
3. Run the script: python seo_meta_optimizer.py
Note: This script makes API calls to OpenAI, which may incur costs.
"""
import os
import glob
import openai
import json
import yaml
from dotenv import load_dotenv
import logging
# Read settings from a local .env file and hand the API key to the openai module
load_dotenv()
openai.api_key = os.environ.get('OPENAI_API_KEY')

# openai 1.0.0 and later expose an `OpenAI` client class; older releases are
# driven through module-level calls instead.
_HAS_V1_CLIENT = hasattr(openai, 'OpenAI')
client = openai.OpenAI() if _HAS_V1_CLIENT else openai


def create_chat_completion(**kwargs):
    """Forward the keyword arguments to the chat-completion call of the installed openai version."""
    if _HAS_V1_CLIENT:
        return client.chat.completions.create(**kwargs)
    return client.ChatCompletion.create(**kwargs)


# Timestamped, INFO-level logging for the whole script
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def process_folder(folder_path):
    """
    Process all Markdown files in the specified folder.

    Only ``*.md`` files directly inside the folder are considered. A failure
    while processing one file is logged and does not stop the remaining files.

    Args:
        folder_path (str): The path to the folder containing Markdown files.
    """
    # Escape the folder part so characters such as '[' or '*' in the path are
    # matched literally instead of being treated as glob wildcards.
    pattern = os.path.join(glob.escape(folder_path), '*.md')
    # Sorted so that successive runs visit the files in the same order.
    for md_file in sorted(glob.glob(pattern)):
        logging.info(f'Processing file: {md_file}')
        try:
            process_file(md_file)
        except Exception as e:
            # Per-file boundary: report the problem and carry on with the next file.
            logging.error(f'Error processing file {md_file}: {e}')
def extract_frontmatter(content):
    """
    Extract YAML frontmatter from Markdown content.

    The frontmatter is the text between a leading '---' and the next '---'.
    Whenever the frontmatter cannot be used (no closing delimiter, invalid
    YAML, or YAML that is not a mapping) an empty dict is returned together
    with the full, unmodified content.

    Args:
        content (str): The full content of the Markdown file.

    Returns:
        tuple: A tuple containing the parsed frontmatter (dict) and the remaining body (str).
    """
    if not content.startswith('---'):
        return {}, content

    parts = content.split('---', 2)
    if len(parts) < 3:
        # An opening '---' with no closing one (e.g. a leading horizontal rule):
        # there is nothing to parse, so treat the file as having no frontmatter.
        logging.warning("Frontmatter has no closing '---' delimiter")
        return {}, content

    _, raw_frontmatter, body = parts
    try:
        frontmatter = yaml.safe_load(raw_frontmatter)
    except yaml.YAMLError as e:
        logging.error(f"Error loading frontmatter YAML: {e}")
        return {}, content

    if frontmatter is None:
        # An empty frontmatter block parses to None; expose it as an empty dict.
        return {}, body
    if not isinstance(frontmatter, dict):
        logging.error(f"Frontmatter is not a YAML mapping (got {type(frontmatter).__name__})")
        return {}, content
    return frontmatter, body
def _frontmatter_text(frontmatter, key):
    """Return frontmatter[key] as a string, or '' when the key is absent or null."""
    value = frontmatter.get(key)
    if value is None:
        return ''
    return value if isinstance(value, str) else str(value)


def process_file(filepath):
    """
    Process a single Markdown file.

    Reads the file, checks the length of its frontmatter title and
    description, generates a replacement for whichever is out of range and
    writes the result back. Files whose frontmatter cannot be parsed are
    skipped so that the existing frontmatter is not overwritten.

    Args:
        filepath (str): The path to the Markdown file.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        content = file.read()

    frontmatter, body = extract_frontmatter(content)

    # extract_frontmatter returns the untouched content as the body when the
    # YAML cannot be parsed. Continuing would make update_frontmatter replace
    # the original frontmatter with only title/description, so skip instead.
    if content.startswith('---') and body == content:
        logging.warning(f'Skipping file with unparseable frontmatter: {filepath}')
        return

    if frontmatter is None:
        # An empty frontmatter block parses to None.
        frontmatter = {}
    elif not isinstance(frontmatter, dict):
        logging.warning(f'Skipping file whose frontmatter is not a mapping: {filepath}')
        return

    # Null or non-string values (e.g. "title:" with nothing after it) would
    # otherwise break the length check.
    current_title = _frontmatter_text(frontmatter, 'title')
    current_description = _frontmatter_text(frontmatter, 'description')
    filename = os.path.basename(filepath)  # Get the filename from the path

    new_title = current_title
    new_description = current_description
    if not validate_length(current_title, 'title'):
        new_title = generate_optimal_text(body, 'title', filename)
    if not validate_length(current_description, 'description'):
        new_description = generate_optimal_text(body, 'description', filename)

    if new_title != current_title or new_description != current_description:
        update_frontmatter(filepath, frontmatter, new_title, new_description)
    else:
        logging.info(f'No changes needed for file: {filepath}')
def generate_optimal_text(body, text_type, filename):
    """
    Generate an optimized title or description using the gpt-4o chat model.

    The model is asked for a JSON object keyed by ``text_type``. The request
    is repeated (up to 10 attempts) until the returned text passes
    ``validate_length``. Colons are rewritten as ' -' before validation,
    mirroring what ``update_frontmatter`` does when saving, so the length that
    is checked is the length that ends up in the file.

    Args:
        body (str): The main content of the Markdown file.
        text_type (str): Either 'title' or 'description'.
        filename (str): The name of the file being processed.

    Returns:
        str: The generated title or description.

    Raises:
        RuntimeError: If maximum retries are exceeded.
    """
    length_range = "35 to 45" if text_type == 'title' else "110 to 160"
    filename_without_extension = os.path.splitext(filename)[0]  # Remove file extension
    prompt = (
        f"Based on the following markdown content and filename, generate an optimal {text_type} "
        f"(ranging strictly from {length_range} characters) in JSON format. "
        "Stick to the requirements given in terms of character length at all costs! "
        f"The JSON key should be '{text_type}'. "
        # Refer to the requested text type; the wording previously always said "title".
        f"For the {text_type}, try to incorporate relevant words from the filename: '{filename_without_extension}'. "
        f"Ensure the {text_type} is catchy and SEO-friendly.\n\n"
        "Content:\n" + body[:500]  # Limiting content to first 500 characters to avoid token limits
    )

    max_retries = 10
    for attempt in range(1, max_retries + 1):
        try:
            response = create_chat_completion(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a world-class SEO assistant that helps optimize meta tags. You are very strict with the requirements given in terms of character length."},
                    {"role": "user", "content": prompt}
                ],
                response_format={"type": "json_object"}
            )
            # Handle response based on OpenAI version
            message = response.choices[0].message
            if hasattr(openai, 'OpenAI'):
                result = message.content.strip()
            else:
                result = message['content'].strip()
            generated_text = json.loads(result)[text_type]
        except Exception as e:
            # API, JSON and missing-key failures are all retried the same way.
            logging.error(f"Error in generating optimal {text_type} (attempt {attempt}/{max_retries}): {e}")
            continue

        if not isinstance(generated_text, str):
            logging.error(f"Generated {text_type} is not a string (attempt {attempt}/{max_retries})")
            continue

        # Each ':' becomes ' -' (one character longer) when the frontmatter is
        # saved, so apply the same rewrite before checking the length.
        generated_text = generated_text.strip().replace(':', ' -')
        if validate_length(generated_text, text_type):
            return generated_text
        logging.info(f"Generated {text_type} didn't fit the criteria. Retrying...")

    raise RuntimeError(f"Exceeded maximum retries for generating optimal {text_type}")
def validate_length(text, text_type):
    """
    Validate the length of the title or description.

    Titles must be 35-45 characters long; any other ``text_type`` is treated
    as a description and must be 110-160 characters long. A value that is not
    a string (for example ``None`` from an empty frontmatter field) is
    reported as invalid instead of raising.

    Args:
        text (str): The text to validate.
        text_type (str): Either 'title' or 'description'.

    Returns:
        bool: True if the text meets the length criteria, False otherwise.
    """
    if not isinstance(text, str):
        logging.warning(f"{text_type.capitalize()} is missing or not a string")
        return False

    minimum, maximum = (35, 45) if text_type == 'title' else (110, 160)
    valid = minimum <= len(text) <= maximum
    if not valid:
        logging.warning(f"{text_type.capitalize()} length out of range: {len(text)}")
    return valid
def update_frontmatter(filepath, frontmatter, title, description):
    """
    Update the frontmatter of a Markdown file with new title and description.

    The file is re-read, its existing frontmatter block (if any) is replaced
    by a YAML dump of ``frontmatter`` with the new title and description, and
    the result is written back. The ``frontmatter`` argument itself is not
    modified. If the file cannot be read, or it starts with '---' but has no
    closing delimiter, an error is logged and the file is left untouched.

    Args:
        filepath (str): The path to the Markdown file.
        frontmatter (dict): The existing frontmatter.
        title (str): The new title.
        description (str): The new description.
    """
    old_title = frontmatter.get('title', '')
    old_description = frontmatter.get('description', '')

    # Replace colons with hyphens in title and description
    title = title.replace(':', ' -')
    description = description.replace(':', ' -')

    # Work on a copy so the caller's dict is left as it was passed in.
    updated = dict(frontmatter)
    updated['title'] = title
    updated['description'] = description

    try:
        with open(filepath, 'r', encoding='utf-8') as file:
            content = file.read()
    except IOError as e:
        logging.error(f'Error reading file {filepath}: {e}')
        return

    if content.startswith('---'):
        parts = content.split('---', 2)
        if len(parts) < 3:
            logging.error('File split did not retrieve expected three parts')
            return
        body = parts[2]
        # The text after the closing '---' begins with that delimiter's own
        # line break. Drop it, because '---\n' is written again below;
        # otherwise every update would add one more blank line.
        if body.startswith('\n'):
            body = body[1:]
    else:
        body = content

    # sort_keys=False keeps the keys in their existing order instead of
    # alphabetizing them (supported by PyYAML 5.1 and later).
    header = yaml.dump(updated, allow_unicode=True, sort_keys=False)
    new_content = f"---\n{header}---\n{body}"

    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(new_content)

    logging.info(f'Updated file: {filepath}')
    logging.info(f'Old title: {old_title} -> New title: {title}')
    logging.info(f'Old description: {old_description} -> New description: {description}')
# Main execution
if __name__ == "__main__":
    import sys

    # Folder to process: the first command-line argument when one is given,
    # otherwise the default below. Either may be relative or absolute.
    default_folder = '../content/docs/blog'
    target_folder = sys.argv[1] if len(sys.argv) > 1 else default_folder

    # Convert to absolute path if it's relative (resolved against the current
    # working directory)
    target_folder = os.path.abspath(target_folder)

    if not os.path.isdir(target_folder):
        logging.error(f"The specified folder does not exist: {target_folder}")
    else:
        logging.info(f'Starting to process files in folder: {target_folder}')
        process_folder(target_folder)
        logging.info('Processing completed.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment