Instantly share code, notes, and snippets.
Last active
June 27, 2024 10:01
-
Star
(1)
1
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save rbatista191/e3385845b4a506cc06c119fb59c691b2 to your computer and use it in GitHub Desktop.
SEO Meta Tag Optimizer for Markdown Files: This script optimizes SEO meta tags (title and description) for Markdown files in a specific folder using OpenAI's GPT-4 model. It processes Markdown files and updates their frontmatter with optimized title and description.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
SEO Meta Tag Optimizer for Markdown Files in a Specific Folder | |
This script optimizes SEO meta tags (title and description) for Markdown files | |
in a specific folder using OpenAI's GPT-4 model. It processes Markdown files | |
and updates their frontmatter with optimized title and description. | |
Features: | |
- Processes Markdown files in a specified folder | |
- Extracts and parses YAML frontmatter | |
- Generates SEO-optimized titles and descriptions using GPT-4 | |
- Updates frontmatter with new titles and descriptions | |
- Implements retry logic for API calls | |
- Provides detailed logging | |
Requirements: | |
- Python 3.x | |
- openai | |
- pyyaml | |
- python-dotenv | |
Usage: | |
1. Set up a .env file with your OpenAI API key: OPENAI_API_KEY=your_api_key_here | |
2. Specify the target folder path in the script | |
3. Run the script: python seo_meta_optimizer.py | |
Note: This script makes API calls to OpenAI, which may incur costs. | |
""" | |
import os | |
import glob | |
import openai | |
import json | |
import yaml | |
from dotenv import load_dotenv | |
import logging | |
# Pull the OpenAI API key out of the local .env file.
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')

# The OpenAI SDK changed its API surface at 1.0.0; detect which generation is
# installed and expose a single create_chat_completion() shim so the rest of
# the script never has to care about the SDK version.
if hasattr(openai, 'OpenAI'):
    # openai>=1.0.0: calls go through an explicit client object.
    client = openai.OpenAI()

    def create_chat_completion(**kwargs):
        """Forward to the v1.x chat-completions endpoint."""
        return client.chat.completions.create(**kwargs)
else:
    # Legacy (<1.0.0): module-level ChatCompletion API.
    client = openai

    def create_chat_completion(**kwargs):
        """Forward to the legacy ChatCompletion endpoint."""
        return client.ChatCompletion.create(**kwargs)

# Timestamped, leveled console logging for progress and error reporting.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def process_folder(folder_path):
    """
    Process every Markdown (*.md) file found directly inside *folder_path*.

    Args:
        folder_path (str): The path to the folder containing Markdown files.
    """
    for md_path in glob.glob(os.path.join(folder_path, '*.md')):
        logging.info(f'Processing file: {md_path}')
        try:
            process_file(md_path)
        except Exception as exc:
            # One broken file must not abort the whole batch.
            logging.error(f'Error processing file {md_path}: {exc}')
def extract_frontmatter(content):
    """
    Extract YAML frontmatter from Markdown content.

    Args:
        content (str): The full content of the Markdown file.

    Returns:
        tuple: (frontmatter dict, body str). Returns ({}, content) whenever a
        valid frontmatter mapping cannot be extracted, so callers can always
        rely on getting a dict back.
    """
    if not content.startswith('---'):
        return {}, content
    parts = content.split('---', 2)
    if len(parts) < 3:
        # Opening delimiter with no closing one: the original tuple-unpack
        # would raise ValueError here; treat it as "no frontmatter" instead.
        logging.warning('Frontmatter delimiter not closed; treating file as having no frontmatter')
        return {}, content
    _, raw_frontmatter, body = parts
    try:
        frontmatter = yaml.safe_load(raw_frontmatter)
    except yaml.YAMLError as e:
        logging.error(f"Error loading frontmatter YAML: {e}")
        return {}, content
    # safe_load yields None for an empty block and may yield a scalar for
    # degenerate input; normalize to a dict so callers can call .get() safely.
    if not isinstance(frontmatter, dict):
        frontmatter = {}
    return frontmatter, body
def process_file(filepath):
    """
    Optimize the SEO meta tags of a single Markdown file.

    Reads the file, regenerates the frontmatter title/description when they
    fall outside the SEO length budgets, and rewrites the file only if
    something actually changed.

    Args:
        filepath (str): The path to the Markdown file.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        content = handle.read()

    frontmatter, body = extract_frontmatter(content)
    filename = os.path.basename(filepath)  # used to seed the generated title

    old_title = frontmatter.get('title', '')
    old_description = frontmatter.get('description', '')

    title = old_title
    if not validate_length(old_title, 'title'):
        title = generate_optimal_text(body, 'title', filename)

    description = old_description
    if not validate_length(old_description, 'description'):
        description = generate_optimal_text(body, 'description', filename)

    if (title, description) == (old_title, old_description):
        logging.info(f'No changes needed for file: {filepath}')
    else:
        update_frontmatter(filepath, frontmatter, title, description)
def generate_optimal_text(body, text_type, filename):
    """
    Generate an SEO-optimized title or description using GPT-4.

    Args:
        body (str): The main content of the Markdown file.
        text_type (str): Either 'title' or 'description'.
        filename (str): The name of the file being processed.

    Returns:
        str: The generated text, guaranteed to pass validate_length().

    Raises:
        RuntimeError: If no valid text is produced within the retry budget.
    """
    length_range = "35 to 45" if text_type == 'title' else "110 to 160"
    filename_without_extension = os.path.splitext(filename)[0]  # Remove file extension
    prompt = (
        f"Based on the following markdown content and filename, generate an optimal {text_type} "
        f"(ranging strictly from {length_range} characters) in JSON format. "
        "Stick to the requirements given in terms of character length at all costs! "
        f"The JSON key should be '{text_type}'. "
        f"For the title, try to incorporate relevant words from the filename: '{filename_without_extension}'. "
        "Ensure the title is catchy and SEO-friendly.\n\n"
        "Content:\n" + body[:500]  # Limiting content to first 500 characters to avoid token limits
    )
    max_retries = 10
    for _attempt in range(max_retries):
        try:
            response = create_chat_completion(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a world-class SEO assistant that helps optimize meta tags. You are very strict with the requirements given in terms of character length."},
                    {"role": "user", "content": prompt}
                ],
                response_format={"type": "json_object"}
            )
            # The v1 SDK returns attribute-style objects; the legacy SDK
            # returns dict-like message payloads.
            if hasattr(openai, 'OpenAI'):
                result = response.choices[0].message.content.strip()
            else:
                result = response.choices[0].message['content'].strip()
            generated_text = json.loads(result)[text_type]
        except Exception as e:
            # API/parse failure: log it and retry with the same prompt.
            # (Bug fix: previously this path also logged the misleading
            # "didn't fit the criteria" message.)
            logging.error(f"Error in generating optimal {text_type}: {e}")
        else:
            if validate_length(generated_text, text_type):
                return generated_text
            logging.info(f"Generated {text_type} didn't fit the criteria. Retrying...")
    raise RuntimeError(f"Exceeded maximum retries for generating optimal {text_type}")
def validate_length(text, text_type):
    """
    Check whether *text* fits the SEO length budget for its tag type.

    Args:
        text (str): The text to validate.
        text_type (str): Either 'title' or 'description'.

    Returns:
        bool: True if the text meets the length criteria, False otherwise.
    """
    # Titles: 35-45 chars; descriptions: 110-160 chars (inclusive).
    low, high = (35, 45) if text_type == 'title' else (110, 160)
    within_range = low <= len(text) <= high
    if not within_range:
        logging.warning(f"{text_type.capitalize()} length out of range: {len(text)}")
    return within_range
def update_frontmatter(filepath, frontmatter, title, description):
    """
    Rewrite a Markdown file's frontmatter with a new title and description.

    Args:
        filepath (str): The path to the Markdown file.
        frontmatter (dict): The existing frontmatter (updated in place).
        title (str): The new title.
        description (str): The new description.
    """
    old_title = frontmatter.get('title', '')
    old_description = frontmatter.get('description', '')

    # Replace colons with hyphens in title and description
    title = title.replace(':', ' -')
    description = description.replace(':', ' -')
    frontmatter['title'] = title
    frontmatter['description'] = description

    try:
        with open(filepath, 'r', encoding='utf-8') as fh:
            content = fh.read()
    except IOError as e:
        logging.error(f'Error reading file {filepath}: {e}')
        return

    dumped = yaml.dump(frontmatter, allow_unicode=True)
    if content.startswith('---'):
        parts = content.split('---', 2)
        if len(parts) < 3:
            logging.error('File split did not retrieve expected three parts')
            return
        body = parts[2]
        new_content = f"---\n{dumped}---\n{body}"
    else:
        # No existing frontmatter: prepend a fresh block to the whole file.
        new_content = f"---\n{dumped}---\n{content}"

    with open(filepath, 'w', encoding='utf-8') as fh:
        fh.write(new_content)

    logging.info(f'Updated file: {filepath}')
    logging.info(f'Old title: {old_title} -> New title: {title}')
    logging.info(f'Old description: {old_description} -> New description: {description}')
# Main execution
if __name__ == "__main__":
    # Target folder holding the Markdown files (relative paths are resolved
    # against the current working directory).
    target_folder = os.path.abspath('../content/docs/blog')

    if os.path.isdir(target_folder):
        logging.info(f'Starting to process files in folder: {target_folder}')
        process_folder(target_folder)
        logging.info('Processing completed.')
    else:
        logging.error(f"The specified folder does not exist: {target_folder}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment