Skip to content

Instantly share code, notes, and snippets.

@JayCuthrell
Created October 21, 2023 23:45
Show Gist options
  • Save JayCuthrell/2118975fdd255bd23b73bd39a02ab2ac to your computer and use it in GitHub Desktop.
Save JayCuthrell/2118975fdd255bd23b73bd39a02ab2ac to your computer and use it in GitHub Desktop.
Create frontmatter tags from a blog post using rake-nltk and stopwords
import re
import sys
import rake_nltk
from rake_nltk import Metric, Rake
def get_text_from_markdown(file_path):
    """Read a Markdown file and return its text with links and inline code removed.

    Args:
        file_path: Path of the Markdown file to read.

    Returns:
        The file contents as a string, with every inline link
        (``[label](target)``, label included) and every single-backtick
        code span deleted.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        text = f.read()
    # Remove markdown tags: inline links first, then inline code spans.
    text = re.sub(r'\[(.*?)\]\(.*?\)', '', text)
    text = re.sub(r'`(.*?)`', '', text)
    return text
def generate_keywords(text):
    """Extract ranked one- and two-word key phrases from *text* using RAKE.

    Args:
        text: The plain text to analyse.

    Returns:
        The list of phrases produced by ``Rake.get_ranked_phrases()``,
        ranked by the word-frequency metric, with repeated phrases
        excluded.

    Note:
        Rake's default stopword list comes from NLTK, so the NLTK
        ``stopwords`` corpus must be installed (``nltk.download('stopwords')``).
        The default sentence tokenizer presumably also needs the NLTK
        ``punkt`` resource -- confirm against the installed NLTK version.
    """
    # Leave ``stopwords`` unset so Rake falls back to NLTK's stopword list.
    # Do not pass a resource name such as "punkt" here: Rake iterates the
    # argument as a collection of words, so a bare string is treated as its
    # individual letters and no real stopwords are filtered out.
    extractor = rake_nltk.Rake(
        ranking_metric=Metric.WORD_FREQUENCY,
        min_length=1,
        max_length=2,
        include_repeated_phrases=False,
    )
    extractor.extract_keywords_from_text(text)
    return extractor.get_ranked_phrases()
def generate_frontmatter_tags(keywords):
    """Format each keyword as a frontmatter list entry.

    Args:
        keywords: Iterable of keyword phrases.

    Returns:
        A new list with one string per keyword, each being the keyword
        prefixed with ``' - '``, in the same order as the input.
    """
    return [f' - {phrase}' for phrase in keywords]
def gather_keywords_and_return_frontmatter_formatted_tags(blog_post_file_path):
    """Run the full pipeline: read the post, extract keywords, format tags.

    Args:
        blog_post_file_path: Path of the Markdown blog post.

    Returns:
        The list of frontmatter tag lines produced by
        ``generate_frontmatter_tags`` for the keywords extracted from the
        post's text.
    """
    return generate_frontmatter_tags(
        generate_keywords(get_text_from_markdown(blog_post_file_path))
    )
if __name__ == '__main__':
    # Exit with a usage hint (on stderr, non-zero status) instead of an
    # IndexError traceback when the path argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} <path-to-blog-post.md>')
    blog_post_file_path = sys.argv[1]
    frontmatter_tags = gather_keywords_and_return_frontmatter_formatted_tags(blog_post_file_path)
    # Print the frontmatter tags, one per line
    print('\n'.join(frontmatter_tags))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment