Skip to content

Instantly share code, notes, and snippets.

@capjamesg
Created October 11, 2022 07:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save capjamesg/e8ded796a581e420d1d9bb0f28c64bb4 to your computer and use it in GitHub Desktop.
Save capjamesg/e8ded796a581e420d1d9bb0f28c64bb4 to your computer and use it in GitHub Desktop.
A script to count specified mf1 and mf2 properties from a string.
import json
import mf2py
def checker(mf2prop, mf1prop, filename):
    """Record the URLs of listed documents that contain an ``mf2prop`` item.

    ``filename`` is read as a text file with one entry per line. Each entry
    is the path of a JSON document, optionally carrying a ``:<mf2prop>`` or
    ``:<mf1prop>`` marker that is removed to recover the path (presumably
    grep-style ``path:match`` output -- confirm against how the listing is
    produced). Entries that are empty once the marker is removed are skipped.

    Every listed document is loaded as JSON, its ``_source.page_content`` is
    parsed with ``mf2py``, and for each entry of the parsed ``items`` list
    whose ``type`` contains ``mf2prop`` the document's ``_source.url`` is
    appended as one line to ``results-<mf2prop>.txt`` in the current working
    directory. A document with several matching items is therefore written
    once per matching item.

    Args:
        mf2prop: Microformats2 type name to look for, e.g. ``"h-resume"``.
        mf1prop: Legacy (mf1) name; only used to strip the ``:<mf1prop>``
            marker from listing entries.
        filename: Path of the listing file.

    Returns:
        None. Progress is printed to stdout and matches are appended to the
        results file.

    Raises:
        OSError: If the listing or a listed document cannot be opened.
        json.JSONDecodeError: If a listed document is not valid JSON.
        KeyError: If a document lacks ``_source``, ``url`` or
            ``page_content``, or a parsed item lacks ``type``.
    """
    with open(filename, "r", encoding="utf-8") as listing:
        entries = [line.strip() for line in listing]

    for entry in entries:
        # Remove the ":<property>" marker so only the document path remains.
        file_name = entry.replace(":" + mf2prop, "").replace(":" + mf1prop, "")

        # Blank lines (e.g. a trailing newline) would otherwise reach
        # open("") and abort the whole run.
        if not file_name:
            continue

        print(file_name)

        with open(file_name, "r", encoding="utf-8") as file:
            data = json.load(file)

        doc_url = data["_source"]["url"]
        post_contents = data["_source"]["page_content"]

        microformats_data = mf2py.parse(doc=post_contents)

        # A missing or empty "items" list simply means nothing to inspect.
        for item in microformats_data.get("items") or []:
            if mf2prop not in item["type"]:
                continue

            print(f"{file_name} has {mf2prop}")
            print(doc_url)

            # Opened per match so the results file is only created when
            # there is something to record.
            with open(f"results-{mf2prop}.txt", "a", encoding="utf-8") as f:
                f.write(doc_url + "\n")
# Driver: print a heading for each microformat type, then scan its listing.
print("h-review")
# NOTE: the h-review scan is disabled. Its previous call passed only two
# arguments, so it needs a listing file added before it can be re-enabled.
for heading, checker_args in (
    (
        "h-review aggregate",
        ("h-review-aggregate", "hreview-aggregate", "hreviewaggregate.txt"),
    ),
    ("h-resume", ("h-resume", "hresume", "hresumes.txt")),
):
    print(heading)
    checker(*checker_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment