# capjamesg/hitem-reader.py

## hitem-reader.py
import json

import mf2py


def checker(mf2prop, mf1prop, filename):
    """Record the URL of every listed document that contains ``mf2prop``.

    ``filename`` is read as a listing with one entry per line. Every
    occurrence of ``":" + mf2prop`` and ``":" + mf1prop`` is removed from an
    entry, and what remains is opened as the path of a JSON document. The
    document's ``_source.page_content`` is parsed with ``mf2py``; for each
    parsed item whose ``type`` contains ``mf2prop`` the document's
    ``_source.url`` is printed and appended to ``results-<mf2prop>.txt``.

    Args:
        mf2prop: Type name to look for in each parsed item's ``type`` list
            (for example ``"h-resume"``); also used in the results filename.
        mf1prop: Second marker text stripped from listing entries
            (for example ``"hresume"``).
        filename: Path of the listing file.

    Raises:
        OSError: If the listing or one of the listed files cannot be opened.
        json.JSONDecodeError: If a listed file is not valid JSON.
        KeyError: If a document lacks ``_source``, ``url`` or
            ``page_content``.
    """
    with open(filename, "r") as listing:
        entries = [line.strip() for line in listing]

    results_path = f"results-{mf2prop}.txt"

    for entry in entries:
        file_name = entry.replace(":" + mf2prop, "").replace(":" + mf1prop, "")

        # A blank line (or one that held only a marker) leaves no path;
        # skip it instead of failing on open("").
        if not file_name:
            continue

        print(file_name)

        # Only hold the document open for as long as it takes to decode it.
        with open(file_name, "r") as file:
            data = json.load(file)

        source = data["_source"]
        doc_url = source["url"]
        microformats_data = mf2py.parse(doc=source["page_content"])

        # Only the items returned at the top of the parse result are checked.
        for item in microformats_data.get("items") or []:
            if mf2prop not in item["type"]:
                continue

            print(f"{file_name} has {mf2prop}")
            print(doc_url)

            # One line is appended per matching item, so a document with
            # several matching items is recorded several times.
            with open(results_path, "a") as f:
                f.write(doc_url + "\n")


# Driver: print a heading for each microformat, then scan its listing file.
print("h-review")
# NOTE: the h-review scan is switched off. The disabled call passed only
# ("h-review", "hreview"); checker() also needs a listing filename.
print("h-review aggregate")
checker(
    mf2prop="h-review-aggregate",
    mf1prop="hreview-aggregate",
    filename="hreviewaggregate.txt",
)
print("h-resume")
checker(
    mf2prop="h-resume",
    mf1prop="hresume",
    filename="hresumes.txt",
)
	import json

	import mf2py


	def checker(mf2prop, mf1prop, filename):
	    """Record the URL of every listed document that contains ``mf2prop``.

	    ``filename`` is read as a listing with one entry per line. Every
	    occurrence of ``":" + mf2prop`` and ``":" + mf1prop`` is removed from
	    an entry, and what remains is opened as the path of a JSON document.
	    The document's ``_source.page_content`` is parsed with ``mf2py``; for
	    each parsed item whose ``type`` contains ``mf2prop`` the document's
	    ``_source.url`` is printed and appended to ``results-<mf2prop>.txt``.

	    Args:
	        mf2prop: Type name to look for in each parsed item's ``type``
	            list (for example ``"h-resume"``); also used in the results
	            filename.
	        mf1prop: Second marker text stripped from listing entries
	            (for example ``"hresume"``).
	        filename: Path of the listing file.

	    Raises:
	        OSError: If the listing or one of the listed files cannot be
	            opened.
	        json.JSONDecodeError: If a listed file is not valid JSON.
	        KeyError: If a document lacks ``_source``, ``url`` or
	            ``page_content``.
	    """
	    with open(filename, "r") as listing:
	        entries = [line.strip() for line in listing]

	    results_path = f"results-{mf2prop}.txt"

	    for entry in entries:
	        file_name = entry.replace(":" + mf2prop, "").replace(":" + mf1prop, "")

	        # A blank line (or one that held only a marker) leaves no path;
	        # skip it instead of failing on open("").
	        if not file_name:
	            continue

	        print(file_name)

	        # Only hold the document open for as long as it takes to decode it.
	        with open(file_name, "r") as file:
	            data = json.load(file)

	        source = data["_source"]
	        doc_url = source["url"]
	        microformats_data = mf2py.parse(doc=source["page_content"])

	        # Only the items returned at the top of the parse result are checked.
	        for item in microformats_data.get("items") or []:
	            if mf2prop not in item["type"]:
	                continue

	            print(f"{file_name} has {mf2prop}")
	            print(doc_url)

	            # One line is appended per matching item, so a document with
	            # several matching items is recorded several times.
	            with open(results_path, "a") as f:
	                f.write(doc_url + "\n")


	# Driver: print a heading for each microformat, then scan its listing file.
	print("h-review")
	# NOTE: the h-review scan is switched off. The disabled call passed only
	# ("h-review", "hreview"); checker() also needs a listing filename.
	print("h-review aggregate")
	checker(
	    mf2prop="h-review-aggregate",
	    mf1prop="hreview-aggregate",
	    filename="hreviewaggregate.txt",
	)
	print("h-resume")
	checker(
	    mf2prop="h-resume",
	    mf1prop="hresume",
	    filename="hresumes.txt",
	)