Skip to content

Instantly share code, notes, and snippets.

@kbcarte
Created August 9, 2023 20:15
Show Gist options
  • Save kbcarte/404f5c593d04a76842543fc2461a5835 to your computer and use it in GitHub Desktop.
Save kbcarte/404f5c593d04a76842543fc2461a5835 to your computer and use it in GitHub Desktop.
Goes through a CSV of URLs and meta info from a RankMath export, then fetches each page to look up the post or page ID so the exported report can be updated.
import requests, csv, time, re
from bs4 import BeautifulSoup
def get_page_id(url):
    """Fetch *url* and extract the WordPress object ID from the body classes.

    WordPress themes emit ``<body>`` classes such as ``postid-123`` for
    posts/pages and ``term-45`` for taxonomy archives; RankMath's export
    leaves the ``id`` column blank, so this recovers it from the live page.

    Returns the numeric ID as a string, or ``None`` when the page cannot
    be parsed or carries no recognizable class.
    """
    # Timeout so a single dead URL cannot hang the whole export run.
    page = requests.get(url, timeout=30)
    soup = BeautifulSoup(page.text, 'html.parser')
    body = soup.find("body")
    if body is None:
        # Malformed / empty response: nothing to extract.
        return None
    # .get() guards against a <body> tag with no class attribute at all
    # (indexing body["class"] would raise KeyError in that case).
    for cls in body.get("class", []):
        match = re.search(r'term-\d+', cls)
        if match:
            # "term-45" -> "45"
            return match.group(0).split("-")[1]
        if "postid-" in cls:
            # "postid-123" -> "123"
            return cls.split("-")[1]
    return None
def write_new_csv(new_csv_list):
    """Write the accumulated rows (header + data) to ``py_with_ids.csv``.

    ``newline=''`` is required when handing a file to ``csv.writer``;
    without it the writer emits an extra blank line between rows on
    Windows.  UTF-8 is used explicitly so the output does not depend on
    the platform's locale encoding.
    """
    with open("py_with_ids.csv", 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerows(new_csv_list)
# Output starts with the exact RankMath import header so the rewritten
# file can be re-imported directly.
new_csv = [["id","object_type","slug","seo_title","seo_description","is_pillar_content","focus_keyword","seo_score","robots","advanced_robots","canonical_url","primary_term","schema_data","social_facebook_thumbnail","social_facebook_title","social_facebook_description","social_twitter_thumbnail","social_twitter_title","social_twitter_description"]]

# newline='' is the csv-module-documented way to open a CSV file; it
# prevents universal-newline translation from corrupting quoted fields.
with open('website_meta.csv', mode='r', newline='') as file:
    reader = csv.reader(file)
    for row in reader:
        # Skip the export's own header row.
        if row[0] == "id":
            continue
        # Column 2 holds the page URL; resolve it to the WP object ID
        # and fill in the blank id column.
        row[0] = get_page_id(row[2])
        new_csv.append(row)
        print(row)
        # Throttle: be polite to the target server between requests.
        time.sleep(.5)

write_new_csv(new_csv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment