Skip to content

Instantly share code, notes, and snippets.

@valdergallo
Last active March 7, 2022 18:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save valdergallo/ab5d57b72e5710be523790ebdf1c9a24 to your computer and use it in GitHub Desktop.
Save valdergallo/ab5d57b72e5710be523790ebdf1c9a24 to your computer and use it in GitHub Desktop.
Parse reviewers from a GitHub page into a CSV file
import csv
import os
from dataclasses import dataclass
from typing import Generator
from bs4 import BeautifulSoup
# Default path of the saved HTML page; used as the default argument of
# _get_content() and read_content().
FILENAME = "content.html"
# Path of the CSV file written by save_data(); it is opened in "w" mode,
# so an existing file is overwritten.
DATABASE = "reviewers.csv"
def _get_content(filename: str = FILENAME) -> str:
    """Return the text of *filename*, or ``""`` when the path does not exist.

    The file is read inside a ``with`` block so the handle is closed as soon
    as the read finishes instead of being left open until garbage collection.

    Args:
        filename: Path of the file to read.

    Returns:
        The whole file content as a string, or an empty string if nothing
        exists at *filename*.
    """
    if not os.path.exists(filename):
        return ""
    with open(filename) as handle:
        return handle.read()
def _clear_string(value) -> str:
return str(value).strip().replace("\n", "")
@dataclass
class SoupUser:
    """One reviewer entry extracted from the HTML page."""

    # Reviewer name as shown on the page (cleaned text of the span).
    username: str
    # Value of the "data-hovercard-type" attribute found near the name.
    type: str
    # True when the reviewer's status icon carries the success colour class.
    status: bool

    def __repr__(self) -> str:
        """Return a short debug representation showing only the username."""
        return f"<SoupUser: {self.username}>"

    def to_dict(self):
        """Return the three fields as a dict keyed by the CSV column names."""
        columns = ("username", "type", "status")
        return {column: getattr(self, column) for column in columns}
def read_content(filename: str = FILENAME) -> Generator:
    """Yield a ``SoupUser`` for each ``span.css-truncate-target`` in the file.

    The HTML is loaded with ``_get_content`` and parsed with the
    ``html.parser`` backend. For every matching span:

    * the username is the span's cleaned text;
    * the type is the ``data-hovercard-type`` attribute of the span's parent,
      falling back to the grandparent when the parent does not have it
      (``None`` if neither has it);
    * the status is ``True`` only when the next
      ``span.reviewers-status-icon`` in the document contains an ``<svg>``
      whose classes include ``color-text-success``.

    A missing grandparent, status icon, ``<svg>`` or ``class`` attribute no
    longer raises; the type falls back to ``None`` and the status to
    ``False``.

    Args:
        filename: Path of the saved HTML page to parse.

    Yields:
        SoupUser: one entry per matching span, in document order.
    """
    content: str = _get_content(filename=filename)
    soup = BeautifulSoup(content, "html.parser")
    for item in soup.find_all("span", "css-truncate-target"):
        user = _clear_string(item.text)

        # Only consult the grandparent when the parent lacks the attribute;
        # the grandparent is None when the parent is the document root.
        parent = item.parent
        user_type = parent.get("data-hovercard-type")
        if user_type is None and parent.parent is not None:
            user_type = parent.parent.get("data-hovercard-type")

        # find_next() returns None when no status icon follows, and the icon
        # may lack an <svg> child or the svg may lack a class attribute.
        status_icon = item.find_next("span", "reviewers-status-icon")
        svg = status_icon.svg if status_icon is not None else None
        status = svg is not None and "color-text-success" in svg.get("class", [])

        yield SoupUser(user, user_type, status)
def save_data(content_iter: Generator):
    """Write every item of *content_iter* to the ``DATABASE`` CSV file.

    The file is overwritten and starts with a ``username,type,status``
    header row. Each item is also echoed to stdout before its row is
    written. Items must expose ``username``, ``type``, ``status`` and a
    ``to_dict()`` method.

    Args:
        content_iter: Iterable of reviewer objects to export.
    """
    columns = ["username", "type", "status"]
    with open(DATABASE, "w", newline="") as out:
        csv_writer = csv.DictWriter(out, fieldnames=columns)
        csv_writer.writeheader()
        print("Reviewers ", "-" * 40)
        for reviewer in content_iter:
            # Echo the record to the console, then persist it.
            print(f"user: {reviewer.username}")
            print(f"type: {reviewer.type}")
            print(f"status: {reviewer.status}")
            print("-" * 50)
            csv_writer.writerow(reviewer.to_dict())
def main():
    """Parse the default HTML file and export its reviewers to the CSV file."""
    save_data(content_iter=read_content())


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment