Skip to content

Instantly share code, notes, and snippets.

@riceissa
Created August 6, 2018 18:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save riceissa/a5c5cc3e858f6267c265769c776ad9f0 to your computer and use it in GitHub Desktop.
diff --git a/proc.py b/proc.py
index a5cdafb..cc061ad 100755
--- a/proc.py
+++ b/proc.py
@@ -3,6 +3,8 @@
import sys
from bs4 import BeautifulSoup
+SEEN_SEPARATE = {}
+SEEN_ALL_IN_ONE = []
def mysql_quote(x):
"""Quote the string x using MySQL quoting rules. If x is the empty string,
@@ -29,6 +31,20 @@ def main():
print_sql(soup_to_grants_generator(soup, focus_area, month))
+ with open("grants-2018-08-06.html", "r") as f:
+ soup = BeautifulSoup(f, "lxml")
+ for grantee_div in soup.find("div", {"class": "grants-archive-inner"}).find_all("div", {"class": "grantee"}):
+ grantee = grantee_div.find("div", {"class": "grantee-name"}).text
+ info = grantee_div.find("div", {"class": "grantee-info"}).text
+ grantee_location, amount = info.split("•")
+ amount = float(amount.replace("$", "").replace(",", ""))
+ purpose = grantee_div.find("div", {"class": "grantee-purpose"}).text
+
+ tup = (grantee, amount, purpose)
+ SEEN_ALL_IN_ONE.append(tup)
+
+ print(len(SEEN_SEPARATE), len(SEEN_ALL_IN_ONE), file=sys.stderr)
+
def soup_to_grants_generator(soup, focus_area, month):
for grantee_div in soup.find("div", {"class": "grants-archive-inner"}).find_all("div", {"class": "grantee"}):
@@ -38,6 +54,11 @@ def soup_to_grants_generator(soup, focus_area, month):
amount = float(amount.replace("$", "").replace(",", ""))
purpose = grantee_div.find("div", {"class": "grantee-purpose"}).text
+ tup = (grantee, amount, purpose)
+ if tup in SEEN_SEPARATE:
+ print("SEEN", tup, SEEN_SEPARATE[tup], (focus_area, month), file=sys.stderr)
+ SEEN_SEPARATE[tup] = (focus_area, month)
+
focus_area_map = {
"community-development": "Community development",
"education-youth": "Education/Youth",
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment