Skip to content

Instantly share code, notes, and snippets.

@dav009
Last active August 29, 2015 14:08
Show Gist options
  • Save dav009/cea69eeddb3ec26c51bd to your computer and use it in GitHub Desktop.
Save dav009/cea69eeddb3ec26c51bd to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from scrapely import Scraper
# First sample page, followed by the two text snippets annotated on it.
url = "http://www.molnlycke.com/solutions/infection-prevention-surgery/costs-microbial-infections/"
chunk1 = "More than 1.4 million people at any given time suffer from healthcare-associated infections"
chunk2 = "SSIs account for 14 percent of all"

# Second sample page, followed by the two text snippets annotated on it.
url2 = "http://www.molnlycke.com/news-media/infection-prevention/barrier-scrub-suit-purple/"
chunk3 = "The BARRIER scrub suit family"
chunk4 = "We all know that when it comes to what we wear"
def add_sample(scraper, url, paragraph1, paragraph2):
    """Train *scraper* on one page, annotating two text snippets.

    Args:
        scraper: Object exposing ``train(url, data, encoding=...)``.
        url: Address of the page used as the training sample.
        paragraph1: Text to annotate under the field name ``"p1"``.
        paragraph2: Text to annotate under the field name ``"p2"``.

    Returns:
        None. The result of ``scraper.train`` is not propagated.
    """
    scraper.train(url, {"p1": paragraph1, "p2": paragraph2}, encoding="utf8")
def _show(scraper, heading, page_url):
    """Print *heading*, then the result of scraping *page_url* with *scraper*."""
    print(heading)
    print(scraper.scrape(page_url))


# --- Run 1: train on url first, then on url2 ---
s = Scraper()
add_sample(s, url, chunk1, chunk2)
# Author's note: with only the first sample trained, this works as expected.
_show(s, "\nscraping url", url)

add_sample(s, url2, chunk3, chunk4)
# Author's note: the first page still works after the second sample is added...
_show(s, "\nscraping url", url)
# ...but the second page is reported to come back empty (open question).
_show(s, "\nscraping url2", url2)

print("\n-----training first on url2 and then url------")

# --- Run 2: fresh scraper, train on url2 first, then on url ---
s = Scraper()
add_sample(s, url2, chunk3, chunk4)
# Author's note: with only this sample trained, this works as expected.
_show(s, "\nscraping url2", url2)

add_sample(s, url, chunk1, chunk2)
# Author's note: now the later-trained page (url) is the one reported empty.
_show(s, "\nscraping url", url)
# Author's note: the first-trained page (url2) still works.
_show(s, "\nscraping url2", url2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment