groups = scraper.get_result_similar(url, grouped=True)
Since groups is a dictionary, you can get the names of the rules by calling
groups.keys()
You can then index into the dictionary with a particular rule name to get the items (here, reviews) matched by that rule.
from autoscraper import AutoScraper | |
# replace with desired url | |
url = 'https://www.yelp.com/biz/chun-yang-tea-flushing-new-york-flushing' | |
# make sure that autoscraper can exactly match the items in your wanted_list | |
wanted_list = ['A review'] # replace with item(s) of interest | |
# build the scraper | |
scraper = AutoScraper() | |
result = scraper.build(url, wanted_list) |
groups = scraper.get_result_similar(url, grouped=True)
Since groups is a dictionary, you can get the names of the rules by calling
groups.keys()
You can then index into the dictionary with a particular rule name to get the items (here, reviews) matched by that rule.
#0 | |
from autoscraper import AutoScraper | |
#1 | |
url = "https://www.yelp.com/biz/chun-yang-tea-flushing-new-york-flushing?osq=bubble%20tea&sort_by=date_desc" | |
#2 | |
wanted_list = ['Tried their Brown Sugar milk tea and it was not bad compare to Tiger Sugar. I prefer this over Tiger Sugar due to the L size option and sweetness content. It was my to go bubble tea spot for the last two days straight. Will visit again!'] | |
#3 |
from bs4 import BeautifulSoup | |
url = "https://www.yelp.com/biz/chun-yang-tea-flushing-new-york-flushing?osq=bubble%20tea&sort_by=date_desc" | |
browser.get(url) | |
response = BeautifulSoup(browser.page_source, 'html.parser') | |
published = response.find_all('span', class_='lemon--span__373c0__3997G raw__373c0__3rKqk') |
from selenium import webdriver | |
def setup_browser(driver_path=None, headless=False):
    """Create a Selenium Chrome WebDriver.

    Args:
        driver_path: Path to the chromedriver executable. When given, it is
            passed to ``webdriver.Chrome`` together with the options.
        headless: When true, the ``headless`` argument is added to the
            Chrome options.

    Returns:
        The ``webdriver.Chrome`` instance when ``driver_path`` is given.
    """
    op = webdriver.ChromeOptions()
    if headless:
        op.add_argument('headless')
    if driver_path:
        # Must be the imported ``webdriver`` module; the misspelled
        # ``ebdriver`` raised NameError whenever a driver path was supplied.
        return webdriver.Chrome(driver_path, options=op)
    # this depends on where you install chromedriver
    # NOTE(review): the fallback for a missing driver_path is not shown in
    # this excerpt; as written the function returns None in that case.
# 0: returns the detected language | |
detected.lang | |
# 1: returns the confidence in the predicted language | |
detected.confidence |
# 0: returns the detected source language | |
translated.src | |
# 1: returns the language the text has been translated to | |
translated.dest | |
# 2: returns the original text | |
translated.origin | |
# 3: returns the translated text |
#0 import libraries | |
from googletrans import Translator | |
import multiprocessing | |
import workers | |
#1 create Translator object | |
translator = Translator() | |
#2 create multiprocessing pool | |
pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()) |
from googletrans import Translator | |
translator = Translator() | |
def google_translate(text, dest='en'):
    """Translate ``text`` using the module-level ``translator``.

    Args:
        text: Value forwarded unchanged to ``translator.translate``.
        dest: Destination language passed through as the ``dest`` keyword;
            defaults to ``'en'``.

    Returns:
        Whatever ``translator.translate`` returns for the given input.
    """
    translation = translator.translate(text, dest=dest)
    return translation
if isinstance(text, list): | |
result = [] | |
for item in text: | |
translated = self.translate(item, dest=dest, src=src, **kwargs) | |
result.append(translated) | |
return result |