Created
June 17, 2018 03:17
-
-
Save hem6/00320dc1458b0628a55ef27627b03d1c to your computer and use it in GitHub Desktop.
食べログの店舗情報をいい感じにScrapboxに登録するPythonistaスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import appex | |
import re | |
import requests | |
import urllib | |
from objc_util import * | |
from bs4 import BeautifulSoup | |
def main():
    """Register a Tabelog restaurant page as a new Scrapbox page.

    Takes the URL handed to the share-sheet extension (or a built-in sample
    URL when run directly inside the Pythonista app), scrapes the restaurant
    details via ``Restaurant.init_with_url`` and opens a Scrapbox URL whose
    path is the restaurant name and whose ``body`` query parameter holds the
    image, a link back to the source page and hashtags.
    """
    # Imported explicitly: a bare `import urllib` does not guarantee that the
    # `urllib.parse` submodule has been loaded.
    from urllib.parse import quote

    if not appex.is_running_extension():
        print('Running in Pythonista app, using test data...\n')
        url = 'https://tabelog.com/tokyo/A1302/A130201/13136102/'
    else:
        url = appex.get_url()

    if not url:
        print('No input URL found.')
        return

    print('Input URL: %s' % (url, ))
    res = Restaurant.init_with_url(url)
    if not res:
        print('食べログのページじゃないかも')
        return

    print(res.name)
    print(res.location)
    print(res.category)
    print(res.image)

    base_url = 'https://scrapbox.io/hem6-private/'
    # safe='' so that a '/' inside the restaurant name is percent-encoded
    # instead of being treated as an extra URL path separator.
    title = quote(res.name, safe='')
    raw_body = '[{}]\n[{} {}]\n#飲み屋 #{} #{}'.format(
        res.image, res.page_title, res.page_url, res.location,
        res.category)
    body = quote(raw_body)
    scrapbox_url = base_url + title + '?body=' + body

    app = UIApplication.sharedApplication()
    app.openURL_(nsurl(scrapbox_url))
class Restaurant(object):
    """Details of one restaurant page, as needed to build a Scrapbox entry.

    Attributes:
        name: Restaurant name taken from the page title.
        location: Text between ' - ' and '/' in the page title.
        category: Text between '/' and ' [食べログ]' in the page title.
        image: Value of the page's ``og:image`` meta tag, or '' if absent.
        page_title: The full page title.
        page_url: The URL the page was fetched from.
    """

    # The pattern expects titles shaped like
    # '<name> - <location>/<category> [食べログ]'.
    _TITLE_PATTERN = re.compile(r'(.+) \- (.+)/(.+) \[食べログ\]')

    # Seconds to wait for the server before giving up instead of hanging.
    _REQUEST_TIMEOUT = 10

    def __init__(self, name, location, category, image, page_title, page_url):
        self.name = name
        self.location = location
        self.category = category
        self.image = image
        self.page_title = page_title
        self.page_url = page_url

    @staticmethod
    def _parse_title(title):
        """Split a page title into ``(name, location, category)``.

        Returns None when the title is missing/empty or does not match the
        expected pattern.
        """
        if not title:
            return None
        match = Restaurant._TITLE_PATTERN.match(title)
        return match.groups() if match else None

    @classmethod
    def init_with_url(cls, url):
        """Fetch ``url`` and build an instance from the page's metadata.

        Args:
            url: Address of the page to fetch.

        Returns:
            A new instance, or None (after printing a notice) when the page
            has no title or the title does not match the expected pattern.

        Raises:
            requests.RequestException: If the HTTP request fails or times out.
        """
        response = requests.get(url, timeout=cls._REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, 'html5lib')

        # A page may have no <title>, and .string is None for a title tag
        # without a single text child; treat both as "not recognised".
        title_tag = soup.title
        title = title_tag.string if title_tag is not None else None

        parsed = cls._parse_title(title)
        if parsed is None:
            print('タイトルが食べログっぽくない: ', title)
            return None
        name, location, category = parsed

        og_image = soup.find('meta', property='og:image')
        # .get() tolerates a meta tag that lacks a content attribute.
        image = og_image.get('content', '') if og_image is not None else ''

        # cls(...) rather than Restaurant(...) so subclasses get their own type.
        return cls(name, location, category, image, title, url)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment