Skip to content

Instantly share code, notes, and snippets.

@hem6
Created June 17, 2018 03:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hem6/00320dc1458b0628a55ef27627b03d1c to your computer and use it in GitHub Desktop.
Save hem6/00320dc1458b0628a55ef27627b03d1c to your computer and use it in GitHub Desktop.
食べログの店舗情報をいい感じにScrapboxに登録するPythonistaスクリプト
import appex
import re
import requests
import urllib
from objc_util import *
from bs4 import BeautifulSoup
def main():
    """Open a Scrapbox page pre-filled with data from a Tabelog restaurant URL.

    When run as a share extension the URL comes from ``appex.get_url()``;
    when run inside the Pythonista app a hard-coded sample URL is used.
    The restaurant is loaded via ``Restaurant.init_with_url`` and its
    fields are printed. A URL of the form
    ``<base><quoted name>?body=<quoted body>`` is then built and opened
    through ``UIApplication.openURL_``.
    """
    # Imported here so ``urllib.parse`` is guaranteed to be loaded: a bare
    # ``import urllib`` does not import the ``parse`` submodule by itself.
    from urllib.parse import quote

    if appex.is_running_extension():
        url = appex.get_url()
    else:
        print('Running in Pythonista app, using test data...\n')
        url = 'https://tabelog.com/tokyo/A1302/A130201/13136102/'

    if not url:
        print('No input URL found.')
        return

    print('Input URL: %s' % (url, ))
    res = Restaurant.init_with_url(url)
    if not res:
        print('食べログのページじゃないかも')
        return

    print(res.name)
    print(res.location)
    print(res.category)
    print(res.image)

    base_url = 'https://scrapbox.io/hem6-private/'
    # safe='' also percent-encodes "/", so a slash in the restaurant name
    # stays inside the single title path segment instead of splitting it.
    title = quote(res.name, safe='')
    raw_body = '[{}]\n[{} {}]\n#飲み屋 #{} #{}'.format(
        res.image, res.page_title, res.page_url, res.location,
        res.category)
    body = quote(raw_body)
    scrapbox_url = base_url + title + '?body=' + body

    app = UIApplication.sharedApplication()
    app.openURL_(nsurl(scrapbox_url))
class Restaurant(object):
    """Facts about one restaurant, scraped from its Tabelog page.

    Attributes:
        name: First part of the page title (before `` - ``).
        location: Title part between `` - `` and ``/``.
        category: Title part between ``/`` and `` [食べログ]``.
        image: ``content`` of the page's ``og:image`` meta tag, or ``''``.
        page_title: The full page title string.
        page_url: The URL the page was fetched from.
    """

    # Titles are expected to look like "<name> - <location>/<category> [食べログ]".
    _TITLE_PATTERN = re.compile(r'(.+) \- (.+)/(.+) \[食べログ\]')

    def __init__(self, name, location, category, image, page_title, page_url):
        self.name = name
        self.location = location
        self.category = category
        self.image = image
        self.page_title = page_title
        self.page_url = page_url

    @classmethod
    def _parse_title(cls, title):
        """Return ``(name, location, category)`` parsed from *title*, or None.

        None is returned for a missing/empty title as well as for a title
        that does not match the expected pattern.
        """
        if not title:
            return None
        match = cls._TITLE_PATTERN.match(title)
        return match.groups() if match else None

    @classmethod
    def init_with_url(cls, url, timeout=10):
        """Fetch *url* and build an instance from the page's title and meta tags.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to ``requests.get`` so a stalled
                connection cannot hang the script indefinitely.

        Returns:
            A new instance, or None when the page title is missing or does
            not match the expected pattern. Exceptions raised by
            ``requests.get`` are not caught here.
        """
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.text, 'html5lib')

        # A page may have no <title>, or a title without a single string;
        # treat both as "not a Tabelog page" instead of raising.
        title = soup.title.string if soup.title else None
        parsed = cls._parse_title(title)
        if parsed is None:
            print('タイトルが食べログっぽくない: ', title)
            return None
        name, location, category = parsed

        og_image = soup.find('meta', property='og:image')
        # .get() tolerates a meta tag that lacks a content attribute.
        image = og_image.get('content', '') if og_image else ''

        # cls(...) rather than Restaurant(...) so subclasses get their own type.
        return cls(name, location, category, image, title, url)
# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment