Skip to content

Instantly share code, notes, and snippets.

@dohvis
Last active December 1, 2018 01:54
Show Gist options
  • Save dohvis/48aaf04b2d7405e60363cc6846efe3a9 to your computer and use it in GitHub Desktop.
Save dohvis/48aaf04b2d7405e60363cc6846efe3a9 to your computer and use it in GitHub Desktop.
goodreads book add helper
import re
import sys
from requests import get
from requests_html import (
HTMLSession,
)
import json


def strip_leading_zeros(number_text):
    """Return *number_text* without its leading zeros.

    Only zeros at the start are removed, so ``"05"`` becomes ``"5"`` while
    ``"10"`` and ``"20"`` stay intact. An all-zero string collapses to
    ``"0"`` rather than to the empty string.
    """
    return number_text.lstrip("0") or "0"


def jquery_val_statement(field_id, value):
    """Return a JavaScript statement that sets form field *field_id* to *value*.

    The value is serialised with ``json.dumps`` so that quotes, backslashes
    and newlines in scraped text cannot terminate the string literal early.
    ``ensure_ascii=False`` keeps non-ASCII text (e.g. Korean titles) readable.
    """
    js_literal = json.dumps(value, ensure_ascii=False)
    return f'jQuery("#{field_id}").val({js_literal})'


def main():
    """Scrape one book page and print jQuery statements describing the book.

    The page URL is read from ``sys.argv[1]``. For every scraped attribute a
    ``jQuery("#<field>").val("<value>")`` line is printed (presumably meant to
    be pasted into the browser console on the Goodreads "add book" form, per
    the gist title), and the cover image is saved to ``/tmp/a.jpg``.

    Raises:
        IndexError: if no URL argument is given or an expected table
            row/cell is absent from the page.
        AttributeError: if a mandatory element (title, author, image, date)
            is not found on the page.
    """
    session = HTMLSession()
    url = sys.argv[1]
    html = session.get(url).html

    title = html.find(".p_topt01", first=True).text
    try:
        subtitle = html.find("span.p_tbig16", first=True).text.split("- ")[-1]
    except AttributeError:
        # The subtitle is optional: find(..., first=True) yields None when
        # the element is missing, and None has no ``.text``.
        subtitle = None

    table = html.find('.pwrap_bgtit > table > tr')[1].find('td', first=True).text
    author = html.find('a.np_af', first=True).full_text
    # The cell is "|"-separated; the first part is skipped and the remaining
    # two are unpacked as publisher and publication date, spaces removed.
    publisher, pub_date = [part.replace(' ', '') for part in table.split('|')][1:]

    # Page count and ISBN live in the same cell, so look it up only once.
    details_text = html.find('.p_goodstd03')[1].find('td')[0].text
    page_cnt = re.search(r'(\d+)(?=쪽)', details_text).groups()[0]
    isbn_13 = details_text.split("ISBN : ")[1]

    image_url = (
        html.find(".pwrap_bgbd_foot>table>tr>td>table", first=True)
        .find('img', first=True)
        .attrs['src']
    )

    year, month, day = re.search(
        r'(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+)', pub_date
    ).groups()
    # Strip only *leading* zeros; removing every "0" would turn "10" into "1".
    month = strip_leading_zeros(month)
    day = strip_leading_zeros(day)

    field_ids = (
        "book_title",
        'book_sort_by_title',
        'author_name',
        'book_isbn13',
        'book_publisher',
        'book_publication_year',
        'book_publication_month',
        'book_publication_day',
        'book_num_pages',
    )
    values = (
        title,
        f'{title}, {subtitle}' if subtitle else title,
        author,
        isbn_13,
        publisher,
        year,
        month,
        day,
        page_cnt,
    )
    for field, value in zip(field_ids, values):
        print(jquery_val_statement(field, value))
    print('jQuery("#book_language_code").val("kor")')

    # Save the cover image to a fixed local path.
    with open("/tmp/a.jpg", "wb") as fp:
        fp.write(get(image_url).content)


if __name__ == "__main__":
    main()
import re
import sys
from requests import get
from requests_html import (
HTMLSession,
)
import json


def strip_leading_zeros(number_text):
    """Return *number_text* without its leading zeros.

    Only zeros at the start are removed, so ``"05"`` becomes ``"5"`` while
    ``"10"`` and ``"20"`` stay intact. An all-zero string collapses to
    ``"0"`` rather than to the empty string.
    """
    return number_text.lstrip("0") or "0"


def jquery_val_statement(field_id, value):
    """Return a JavaScript statement that sets form field *field_id* to *value*.

    The value is serialised with ``json.dumps`` so that quotes, backslashes
    and newlines in scraped text cannot terminate the string literal early.
    ``ensure_ascii=False`` keeps non-ASCII text (e.g. Korean titles) readable.
    """
    js_literal = json.dumps(value, ensure_ascii=False)
    return f'jQuery("#{field_id}").val({js_literal})'


def main():
    """Scrape one book page and print jQuery statements describing the book.

    The page URL is read from ``sys.argv[1]``. For every scraped attribute a
    ``jQuery("#<field>").val("<value>")`` line is printed (presumably meant to
    be pasted into the browser console on the Goodreads "add book" form, per
    the gist title), and the cover image is saved to ``/tmp/a.jpg``.

    Raises:
        IndexError: if no URL argument is given or fewer ``.cell_2col``
            elements than expected are present.
        AttributeError: if a mandatory element (title, author, date,
            publisher, image) is not found on the page.
    """
    session = HTMLSession()
    url = sys.argv[1]
    page = session.get(url).html

    title = page.find(".gd_name", first=True).text
    try:
        subtitle = page.find(".gd_nameE", first=True).text
    except AttributeError:
        # The subtitle is optional: find(..., first=True) yields None when
        # the element is missing, and None has no ``.text``.
        subtitle = None

    # Keep only the text before the " 저" marker that follows the name.
    author = page.find(".gd_auth > a", first=True).text.split(" 저")[0]
    pub_date = page.find(".gd_date", first=True).text
    publisher = page.find(".gd_pub > a", first=True).text

    # Page count and both ISBNs are read from the same list of cells,
    # so query it once instead of three times.
    cells = page.find(".cell_2col")
    page_cnt = cells[1].text.split("쪽")[0]
    isbn_13 = cells[2].text
    isbn_10 = cells[3].text

    image_url = page.find(
        "#yDetailTopWrap > div.topColLft > div > span > em > img", first=True
    ).attrs['src']

    year, month, day = re.search(
        r'(?P<year>\d+)년 (?P<month>\d+)월 (?P<day>\d+)일', pub_date
    ).groups()
    # Strip only *leading* zeros; removing every "0" would turn "10" into "1".
    month = strip_leading_zeros(month)
    day = strip_leading_zeros(day)

    field_ids = (
        "book_title",
        'book_sort_by_title',
        'author_name',
        'book_isbn',
        'book_isbn13',
        'book_publisher',
        'book_publication_year',
        'book_publication_month',
        'book_publication_day',
        'book_num_pages',
    )
    values = (
        title,
        f'{title}, {subtitle}' if subtitle else title,
        author,
        isbn_10,
        isbn_13,
        publisher,
        year,
        month,
        day,
        page_cnt,
    )
    for field, value in zip(field_ids, values):
        print(jquery_val_statement(field, value))

    # Save the cover image to a fixed local path.
    with open("/tmp/a.jpg", "wb") as fp:
        fp.write(get(image_url).content)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment