Skip to content

Instantly share code, notes, and snippets.

@citrus-lemon
Created April 8, 2018 13:14
Show Gist options
  • Save citrus-lemon/2158e7cf0734b514cff40bae9fd8fd3e to your computer and use it in GitHub Desktop.
Save citrus-lemon/2158e7cf0734b514cff40bae9fd8fd3e to your computer and use it in GitHub Desktop.
Get LINE Sticker from Webpage
#!/usr/bin/env python3
'Get LINE Sticker'
import requests
import re
import os
import shutil
from urllib.parse import urlparse
from bs4 import BeautifulSoup
# Patterns applied to the text of the page's inline "OPTIONS.config.sticker" script.
_STICKER_TYPE_RE = re.compile(r"type:\s*[\"](|animation|sound|soundanimation|popup|popupsound)[\"]")
_STICKER_IDS_RE = re.compile(r"ids:\s*\[([\d,\s]*?)\]")
# Pattern applied to the ':style' attribute of each sticker image element.
_STICKER_STYLE_RE = re.compile(r"stickerStyle\({\s*id:\s*(\d+)\s*}\)")

_STICKER_PNG_URL = "https://stickershop.line-scdn.net/stickershop/v1/sticker/{}/ANDROID/sticker.png;compress=true"
_STICKER_SOUND_URL = "https://stickershop.line-scdn.net/stickershop/v1/sticker/{}/IOS/sticker_sound.m4a;compress=true"

# Sticker type -> tuple of ((resource name, file extension), URL template).
# Only the types listed here can be downloaded.
_TYPE_RESOURCES = {
    '': ((('sticker', 'png'), _STICKER_PNG_URL),),
    'sound': (
        (('sticker', 'png'), _STICKER_PNG_URL),
        (('sound', 'm4a'), _STICKER_SOUND_URL),
    ),
}


def _parse_sticker_type(script_text):
    """Return the sticker type named in the config script, or '' if none is found."""
    found = _STICKER_TYPE_RE.search(script_text)
    # search() returns None when nothing matches; subscripting None would raise
    # TypeError, so the missing case is handled explicitly.
    return found.group(1) if found else ''


def _parse_script_ids(script_text):
    """Return the sticker ids listed in the config script (empty list if none)."""
    found = _STICKER_IDS_RE.search(script_text)
    if not found:
        return []
    # Blank tokens (an empty list or a trailing comma) are skipped instead of
    # being handed to int().
    return [int(token) for token in found.group(1).split(',') if token.strip()]


def _parse_dom_ids(sticker_list):
    """Return the sticker ids read from the ':style' attribute of each list item."""
    # NOTE(review): assumes every item has a '.mdCMN09Image' child whose ':style'
    # value starts with "stickerStyle({id: N})" -- confirm against a live page.
    return [
        int(_STICKER_STYLE_RE.match(item.select_one('.mdCMN09Image')[':style'])[1])
        for item in sticker_list.select('.mdCMN09Li')
    ]


def getStickerInfo(url):
    """Fetch a sticker store page and collect what is needed to download the set.

    Args:
        url: Page URL or bare path. Only its host and path are used: the
            scheme is forced to https and the host defaults to store.line.me.

    Returns:
        A dict with the keys:
            'title', 'desc': text of the page's title and description elements.
            'resource_type': ``(sticker_type, ((name, extension), ...))``.
            'urls': one tuple of resource URLs per sticker id, in the same
                order as the ``(name, extension)`` pairs.
            'recommand': attribute dicts of the links found in the
                recommendation box (empty when the box is absent).

    Raises:
        NotImplementedError: the sticker type has no entry in the resource table.
        IndexError: the page has no script containing 'OPTIONS.config.sticker'.
        requests.HTTPError: the page request returned an error status.
    """
    parts = urlparse(url)
    page_url = 'https://' + (parts.netloc or 'store.line.me') + parts.path

    response = requests.get(page_url)
    # Fail on an error status here rather than on a confusing parse error below.
    response.raise_for_status()
    page = BeautifulSoup(response.text, "lxml")

    title = page.select_one('.mdCMN08Ttl').text
    desc = page.select_one('.mdCMN08Desc').text
    sticker_list = page.select_one('.mdCMN09ImgList')

    config_script = next(
        (script.text for script in page.select('script')
         if 'OPTIONS.config.sticker' in script.text),
        None)
    if config_script is None:
        raise IndexError(
            'no OPTIONS.config.sticker script found on {}'.format(page_url))

    sticker_type = _parse_sticker_type(config_script)
    if sticker_type not in _TYPE_RESOURCES:
        raise NotImplementedError('not implement this sticker')
    resources = _TYPE_RESOURCES[sticker_type]

    # Prefer the id list from the script; fall back to the rendered list items
    # when the script carries no ids.
    ids = _parse_script_ids(config_script) or _parse_dom_ids(sticker_list)

    recommend_box = page.select_one('.mdBox03Inner02')
    recommend_items = recommend_box.select('.mdCMN01Li') if recommend_box is not None else []
    recommend_links = (item.select_one('a') for item in recommend_items)

    return {
        'title': title,
        'desc': desc,
        'resource_type': (sticker_type, tuple(kind for kind, _ in resources)),
        'urls': tuple(
            tuple(template.format(sticker_id) for _, template in resources)
            for sticker_id in ids),
        'recommand': tuple(link.attrs for link in recommend_links if link is not None),
    }
def dumpSticker(info):
    """Download every resource of a sticker set into a directory named by its title.

    Args:
        info: Dict as returned by ``getStickerInfo``. The keys read here are
            'title', 'resource_type' and 'urls'.

    Raises:
        FileExistsError: a directory named ``info['title']`` already exists
            (raised by ``os.mkdir``; callers use it to detect earlier downloads).
        requests.HTTPError: a resource request returned an error status.
    """
    target_dir = info['title']
    os.mkdir(target_dir)
    print('{}'.format(target_dir))

    resource_kinds = info['resource_type'][1]
    for number, sticker_urls in enumerate(info['urls'], start=1):
        for (kind, extension), resource_url in zip(resource_kinds, sticker_urls):
            filename = '{}{}.{}'.format(number, kind, extension)
            # The context manager closes the streamed response even when the
            # status check or the file write fails.
            with requests.get(resource_url, stream=True) as response:
                # Throw an error for bad status codes
                response.raise_for_status()
                with open(os.path.join(target_dir, filename), 'wb') as handle:
                    for block in response.iter_content(1024):
                        handle.write(block)
            print(' sticker {} ({}) download'.format(number, kind))
import time,sys
def spider(first):
    """Crawl sticker pages starting at ``first`` and download up to 100 sets.

    Each visited page is downloaded with ``dumpSticker``; the links in its
    'recommand' entry are queued as further pages to visit. The crawl stops
    after 100 successful downloads or when no unvisited link is left.

    Args:
        first: URL or path of the first sticker page.

    A page whose sticker type is not supported, whose directory already
    exists, or whose download fails is skipped. A failed or interrupted
    download has its partially written directory removed. On
    KeyboardInterrupt/SystemExit the process exits with status 1.
    """
    # Local import: the file's top-level imports do not include collections.
    from collections import deque

    max_downloads = 100
    pause_seconds = 5

    pending = deque()  # hrefs waiting to be visited, in discovery order
    seen = {first}     # every link already visited or queued
    current = first
    downloaded = 0

    def enqueue(links):
        """Queue the href of every link that has not been seen before."""
        for attrs in links:
            href = attrs.get('href')
            if href and href not in seen:
                seen.add(href)
                pending.append(href)

    try:
        while downloaded < max_downloads:
            try:
                info = getStickerInfo(current)
            except NotImplementedError:
                print('Not imp', current)
                # No usable info for this page; never fall back to the
                # previous page's data.
                info = None

            if info is not None:
                # Keep the crawl going even when this page ends up skipped.
                if not pending:
                    enqueue(info['recommand'])
                try:
                    dumpSticker(info)
                except FileExistsError:
                    # Directory was already there; leave it untouched.
                    pass
                except (KeyboardInterrupt, SystemExit):
                    shutil.rmtree(info['title'], ignore_errors=True)
                    raise
                except Exception as error:
                    # Drop the partial directory and move on rather than
                    # retrying the same page in a tight loop.
                    shutil.rmtree(info['title'], ignore_errors=True)
                    print('Failed', current, error)
                else:
                    downloaded += 1
                    enqueue(info['recommand'])

            if not pending:
                break
            current = pending.popleft()
            time.sleep(pause_seconds)
    except (KeyboardInterrupt, SystemExit):
        # Only an interrupted download is cleaned up (above); an interrupt
        # while fetching a page or sleeping leaves finished directories alone.
        os._exit(1)
if __name__ == "__main__":
    def get(link):
        """Print a sticker set's name, description and type, then download it."""
        info = getStickerInfo(link)
        print('sticker name: %s' % info['title'])
        print('sticker desc: %s' % info['desc'])
        resource_type = info['resource_type']
        type_name = resource_type[0] if resource_type else ''
        # The plain sticker type is the empty string; show it as 'normal'.
        print('sticker type: %s' % (type_name or 'normal'))
        try:
            dumpSticker(info)
        except FileExistsError:
            print('WARNING: sticker %s exists' % info['title'])
        except (KeyboardInterrupt, SystemExit):
            # Remove the partial directory so a later run is not blocked by
            # the "exists" check above.
            shutil.rmtree(info['title'], ignore_errors=True)
            print('Interrupt')
            # sys.exit flushes stdout on the way out, so the message is kept.
            sys.exit(1)

    commands = {
        'spider': spider,
        'get': get,
    }
    # Show the usage line for a missing link and for an unknown command alike.
    if len(sys.argv) > 2 and sys.argv[1] in commands:
        commands[sys.argv[1]](sys.argv[2])
    else:
        print('linesticker.py spider|get link')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment