Last active
March 11, 2017 10:56
-
-
Save InNoobWeTrust/27aaac7cfcab4aaf6effc897efacd191 to your computer and use it in GitHub Desktop.
A simple link getter for the truyen.academyvn.com manga site, useful for reducing your mobile data usage. Install Termux and DroidScript, run the Python script in Termux to fetch a chapter, then view it in DroidScript with the example JS script below (there is no backward page turning, as I was too lazy to add it :] ). After running …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Image URLs of the chapter currently loaded (assigned in HandleReply).
var links = [];
//Index into `links` of the page SetImage() will show next.
var viewIndex = 0;
//Index into `links` of the page most recently handed to the downloader.
var loadIndex = 0;
//Called when application is started.
//Builds the UI: a scroller containing the manga page image and, below it, a
//row with the chapter-link text box and the "Get links" button. The widgets
//are deliberately assigned without `var`: `img`, `edt` and `dload` are read
//by the other handlers in this script.
function OnStart()
{
    lay = app.CreateLayout( "linear", "VCenter,FillXY" );

    //Scroller
    scroll = app.CreateScroller( 1.0, 1.0 );
    lay.AddChild( scroll );
    layScroll = app.CreateLayout( "Linear", "Center" );
    scroll.AddChild( layScroll );

    //Manga page, touch to go to next page (no backward at this time)
    img = app.CreateImage( "/Sys/Img/Droid1.png", 1.0, 0.9 );
    img.SetOnTouchUp( img_OnTouch );
    layScroll.AddChild( img );

    //Input block
    layInput = app.CreateLayout('Linear', 'Horizontal,FillXY');
    layScroll.AddChild(layInput);

    //Input link to chapter here.
    edt = app.CreateTextEdit( "", 0.8 );
    edt.SetTextSize(9);
    layInput.AddChild(edt);

    //Click to get links of images
    btn = app.CreateButton("Get links");
    btn.SetOnTouch( btn_OnTouch );
    layInput.AddChild(btn);

    //Add layout to app.
    app.AddLayout( lay );

    //Downloader shared by HandleReply and CheckPageCount.
    dload = app.CreateDownloader( );
}
//Touch handler of the page image: show the next page, but only once the
//download chain has reached the last link of the chapter.
function img_OnTouch()
{
    if (loadIndex === links.length - 1) {
        SetImage();
    }
}
//Show the page at `viewIndex` from the download folder, then advance
//`viewIndex`. The file name is the last path segment of the page's URL.
function SetImage()
{
    //Nothing left to show; without this guard the path would end in
    //"undefined" once the last page has been displayed.
    if (viewIndex >= links.length) {
        return;
    }
    var parts = String(links[viewIndex]).split('/');
    var path = "/sdcard/Download/academyvn/" + parts[parts.length - 1];
    img.SetImage(path);
    img.Update();
    viewIndex++;
}
//Called when user touches our button.
//Requests the chapter page whose link is typed in the text box; an empty
//box shows a hint instead of sending a request.
function btn_OnTouch()
{
    var url = String(edt.GetText()).trim();
    if (url === "") {
        app.ShowPopup( "Please enter a chapter link" );
        return;
    }
    SendRequest(url);
}
//Send an http get request.
//The reply is passed to HandleReply on every readyState change. If the
//request cannot even be started, the error is shown and no progress dialog
//is opened.
function SendRequest( url )
{
    var httpRequest = new XMLHttpRequest();
    httpRequest.onreadystatechange = function() { HandleReply(httpRequest); };
    try {
        httpRequest.open("GET", url, true);
        httpRequest.send(null);
    } catch (err) {
        app.ShowPopup( "Error: \n" + err.message );
        return;
    }
    app.ShowProgress( "Loading..." );
}
//Download-complete callback: queue the next image of the chapter, or, when
//the last link has finished downloading, show the first page.
//`loadIndex` is advanced only when another download is started, so the last
//link is downloaded too and `loadIndex` ends at `links.length - 1`.
function CheckPageCount()
{
    if (loadIndex < links.length - 1) {
        loadIndex++;
        dload.SetOnComplete(CheckPageCount);
        dload.Download( links[loadIndex], '/sdcard/Download/academyvn' );
    } else {
        viewIndex = 0;
        SetImage();
    }
}
//Handle the server's reply (the HTML of a chapter page).
//Collects the `src` of every <img> inside the first "manga-container"
//element into `links` and starts downloading the first one; the remaining
//downloads are chained through CheckPageCount.
function HandleReply( httpRequest )
{
    //Intermediate states carry no complete reply; keep the progress dialog up.
    if( httpRequest.readyState !== 4 ) return;
    app.HideProgress();

    //An error occurred
    if( httpRequest.status !== 200 )
    {
        app.ShowPopup( "Error: \n" + httpRequest.responseText);
        return;
    }

    //NOTE(review): assigning untrusted HTML to innerHTML may let the WebView
    //start fetching the page's images on its own - consider DOMParser if the
    //target WebView supports 'text/html'.
    var el = document.createElement( 'html' );
    el.innerHTML = httpRequest.responseText;
    var container = el.getElementsByClassName("manga-container")[0];
    var images = container ? container.getElementsByTagName("img") : [];
    links = [].map.call( images, function(node){
        return node.src;
    });
    if( links.length === 0 )
    {
        app.ShowPopup( "Error: \nNo images found in this page" );
        return;
    }

    //Restart the download chain for the new chapter.
    loadIndex = 0;
    app.MakeFolder('/sdcard/Download/academyvn');
    dload.SetOnComplete(CheckPageCount);
    dload.Download( links[0], '/sdcard/Download/academyvn' );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
import re
from collections import OrderedDict
try:
    # Python 3 module name.
    from html.parser import HTMLParser
except ImportError:
    # Fallback to the Python 2 module name.
    from HTMLParser import HTMLParser
import platform
if platform.system() == 'Windows':
    import os
    # Ask the Windows console to switch to code page 65001 (UTF-8) so the
    # Vietnamese text printed by this script is displayed correctly.
    os.popen('chcp 65001')
# Endpoint of the site's search form.
search_link = 'http://truyen.academyvn.com/searchs'

# Reference table of the search form's fields and the values each accepts
# (numeric code -> label shown by the site). Not used as request parameters
# by the code in this file; see `example_search` for a ready-to-send query.
search_query_params = {
    'keyword': '',
    'type': {
        -1: 'Tất cả',  # "All"
        0: 'Khác',  # "Other"
        1: 'Manga',
        2: 'Manhwa',
        3: 'Manhua',
    },
    'author': {
        -1: 'Tất cả',  # "All"
    },
    # 'genres[]': {},
    'status': {
        -1: 'Tất cả',  # "All"
        0: 'Ngưng',  # "Stopped"
        1: 'Đang tiến hành',  # "Ongoing"
        2: 'Đã hoàn thành',  # "Completed"
    },
    'submit': 'Tìm kiếm',  # "Search"
}

# A concrete query: the default argument of search() and the dict the
# command-line flow fills in with the user's keyword.
example_search = {
    'keyword': 'Gintama',
    'type': -1,
    'author': -1,
    'status': -1,
    'submit': 'Tìm kiếm',
}

# Sort orders listed for the manga index page (not used by the code in this file).
mangas_link_query_params = {
    'filter_type': [
        'name',
        'view',
        'latest-chapter',
        'latest-manga',
    ]
}

# Listing page of each category; the first key is refresh_pages()'s default.
link_dict = OrderedDict([
    ('mangas', 'http://truyen.academyvn.com/manga/all'),
    ('teams', 'http://truyen.academyvn.com/teams'),
    ('authors', 'http://truyen.academyvn.com/authors'),
])

# Patterns for pagination links; group 2 captures the page number.
# The dots of the host name are escaped so they only match a literal '.'.
pagination_regex_dict = OrderedDict([
    ('mangas', r'^(http://truyen\.academyvn\.com/manga/all\?page=(\d+))'),
    ('teams', r'^(http://truyen\.academyvn\.com/teams\?page=(\d+))'),
    ('authors', r'^(http://truyen\.academyvn\.com/authors\?page=(\d+))'),
    ('searchs', r'^(http://truyen\.academyvn\.com/searchs.*page=(\d+))'),
])

# Patterns for links to a single item of each category.
category_regex_dict = OrderedDict([
    ('mangas', r'(^http://truyen\.academyvn\.com/manga/\d+/\w+)'),
    ('teams', r'(^http://truyen\.academyvn\.com/teams/\d+/\w+)'),
    ('authors', r'(^http://truyen\.academyvn\.com/authors/\d+/\w+)'),
])

# Patterns for links to a manga page and to a chapter page.
manga_regex = r'(^http://truyen\.academyvn\.com/manga/\d+/\w+)'
chapter_regex = r'(^http://truyen\.academyvn\.com/chapter/\d+/[\w-]+)'
class PaginationHTMLParser(HTMLParser):
    """Find the highest page number among a page's pagination links.

    `regex` is matched against the `href` of every <a> tag seen before the
    pagination list is closed; its group 2 must capture the page number.
    After feed(), `pages` holds the largest number found (1 if none).
    """

    def __init__(self, regex):
        super(PaginationHTMLParser, self).__init__()
        self.in_pagination = False  # inside <ul class="pagination no-margin">
        self.complete = False  # that list has been closed; ignore the rest
        self.pages = 1
        self.regex = regex

    def handle_starttag(self, tag, attrs):
        if self.complete:
            return
        if not self.in_pagination and tag == 'ul':
            for name, value in attrs:
                if value == 'pagination no-margin' and name == 'class':
                    self.in_pagination = True
        elif tag == 'a':
            for name, value in attrs:
                # An attribute written without a value (<a href>) arrives as None.
                if name != 'href' or value is None:
                    continue
                match = re.search(self.regex, value)
                if match is None:
                    continue
                try:
                    page = int(match.group(2))
                except (IndexError, TypeError, ValueError):
                    # The pattern has no usable group 2 for this link.
                    continue
                if page > self.pages:
                    self.pages = page

    def handle_endtag(self, tag):
        if not self.complete and self.in_pagination and tag == 'ul':
            self.complete = True
class CategoryHTMLParser(HTMLParser):
    """Collect the `href` of every <a> inside <div class="table-responsive">.

    Collection starts at the first such <div> and stops at the first </div>
    seen afterwards. `links` is the list of hrefs in document order and
    `last` is the index of the most recently added one (-1 while empty).
    """

    @property
    def links(self):
        return self.__links

    @links.setter
    def links(self, links):
        self.__links = links

    @property
    def last(self):
        return self.__last

    @last.setter
    def last(self, last):
        self.__last = last

    def __init__(self):
        super(CategoryHTMLParser, self).__init__()
        self.last = -1
        self.links = []
        self.in_manga = False  # inside the container being scanned
        self.complete = False  # container closed; ignore the rest

    def handle_starttag(self, tag, attrs):
        if self.complete:
            return
        if not self.in_manga and tag == 'div':
            for key, value in attrs:
                if key == 'class' and value == 'table-responsive':
                    self.in_manga = True
        elif self.in_manga and tag == 'a':
            for name, value in attrs:
                # Skip valueless attributes (<a href>), which arrive as None
                # and cannot be treated as link strings.
                if name == 'href' and value is not None:
                    self.links.append(value)
                    self.last += 1

    def handle_endtag(self, tag):
        # NOTE(review): the first </div> ends collection, including one that
        # closes a <div> nested inside the container.
        if not self.complete and self.in_manga and tag == 'div':
            self.complete = True
class ChapterHTMLParser(CategoryHTMLParser):
    """CategoryHTMLParser that also stops at the 'Các bản dịch khác' heading.

    The text means "Other translations"; links that follow it are ignored.
    """

    def handle_data(self, data):
        # Ignore surrounding whitespace so the heading is still recognised
        # when the markup pads it with spaces or newlines.
        if not self.complete and data.strip() == 'Các bản dịch khác':
            self.complete = True
class ImageHTMLParser(CategoryHTMLParser):
    """Collect the `src` of every <img> inside <div class="manga-container">.

    Uses the inherited end-tag handling, so collection stops at the first
    </div> seen after the container was opened.
    """

    def handle_starttag(self, tag, attrs):
        if self.complete:
            return
        if not self.in_manga and tag == 'div':
            for name, value in attrs:
                if name == 'class' and value == 'manga-container':
                    self.in_manga = True
        elif self.in_manga and tag == 'img':
            for name, value in attrs:
                # Skip valueless attributes (<img src>), which arrive as None.
                if name == 'src' and value is not None:
                    self.links.append(value)
                    self.last += 1
def parse_page(number, category='mangas', text='', regex='*'):
    """Save the links of one listing page that match `regex`.

    The links are taken from `text` with CategoryHTMLParser and written, one
    per line with CRLF line endings, to '<category>_page_<number>.txt'. Each
    saved link is also printed with its 1-based position in the file.

    :param number: page number, used only in the output file name.
    :param category: file-name prefix.
    :param text: HTML of the page.
    :param regex: pattern a link must match (re.search) to be kept; the
        default '*' keeps every link.
    :return: number of links written to the file.
    """
    # A bare '*' is not a valid regular expression, so the default is mapped
    # to the empty pattern, which matches every string.
    pattern = '' if regex == '*' else regex
    page_file_path = category + '_page_' + str(number) + '.txt'
    category_parser = CategoryHTMLParser()
    category_parser.feed(text)
    num = 0
    # newline='\r\n' already turns each '\n' written into CRLF.
    with open(page_file_path, mode='w', encoding='UTF-8', newline='\r\n') as pagin:
        for link in category_parser.links:
            if re.search(pattern, link):
                num += 1
                print(num, '@ ', link)
                pagin.write(link + '\n')
    return num
def refresh_pages(page_number=None, category=list(link_dict.keys())[0]):
    """Download listing pages of `category` and save their item links.

    :param page_number: page to fetch; a falsy value (None or 0) fetches every
        page after asking the user to confirm.
    :param category: a key of `link_dict`; only 'mangas' is supported so far.
    :return: number of links saved, or a negative code:
        -1 unknown category, -9999 category not supported yet,
        -2 no pagination found, -3 page number out of range.
    """
    if category not in link_dict:
        return -1  # Unknown value
    # TODO: support teams and authors
    if category != 'mangas':
        return -9999  # Not supported yet!

    page = requests.get(link_dict[category])
    pagination_parser = PaginationHTMLParser(pagination_regex_dict[category])
    pagination_parser.feed(page.text)
    total_pages = pagination_parser.pages
    if total_pages == 1:
        print('Error parsing pagination!')
        return -2
    print('Pages:', total_pages)

    if not page_number:
        input('Getting all pages! This will be extremely slow. Press anykey to continue...')
        number_of_links = 0
        # Start at page 1 so the first page's links are saved as well.
        for i in range(1, total_pages + 1):
            print('Getting page', i, '...')
            page = requests.get(link_dict[category], params={'page': i})
            number_of_links += parse_page(
                i,
                category=category,
                text=page.text,
                regex=category_regex_dict[category]
            )
        return number_of_links

    if page_number < 1 or page_number > total_pages:
        print('Error: Page number must be from 1 to', total_pages, '!')
        return -3

    print('Getting page', page_number, '...')
    page = requests.get(link_dict[category], params={'page': page_number})
    return parse_page(
        page_number,
        category=category,
        text=page.text,
        regex=category_regex_dict[category]
    )
def get_chapters(page, index, is_search=False):
    """Save the chapter links of one manga to 'chapters.txt'.

    The manga URL is line `index` (0-based) of 'mangas_page_<page>.txt', or
    of 'searchs_page_<page>.txt' when `is_search` is true. Every chapter link
    found on that manga's page is printed with its 1-based position and
    written to 'chapters.txt' with CRLF line endings.

    :return: number of chapter links saved, or -1 when the list file cannot
        be read, has no line `index`, or the manga page cannot be processed.
    """
    prefix = 'searchs_page_' if is_search else 'mangas_page_'
    manga_file_path = prefix + str(page) + '.txt'
    print('Getting list manga in', manga_file_path, 'for reading...')
    try:
        # newline='\r\n' keeps each entry on one line even if the file was
        # written with doubled carriage returns; strip() drops the ending.
        with open(manga_file_path, mode='r', encoding='UTF-8', newline='\r\n') as mangas:
            manga = ''
            for i, line in enumerate(mangas):
                if i == index:
                    manga = line.strip()
                    break
    except (IOError, OSError):
        print('File not found:', manga_file_path, '. Refresh manga list and try again!')
        return -1
    if not manga:
        print('Error: no manga number', index + 1, 'in', manga_file_path, '!')
        return -1

    print('Getting manga:', manga)
    try:
        response = requests.get(manga)
        chapter_parser = ChapterHTMLParser()
        chapter_parser.feed(response.text)
        with open('chapters.txt', mode='w', encoding='UTF-8', newline='\r\n') as chapters:
            for position, link in enumerate(chapter_parser.links, start=1):
                print(position, '@ ', link)
                chapters.write(link + '\n')
    except Exception as error:
        # Keep the "-1 on failure" contract, but report what actually failed.
        print('Error: could not get chapters of', manga, ':', error)
        return -1
    return len(chapter_parser.links)
def get_images(index):
    """Save the image links of one chapter to 'images.txt'.

    The chapter URL is line `index` (0-based) of 'chapters.txt'. Every image
    link found on that chapter's page is printed with its 1-based position
    and written to 'images.txt' with CRLF line endings.

    :return: number of image links saved, or -1 when 'chapters.txt' cannot be
        read, has no line `index`, or the chapter page cannot be processed.
    """
    try:
        # newline='\r\n' keeps each entry on one line even if the file was
        # written with doubled carriage returns; strip() drops the ending.
        with open('chapters.txt', mode='r', encoding='UTF-8', newline='\r\n') as chapters:
            chapter = ''
            for i, line in enumerate(chapters):
                if i == index:
                    chapter = line.strip()
                    break
    except (IOError, OSError):
        print('File not found: chapters.txt. Refresh chapter list and try again!')
        return -1
    if not chapter:
        print('Error: no chapter number', index + 1, 'in chapters.txt!')
        return -1

    print('Getting chapter:', chapter)
    try:
        response = requests.get(chapter)
        image_parser = ImageHTMLParser()
        image_parser.feed(response.text)
        with open('images.txt', 'w', encoding='UTF-8', newline='\r\n') as images:
            for position, link in enumerate(image_parser.links, start=1):
                print(position, '@ ', link)
                images.write(link + '\n')
    except Exception as error:
        # Keep the "-1 on failure" contract, but report what actually failed.
        print('Error: could not get images of', chapter, ':', error)
        return -1
    return len(image_parser.links)
def search(query_params=example_search, limit_pages=2):
    """Run a search on the site and save the manga links of the result pages.

    If `query_params` already contains 'page', only that page is saved.
    Otherwise pages 1..`limit_pages` (capped at the number of result pages)
    are fetched and each is saved through parse_page() under the 'searchs'
    category. `query_params` is never modified.

    :return: total number of manga links saved.
    """
    print('Searching with params: ', query_params, '...')
    page = requests.get(search_link, params=query_params)
    if 'page' in query_params:
        return parse_page(
            query_params['page'],
            category='searchs',
            text=page.text,
            regex=manga_regex
        )

    pagination_parser = PaginationHTMLParser(pagination_regex_dict['searchs'])
    pagination_parser.feed(page.text)
    print('Pages:', pagination_parser.pages)
    limit_pages = min(limit_pages, pagination_parser.pages)
    if limit_pages <= 1:
        # The reply already fetched is the only page to save.
        return parse_page(
            1,
            category='searchs',
            text=page.text,
            regex=manga_regex
        )

    print('Maximum pages to scan is', limit_pages)
    number_of_links = 0
    # The upper bound is inclusive: a limit of N scans pages 1..N.
    for i in range(1, limit_pages + 1):
        print('Getting page', i, '...')
        # Work on a copy so the caller's dict (often the shared default
        # `example_search`) does not gain a 'page' key.
        paged_params = dict(query_params, page=i)
        page = requests.get(search_link, params=paged_params)
        number_of_links += parse_page(
            i,
            category='searchs',
            text=page.text,
            regex=manga_regex
        )
    return number_of_links
if __name__ == '__main__':
    # Interactive flow: search for a manga, pick one result, list its
    # chapters, pick one chapter and list its images.
    search_string = str(input('Manga to search for: '))
    example_search['keyword'] = search_string
    limit = int(input('Number of paginations to scan: '))
    n_mangas = search(query_params=example_search, limit_pages=limit)
    print('Number of mangas:', n_mangas)
    if n_mangas > 0:
        page = 1
        try:
            page = int(input('Number of the page you want (Enter to default to 1): '))
        except ValueError:
            # Empty or non-numeric answer: keep the default page 1.
            pass
        # The lists are printed 1-based; the getters expect 0-based indexes.
        index = int(input('Number of the link you want: ')) - 1
        n_chapters = get_chapters(page, index, is_search=True)
        print('Number of chapters:', n_chapters)
        if n_chapters > 0:
            chap = int(input('Number of the link to chapter you want: ')) - 1
            n_images = get_images(chap)
            print('Number of images:', n_images)

    # n_mangas = refresh_pages(page_number=1, category='mangas')
    # print('Number of mangas:', n_mangas)
    # if n_mangas > 0:
    #     n_chapters = get_chapters(1, 0)
    #     print('Number of chapters:', n_chapters)
    #     if n_chapters > 0:
    #         print('Getting last chapter...')
    #         n_images = get_images(0)
    #         print('Number of images:', n_images)

    # n_chapters = get_chapters(10, 0)
    # if n_chapters != -1:
    #     print('Number of chapters: ', n_chapters)
    #     print('Getting last chapter...')
    #     n_images = get_images(0)
    #     if n_images != -1:
    #         print('Number of images: ', n_images)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment