@alpenmilch411
Created January 4, 2016 10:03
import requests
from bs4 import BeautifulSoup
import re
from pushbullet import Pushbullet

api_key = 'YOUR KEY'
pb = Pushbullet(api_key)

def remove_duplicates(values):
    output = []
    seen = set()
    for value in values:
        # If value has not been encountered yet,
        # ... add it to both list and set.
        if value not in seen:
            output.append(value)
            seen.add(value)
    return output

# Gets chapter links
def get_chapter_links(index_url):
    r = requests.get(index_url)
    soup = BeautifulSoup(r.content, 'lxml')
    links = soup.find_all('a')
    url_list = []
    right = []
    for url in links:
        url_list.append(url.get('href'))
    for link in url_list:  # Iterates through every line and looks for a match:
        if re.match(r"https://bluesilvertranslations\.wordpress\.com/\d{4}/\d{2}/\d{2}/douluo-dalu-", str(link)) and '#comment' not in link:
            right.append(link)
    return remove_duplicates(right)

def get_title(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'lxml')
    title = soup.find_all('h1', {'class': 'entry-title'})
    chapter_title = ''
    for l in title:
        chapter_title += l.text
    return chapter_title

links = 'https://bluesilvertranslations.wordpress.com/category/douluo-dalu/'
link_list = get_chapter_links(links)

file2_out = open('url.txt', 'a')  # local url list for chapter check
for x in link_list:
    # Checking chapter existence in folder and downloading chapter
    if x not in open('url.txt').read():  # Is url of chapter in local url list?
        push = pb.push_note(get_title(x), x)
        file2_out.write('{}\n'.format(x))  # adding downloaded chapter to local url list
        print('{} saved.'.format(x))
file2_out.close()
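Not part of the gist, but a possible tightening of the duplicate check above: read url.txt into a set once instead of re-reading the whole file for every link, and let context managers handle closing. The sketch reuses pb, get_title and the url.txt layout from the script above; the helper name push_new_chapters is mine.

import os

def push_new_chapters(link_list, seen_file='url.txt'):
    # Load previously pushed chapter URLs once, rather than per link.
    seen = set()
    if os.path.exists(seen_file):
        with open(seen_file) as f:
            seen = {line.strip() for line in f if line.strip()}
    with open(seen_file, 'a') as out:
        for url in link_list:
            if url not in seen:
                pb.push_note(get_title(url), url)  # same notification call as above
                out.write('{}\n'.format(url))      # remember the chapter for later runs
                seen.add(url)
                print('{} saved.'.format(url))

# Usage, mirroring the script above:
# push_new_chapters(get_chapter_links(links))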
https://bluesilvertranslations.wordpress.com/2016/01/03/douluo-dalu-184-divine-craftsman-lou-gao/
https://bluesilvertranslations.wordpress.com/2016/01/03/douluo-dalu-183-capital-of-metal-gengxin-city/
https://bluesilvertranslations.wordpress.com/2016/01/03/douluo-dalu-182-four-elements-academies-intention/
https://bluesilvertranslations.wordpress.com/2015/12/27/douluo-dalu-181-five-year-gap-one-to-three/
https://bluesilvertranslations.wordpress.com/2015/12/20/douluo-dalu-179-tang-sans-sixth-spirit-ability-nothingness-instant-kill-eight-stage-drop/
https://bluesilvertranslations.wordpress.com/2015/12/14/douluo-dalu-178-tang-san-vs-yang-wudi/
import requests
from bs4 import BeautifulSoup
import os
from pushbullet import Pushbullet

api_key = 'YOUR KEY'
pb = Pushbullet(api_key)

def remove_duplicates(values):
    output = []
    seen = set()
    for value in values:
        # If value has not been encountered yet,
        # ... add it to both list and set.
        if value not in seen:
            output.append(value)
            seen.add(value)
    return output

# Gets chapter links
def get_chapter_links(index_url):
    r = requests.get(index_url)
    soup = BeautifulSoup(r.content, 'lxml')
    links = soup.find_all('a')
    url_list = []
    for url in links:
        if 'http://www.wuxiaworld.com/issth-index/issth-book-' in str(url) and 'comment-page' not in str(url):
            url_list.append(url.get('href'))
    return remove_duplicates(url_list)

def get_title(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'lxml')
    title = soup.find_all('h1', {'class': 'entry-title'})
    chapter_title = ''
    for l in title:
        chapter_title += l.text
    return chapter_title

links = 'http://www.wuxiaworld.com/category/issth-chapter-release/'
link_list = get_chapter_links(links)

file2_out = open('url_list.txt', 'a')  # local url list for chapter check
for x in link_list:
    # Checking chapter existence in folder and downloading chapter
    if x not in open('url_list.txt').read():  # Is url of chapter in local url list?
        push = pb.push_note(get_title(x), x)
        file2_out.write('{}\n'.format(x))  # adding downloaded chapter to local url list
        print('{} saved.'.format(x))
file2_out.close()
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-378/?
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-377/?
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-376/?
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-375/?
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-374/?
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-373/?
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-372/?
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-371/?
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-370/?
http://www.wuxiaworld.com/issth-index/issth-book-4-chapter-369/?
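The stored wuxiaworld links above keep a trailing '/?' from the release posts. If the same chapter were ever linked without that query marker, the check against url_list.txt would miss it and the chapter would be pushed again. A one-line normalization (my suggestion, not something the scripts do) applied to each href before remove_duplicates and before the url_list.txt comparison would avoid that:

def normalize_url(url):
    # Strip a trailing '?', so .../issth-book-4-chapter-378/? and
    # .../issth-book-4-chapter-378/ count as the same chapter.
    return url.rstrip('?')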
import requests
from bs4 import BeautifulSoup
from pushbullet import Pushbullet

api_key = 'YOUR KEY'
pb = Pushbullet(api_key)

def remove_duplicates(values):
    output = []
    seen = set()
    for value in values:
        # If value has not been encountered yet,
        # ... add it to both list and set.
        if value not in seen:
            output.append(value)
            seen.add(value)
    return output

# Gets chapter links
def get_chapter_links(index_url):
    r = requests.get(index_url)
    soup = BeautifulSoup(r.content, 'lxml')
    links = soup.find_all('a')
    url_list = []
    for url in links:
        if 'http://www.wuxiaworld.com/mga-index/mga-chapter-' in str(url) and 'comment-page' not in str(url):
            url_list.append(url.get('href'))
    return remove_duplicates(url_list)

def get_title(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'lxml')
    title = soup.find_all('h1', {'class': 'entry-title'})
    chapter_title = ''
    for l in title:
        chapter_title += l.text
    return chapter_title

links = 'http://www.wuxiaworld.com/category/mga-chapter-release/'
link_list = get_chapter_links(links)

file2_out = open('url_list.txt', 'a')  # local url list for chapter check
for x in link_list:
    # Checking chapter existence in folder and downloading chapter
    if x not in open('url_list.txt').read():  # Is url of chapter in local url list?
        push = pb.push_note(get_title(x), x)
        file2_out.write('{}\n'.format(x))  # adding downloaded chapter to local url list
        print('{} saved.'.format(x))
file2_out.close()
http://www.wuxiaworld.com/mga-index/mga-chapter-610/
http://www.wuxiaworld.com/mga-index/mga-chapter-609/
http://www.wuxiaworld.com/mga-index/mga-chapter-606/
http://www.wuxiaworld.com/mga-index/mga-chapter-604/
http://www.wuxiaworld.com/mga-index/mga-chapter-600/
http://www.wuxiaworld.com/mga-index/mga-chapter-570/
http://www.wuxiaworld.com/mga-index/mga-chapter-563/
http://www.wuxiaworld.com/mga-index/mga-chapter-559/
http://www.wuxiaworld.com/mga-index/mga-chapter-556/
http://www.wuxiaworld.com/mga-index/mga-chapter-552/
import requests
from bs4 import BeautifulSoup
from pushbullet import Pushbullet

api_key = 'YOUR KEY'
pb = Pushbullet(api_key)

def remove_duplicates(values):
    output = []
    seen = set()
    for value in values:
        # If value has not been encountered yet,
        # ... add it to both list and set.
        if value not in seen:
            output.append(value)
            seen.add(value)
    return output

# Gets chapter links
def get_chapter_links(index_url):
    r = requests.get(index_url)
    soup = BeautifulSoup(r.content, 'lxml')
    links = soup.find_all('a')
    url_list = []
    for url in links:
        if 'http://www.wuxiaworld.com/atg-index/atg-chapter-' in str(url) and 'comment-page' not in str(url):
            url_list.append(url.get('href'))
    return remove_duplicates(url_list)

def get_title(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'lxml')
    title = soup.find_all('h1', {'class': 'entry-title'})
    chapter_title = ''
    for l in title:
        chapter_title += l.text
    return chapter_title

links = 'http://www.wuxiaworld.com/category/atg-chapter-release/'
link_list = get_chapter_links(links)

file2_out = open('url_list.txt', 'a')  # local url list for chapter check
for x in link_list:
    # Checking chapter existence in folder and downloading chapter
    if x not in open('url_list.txt').read():  # Is url of chapter in local url list?
        push = pb.push_note(get_title(x), x)
        file2_out.write('{}\n'.format(x))  # adding downloaded chapter to local url list
        print('{} saved.'.format(x))
file2_out.close()
http://www.wuxiaworld.com/atg-index/atg-chapter-314/
http://www.wuxiaworld.com/atg-index/atg-chapter-313/
http://www.wuxiaworld.com/atg-index/atg-chapter-312/
http://www.wuxiaworld.com/atg-index/atg-chapter-311/
http://www.wuxiaworld.com/atg-index/atg-chapter-310/
http://www.wuxiaworld.com/atg-index/atg-chapter-309/
http://www.wuxiaworld.com/atg-index/atg-chapter-308/
http://www.wuxiaworld.com/atg-index/atg-chapter-307/
http://www.wuxiaworld.com/atg-index/atg-chapter-306/
http://www.wuxiaworld.com/atg-index/atg-chapter-305/
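The four watcher scripts above are identical apart from the listing page, the pattern a chapter link must match, and the local seen-URL file. Purely as a sketch, one table could drive all of them; the SITES layout and the function names below are my own, while the URLs and patterns are taken from the scripts above.

import re
import requests
from bs4 import BeautifulSoup
from pushbullet import Pushbullet

pb = Pushbullet('YOUR KEY')

SITES = [
    # (listing page, regex a chapter link must match, local seen-URL file)
    ('https://bluesilvertranslations.wordpress.com/category/douluo-dalu/',
     r'https://bluesilvertranslations\.wordpress\.com/\d{4}/\d{2}/\d{2}/douluo-dalu-', 'url.txt'),
    ('http://www.wuxiaworld.com/category/issth-chapter-release/',
     r'http://www\.wuxiaworld\.com/issth-index/issth-book-', 'url_list.txt'),
    ('http://www.wuxiaworld.com/category/mga-chapter-release/',
     r'http://www\.wuxiaworld\.com/mga-index/mga-chapter-', 'url_list.txt'),
    ('http://www.wuxiaworld.com/category/atg-chapter-release/',
     r'http://www\.wuxiaworld\.com/atg-index/atg-chapter-', 'url_list.txt'),
]

def chapter_links(index_url, pattern):
    # Collect matching chapter hrefs from the listing page, skipping comment links.
    soup = BeautifulSoup(requests.get(index_url).content, 'lxml')
    hrefs = (a.get('href') for a in soup.find_all('a'))
    return sorted({h for h in hrefs if h and re.match(pattern, h) and 'comment' not in h})

def title_of(url):
    # Same idea as get_title above: join the entry-title headings of the chapter page.
    soup = BeautifulSoup(requests.get(url).content, 'lxml')
    return ''.join(h.text for h in soup.find_all('h1', {'class': 'entry-title'}))

for index_url, pattern, seen_file in SITES:
    try:
        seen = set(open(seen_file).read().split())
    except FileNotFoundError:
        seen = set()
    with open(seen_file, 'a') as out:
        for url in chapter_links(index_url, pattern):
            if url not in seen:
                pb.push_note(title_of(url), url)
                out.write('{}\n'.format(url))
                print('{} saved.'.format(url))

In the gist the per-series scripts live in separate folders (the vcs.xml below maps DoDa, ISSTH, MGA and NTXS directories), so three of them sharing the name url_list.txt never collide; in a single-folder layout like this sketch, the seen-URL files would need distinct names.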
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Encoding">
    <file url="PROJECT" charset="UTF-8" />
  </component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectLevelVcsManager" settingsEditedManually="false">
    <OptionsSetting value="true" id="Add" />
    <OptionsSetting value="true" id="Remove" />
    <OptionsSetting value="true" id="Checkout" />
    <OptionsSetting value="true" id="Update" />
    <OptionsSetting value="true" id="Status" />
    <OptionsSetting value="true" id="Edit" />
    <ConfirmationsSetting value="0" id="Add" />
    <ConfirmationsSetting value="0" id="Remove" />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.4.3 (/Library/Frameworks/Python.framework/Versions/3.4/bin/python3.4)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/pushbullet_notification.iml" filepath="$PROJECT_DIR$/.idea/pushbullet_notification.iml" />
    </modules>
  </component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
  </component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/DoDa" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/ISSTH" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/MGA" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/NTXS" vcs="Git" />
  </component>
</project>