Created
December 4, 2019 02:57
-
-
Save pteacher/7d7a69a669f9eda52915456e52283135 to your computer and use it in GitHub Desktop.
Attempt to get codepen.io projects via links [FAILED]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import requests | |
from fake_useragent import UserAgent | |
def createFolder(directory):
    """Create ``directory`` (and any missing parents) if it is not there yet.

    This is a best-effort helper: failures are reported on stdout instead of
    being raised.

    Args:
        directory: Path of the directory to create.
    """
    try:
        os.makedirs(directory)
    except OSError:
        # makedirs also raises OSError when the path already exists. That is
        # only a real failure when what exists is not a directory (for
        # example a regular file with the same name), so stay silent for an
        # existing directory and report everything else.
        if not os.path.isdir(directory):
            print('Error: Creating directory. ' + directory)
# First CodePen link on a line. The dot in the host name is escaped so it only
# matches a literal ".", and the path part accepts digits, "_" and "-" in
# addition to letters and "/" so URLs containing them are not cut short.
_CODEPEN_URL_RE = re.compile(r'https?://codepen\.io/[A-Za-z0-9_/-]+')


def all_files(path):
    """Download the HTML behind every CodePen link found in files of ``path``.

    Each regular file directly inside ``path`` is read line by line (the scan
    is not recursive). For the first CodePen URL on a line, ``<url>.html`` is
    requested through the module-level ``ses`` session, using a User-Agent
    header taken from the module-level ``ua`` object. The matched URL and the
    response body are printed, and the body is written to ``<prefix>.html`` in
    the current working directory, where ``<prefix>`` is the part of the
    source file's name before the first underscore.

    Args:
        path: Directory whose files are scanned.
    """
    for name in os.listdir(path):
        # listdir returns bare names; join them with ``path`` so the file
        # check and the open() also work when ``path`` is not the current
        # working directory.
        source = os.path.join(path, name)
        if not os.path.isfile(source):
            continue

        out_name = name.split('_')[0] + '.html'
        with open(source, 'r') as handle:
            for line in handle:
                match = _CODEPEN_URL_RE.search(line)
                if match is None:
                    continue

                found = match.group(0)
                print(found)

                # Drop a trailing "/" so the request does not end in "/.html".
                url = found.rstrip('/')
                headers = {'User-agent': ua.random}
                r = ses.get(url + ".html", headers=headers)
                print(r.content)

                # Every link found in the same source file is written to the
                # same output name, so a later link overwrites an earlier one.
                with open(out_name, 'wb') as out:
                    out.write(r.content)
# Module-level HTTP state read by all_files(): a single session used for
# every request, and the object that supplies the User-Agent header value.
ses = requests.Session()
ua = UserAgent()

# Scan the files in the current working directory.
all_files('.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment