Created
August 7, 2019 08:42
-
-
Save tranchausky/2c3406bed7cd846aef63c54cd042ba3d to your computer and use it in GitHub Desktop.
clone crawl data python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
from bs4 import BeautifulSoup | |
import sys | |
''' | |
url= "https://www.skptricks.com/search/label/React%20Native" | |
r = requests.get(url) | |
soup = BeautifulSoup(r.content) | |
product = [] | |
g_data = soup.find_all("h2", {"class": "entry-title"}) | |
for item in g_data: | |
try: | |
#product.set_<field_name>(item.find_all("h2", {"class": "post-title"})[0].text) | |
print('-------------------') | |
#print(item) | |
#sys.exit() | |
#product.set_<2222>(123123) | |
#product.set_<field_name>("h2", {"class": "post-title"})[0].text | |
#product.set_<field_name>("span", {"class": "info"})[0].text | |
#product.set_<field_name>("span", {"class": "info"})[1].text | |
#product.set_<field_name>("span", {"class": "info"})[2].text | |
#product.set_<field_name>("span", {"class": "price right right10"})[0].text | |
#print(item.find_all("a", {})[0].text) | |
print(item.find_all("a", {})[0].text) | |
print(item.find_all("a", {})[0]['href']) | |
nhap =[] | |
nhap.append(item.find_all("a", {})[0].text) | |
nhap.append(item.find_all("a", {})[0]['href']) | |
#infora = item.find_all("a", href=True) | |
#print(infora('href')) | |
product.append(nhap) | |
except: | |
pass | |
import json | |
file = open("filename", "w") | |
output = {"product1": product} | |
json.dump(output, file) | |
file.close() | |
''' | |
def clonebyurl(strlink, timeout=30):
    """Scrape one listing page and return its post links plus the pager link.

    Args:
        strlink: URL of the listing page to download.
        timeout: Seconds to wait for the HTTP request before giving up;
            passed straight to ``requests.get`` so a stalled server cannot
            block the caller forever.

    Returns:
        A dict with two keys:

        * ``'data'`` -- list of ``[link_text, href]`` pairs, one for every
          ``<h2 class="entry-title">`` element whose first ``<a>`` has an
          ``href``.
        * ``'next'`` -- ``href`` of the first
          ``<a class="blog-pager-older-link">`` element, or ``None`` when the
          page has no such link.
    """
    r = requests.get(strlink, timeout=timeout)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed on the machine.
    soup = BeautifulSoup(r.content, "html.parser")

    product = []
    for item in soup.find_all("h2", {"class": "entry-title"}):
        link = item.find("a")
        # Skip headings without a usable link explicitly, rather than
        # swallowing every possible exception with a bare ``except``.
        if link is None or not link.has_attr("href"):
            continue
        product.append([link.text, link["href"]])

    # A page without a "blog-pager-older-link" anchor (presumably the last
    # page of the listing) yields None instead of raising IndexError.
    pager = soup.find("a", {"class": "blog-pager-older-link"})
    g_next_link = pager.get("href") if pager is not None else None

    return {"data": product, "next": g_next_link}
# Demo run: scrape a single listing page and show what was collected
# (the post links under 'data' and the pager link under 'next').
data = clonebyurl(
    'https://www.skptricks.com/search/label/React%20Native?updated-max=2019-05-26T08:32:00%2B05:30&max-results=20&start=40&by-date=false'
)
print(data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
from bs4 import BeautifulSoup | |
import sys | |
import base64 | |
import shutil | |
#reload(sys) | |
#sys.setdefaultencoding("utf-8") | |
def clone_post(strlink, timeout=30):
    """Download a post page and return its first ``entry-content`` element.

    The element is also printed to stdout before being returned.

    Args:
        strlink: URL of the post page to download.
        timeout: Seconds to wait for the HTTP request before giving up;
            passed straight to ``requests.get`` so a stalled server cannot
            block the caller forever.

    Returns:
        The first ``<div class="entry-content">`` element of the page, as a
        BeautifulSoup tag.

    Raises:
        IndexError: If the page contains no ``<div class="entry-content">``.
    """
    r = requests.get(strlink, timeout=timeout)
    soup = BeautifulSoup(r.content, "html.parser")
    g_data = soup.find_all("div", {"class": "entry-content"})
    if not g_data:
        # Same exception type as indexing an empty result list, but with a
        # message that says what was missing and where.
        raise IndexError(
            'no <div class="entry-content"> found at %s' % strlink
        )
    content = g_data[0]
    print(content)
    return content
# Demo run: fetch one article and append its content markup to test.html.
data = clone_post(
    'https://www.skptricks.com/2019/05/react-native-custom-animated-sliding-drawer.html'
)

# Earlier, disabled variants of the save step (a JSON dump to
# file_content.txt, writing the element directly, base64-encoding it) are
# not used; the element is converted to text and appended to the file.
with open('test.html', 'a') as html_out:
    html_out.write(str(data))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment