clone crawl data python
import requests
import json
from bs4 import BeautifulSoup
import sys
'''
Single-page prototype of clonebyurl(), kept commented out for reference:

url = "https://www.skptricks.com/search/label/React%20Native"
r = requests.get(url)
soup = BeautifulSoup(r.content, "html.parser")
product = []
g_data = soup.find_all("h2", {"class": "entry-title"})
for item in g_data:
    try:
        print('-------------------')
        print(item.find_all("a", {})[0].text)
        print(item.find_all("a", {})[0]['href'])
        nhap = []
        nhap.append(item.find_all("a", {})[0].text)
        nhap.append(item.find_all("a", {})[0]['href'])
        product.append(nhap)
    except Exception:
        pass

file = open("filename", "w")
output = {"product1": product}
json.dump(output, file)
file.close()
'''
def clonebyurl(strlink):
    # Scrape one Blogger label page: collect post titles/links and the pagination link.
    # Example: strlink = "https://www.skptricks.com/search/label/React%20Native"
    r = requests.get(strlink)
    soup = BeautifulSoup(r.content, "html.parser")
    product = []
    # Post titles live in <h2 class="entry-title">, the "Older Posts" link in
    # <a class="blog-pager-older-link">.
    g_data = soup.find_all("h2", {"class": "entry-title"})
    g_next_info = soup.find_all("a", {"class": "blog-pager-older-link"})
    g_next_link = g_next_info[0]['href'] if g_next_info else None  # None on the last page
    for item in g_data:
        try:
            link = item.find_all("a", {})[0]
            nhap = [link.text, link['href']]  # [title, url]
            product.append(nhap)
        except Exception:
            pass  # skip entries without a link
    result = {}
    result['data'] = product
    result['next'] = g_next_link
    return result

data = clonebyurl('https://www.skptricks.com/search/label/React%20Native?updated-max=2019-05-26T08:32:00%2B05:30&max-results=20&start=40&by-date=false')
print(data)
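
# Not part of the original gist: a sketch of how the 'next' link returned by
# clonebyurl() could be used to walk the whole label archive. The crawl_label
# name and the max_pages safety cap are my own assumptions; the
# {"product1": ...} JSON layout mirrors the commented-out prototype above.
def crawl_label(start_url, max_pages=50):
    all_posts = []
    url = start_url
    while url and max_pages > 0:
        page = clonebyurl(url)
        all_posts.extend(page['data'])   # list of [title, url] pairs
        url = page['next']               # None once the last archive page is reached
        max_pages -= 1
    return all_posts

# Example usage (uncomment to run):
#posts = crawl_label('https://www.skptricks.com/search/label/React%20Native')
#with open('posts.json', 'w') as f:
#    json.dump({"product1": posts}, f)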
import requests
import json
from bs4 import BeautifulSoup
import sys
import base64
import shutil
# Python 2-only encoding hack, not needed on Python 3:
#reload(sys)
#sys.setdefaultencoding("utf-8")

def clone_post(strlink):
    # Fetch a single post and return its <div class="entry-content"> as a bs4 Tag.
    # Example: strlink = "https://www.skptricks.com/2019/05/react-native-custom-animated-sliding-drawer.html"
    r = requests.get(strlink)
    soup = BeautifulSoup(r.content, "html.parser")
    g_data = soup.find_all("div", {"class": "entry-content"})
    print(g_data[0])
    return g_data[0]

data = clone_post('https://www.skptricks.com/2019/05/react-native-custom-animated-sliding-drawer.html')
#print(data)
'''
file = open("file_content.txt", "w")
output = {"content": data}
json.dump(output, file)
file.close()
'''
'''
text_file = open("file_content.txt", "w")
text_file.write(data)
text_file.close()
'''
#data = base64.b64encode("'"+data+"'")
#with open("file.html", "w") as file:
#file.write(data)
# Append the scraped post HTML to a local file.
with open('test.html', 'a') as f:
    f.write(str(data))
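
# Not part of the original gist: a sketch tying the two scripts together, saving
# every post found by clonebyurl() through clone_post(). The save_all_posts name
# and the filename-from-URL scheme are my own assumptions, and it presumes
# clonebyurl() from the listing script above is defined in (or imported into)
# this file. Opening with encoding="utf-8" replaces the commented-out Python 2
# setdefaultencoding hack.
def save_all_posts(label_url):
    page = clonebyurl(label_url)
    for title, url in page['data']:
        content = clone_post(url)
        filename = url.rstrip('/').split('/')[-1]  # e.g. "react-native-...-drawer.html"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(str(content))
        print('saved:', title, '->', filename)

# Example usage (uncomment to run):
#save_all_posts('https://www.skptricks.com/search/label/React%20Native')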