This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
import numpy as np | |
import pandas as pd | |
from pandas import Series,DataFrame | |
import matplotlib.pyplot as plt | |
import matplotlib.font_manager as fm | |
import csv | |
# Font handle built from the msyh.ttc file in the Windows fonts directory
# (presumably so CJK text renders in matplotlib figures -- confirm at call sites).
myfont = fm.FontProperties(fname='C:/Windows/Fonts/msyh.ttc')
# Load the sales-record CSV into a DataFrame.
data1 = pd.read_csv("C:/Users/Bohemian/Desktop/salerecord(2).csv")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
import pandas as pd | |
from scipy import stats | |
from scipy.stats import ttest_ind | |
# Load the CSV into a DataFrame and show its first ten rows.
data_train = pd.read_csv('C:/Users/Bohemian/Desktop/dayuecheng.csv')
print(data_train.head(10))
# Pull out the three columns of interest as individual Series.
x = data_train['keliuliang']
y = data_train['jindiankeliu']
z = data_train['jdxfcs']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 本段代码执行以下功能:1.从Mysql数据库中导入数据,此时print的数据是series 数据结构,然而我们在Python中分析的都是dataframe数据结构,因此需要把series转换为dataframe。 | |
import pymysql | |
import numpy as np | |
import pandas as pd | |
from pandas import Series,DataFrame | |
import matplotlib.pyplot as plt | |
import matplotlib.font_manager as fm | |
#import charts | |
# Open a connection to the "zhkj" database on the local MySQL server.
# Every argument is passed by keyword: PyMySQL 1.0+ rejects positional
# arguments, and `password` / `database` are the current names for the
# deprecated `passwd` / `db` aliases.
db = pymysql.connect(
    host="localhost",
    user="root",
    password="root",
    database="zhkj",
    charset="utf8",
)
# DictCursor returns each row as a dict keyed by column name.
cursor = db.cursor(cursor=pymysql.cursors.DictCursor)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from pandas import Series,DataFrame | |
import matplotlib.pyplot as plt | |
import matplotlib.font_manager as fm | |
# Font handle built from the msyh.ttc file in the Windows fonts directory
# (presumably so CJK text renders in matplotlib figures -- confirm at call sites).
myfont = fm.FontProperties(fname='C:/Windows/Fonts/msyh.ttc')
# Load the training CSV into a DataFrame.
data_train = pd.read_csv("C:/Users/Bohemian/Desktop/titanic/train.csv")
# Debug helpers, left disabled:
#print (data_train)
#print (data_train.describe())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests,urllib.request,pymongo | |
# Connect to the MongoDB server on localhost:27017 and select the
# "sheet_58" collection inside the "58TC" database.
client = pymongo.MongoClient('localhost', 27017)
TC = client['58TC']
sheet_58 = TC['sheet_58']
def get_more_page(start, end): | |
urls= ['http://bj.58.com/shoujihao/pn{}/'.format(str(i)) for i in range(start,end)] | |
for url in urls: | |
wb_data = requests.get(url) | |
soup = BeautifulSoup(wb_data.text,'lxml') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests,time | |
# Module-level list, initially empty.
page_links = []
# HTTP request headers: a desktop Chrome User-Agent plus a captured Cookie string.
# NOTE(review): the cookie embeds session identifiers captured from a browser;
# they will expire and should not be treated as stable configuration.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
    'Cookie': 'f=n; ipcity=qd%7C%u9752%u5C9B; f=n; id58=c5/nn1c9J+syWfPhEuVGAg==; mcity=zz; als=0; hots=%5B%7B%22d%22%3A0%2C%22s1%22%3A%22ipad%20%E7%88%B1%E4%BA%BA%E3%80%81%22%2C%22s2%22%3A%22%22%2C%22n%22%3A%22sou%22%7D%5D; __utma=253535702.1008435073.1478163262.1478163262.1478163262.1; __utmz=253535702.1478163262.1.1.utmcsr=zz.58.com|utmccn=(referral)|utmcmd=referral|utmcct=/; myfeet_tooltip=end; bj58_id58s="UExrSDM4aVFmc089NTk4Mw=="; city=zz; 58home=zz; sessionid=4eb22971-5459-4520-b22c-46b75796fe78; bj58_new_session=0; bj58_init_refer=""; bj58_new_uv=4; 58tj_uuid=09dbb43f-9f6c-4711-987a-b2582099fb07; new_session=0; new_uv=5; utm_source=; spm=; init_refer=',
}
# 获得从第几页到第几页的所有详情页信息 | |
def get_more_page(start, end): | |
urls = ['http://bj.58.com/pbdn/0/pn{}'.format(str(i)) for i in | |
range(start, end)] | |
f |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests,urllib.request,time | |
## 问题:此程序虽然进行反反爬虫措施,但效果仍然不好。并且“+item[-num:]”中num的设置很有问题,设置过大,出现字母时爬虫会自动结束,需要修改:try: except? | |
# URL prefix for the listing pages; the name and trailing "page-" suggest a
# page number is appended by the caller (not visible here -- confirm).
url = 'http://jandan.net/ooxx/page-'
# HTTP request headers: a desktop Chrome User-Agent plus a captured Cookie string.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
    'Cookie': 'gif-click-load=off; bad-click-load=off; _gat=1; jdna=596e6fb28c1bb47f949e65e1ae03f7f5#1478358545174; Hm_lvt_fd93b7fb546adcfbcf80c4fc2b54da2c=1478338211,1478358498; Hm_lpvt_fd93b7fb546adcfbcf80c4fc2b54da2c=1478358546; _ga=GA1.2.666641045.1478338213',
}
def get_attractions(url): | |
wb_data = requests.get(url,headers = header) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 从赶集首页获取商品类目链接: | |
from bs4 import BeautifulSoup | |
import requests,pymongo | |
# Page from which the category links are collected.
start_url = 'http://bj.ganji.com/wu/'
# Site root (presumably prepended to relative links found on the page -- confirm at usage).
url_host = 'http://bj.ganji.com'
def get_channel(url): | |
wb_data = requests.get(url) | |
soup = BeautifulSoup(wb_data.text, 'lxml') | |
links = soup.select('.fenlei > dt > a') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymongo | |
import charts | |
# Connect to the MongoDB server on localhost:27017 and select the
# "info_list" collection inside the "IT_juzi" database.
client = pymongo.MongoClient('localhost', 27017)
IT_juzi = client['IT_juzi']
info_list = IT_juzi['info_list']

#### Funding-status distribution chart (bar chart) for companies in the Henan region ####
# Print the '发展阶段' field of every document in the collection.
for i in info_list.find():
    print(i['发展阶段'])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#爬取相关30页网址,以及每一页中的链接,每一链接里面的内容、并把相关信息存放进数据库中: | |
#2016.11.17 由于没有加入代理ip列表,也没有设置headers,cookies,在抓取过程中,被拉勾网封Ip....教训!!!!! | |
from bs4 import BeautifulSoup | |
import requests,pymongo,re | |
import random | |
import time | |
# URL scheme prefix (presumably prepended to protocol-relative "//..." links
# scraped from the pages -- confirm at usage).
host_url = 'https:'
# First listing page to crawl.
start_url = 'https://www.lagou.com/zhaopin/shujuwajue/?filterOption=3'
# Client for the MongoDB server on localhost:27017.
client = pymongo.MongoClient('localhost', 27017)