Cheng-Jun Wang (chengjun): public gists
import urllib2
from bs4 import BeautifulSoup
import sys

# get the link for each chapter of the novel 三界独尊
url = "http://www.23wx.com/html/50/50550/"
content = urllib2.urlopen(url).read()
soup = BeautifulSoup(content, 'html.parser')  # name the parser explicitly
links = soup.find_all('td')[1000:]  # keep the <td> cells from index 1000 onward
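The gist stops after collecting the <td> cells. Below is a minimal sketch of the likely next step, pulling each chapter's href out of those cells and downloading the page; the assumption that every chapter cell wraps a single <a> tag with a relative link is mine, not taken from the gist.

def fetch_chapters(index_url, cells):
    """Hypothetical helper: resolve each chapter link and return (title, html) pairs."""
    chapters = []
    for td in cells:
        a = td.find('a')
        if a is None or not a.get('href'):
            continue  # skip layout cells with no link
        chapter_url = index_url + a['href']  # assumes the index page uses relative links
        html = urllib2.urlopen(chapter_url).read()
        chapters.append((a.get_text(), html))
    return chapters

# usage with the variables defined above:
# chapters = fetch_chapters(url, links)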
@chengjun / scholartree (created July 8, 2015 05:43): tree network
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 07 15:40:57 2015
@author: chengwang6
"""
import urllib2
from bs4 import BeautifulSoup
## Set the seed profile for the crawler
seed = 'https://scholar.google.nl/citations?user=nNdt_G8AAAAJ&hl=en&oe=ASCII'
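Only the seed URL survives in the listing. A minimal sketch of how the crawl could continue, assuming co-author profiles can be recognized by the 'citations?user=' pattern in their href; Google Scholar throttles automated requests, so this is illustrative only.

def get_coauthor_links(profile_url):
    """Hypothetical helper: collect profile links found on a Scholar page."""
    html = urllib2.urlopen(profile_url).read()
    soup = BeautifulSoup(html, 'html.parser')
    found = set()
    for a in soup.find_all('a', href=True):
        if 'citations?user=' in a['href']:
            found.add('https://scholar.google.nl' + a['href'])
    return found

# one level of the tree: edges from the seed to each profile it links to
# edges = [(seed, link) for link in get_coauthor_links(seed)]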
@chengjun / clean_wisenews.py (created June 23, 2015 10:20): clean WiseNews data
with open("F:/百度云同步盘/Computational Communication/Data/占中数据20150328/zz-hk-2013.1-2013.3.rtf") as f:
news = f.readlines()
def stringclean(s):
s = s.replace(r'\loch\af0\hich\af0\dbch\f15 \b\cf6 ', '')
s = s.replace(r'\loch\af0\hich\af0\dbch\f15 \b0\cf0 ', '')
s = s.replace('\par', '').replace('\n', '')
return s
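The gist ends with the cleaning function. A short sketch of applying it to the lines read above and saving the result; the output filename is an assumption.

cleaned = [stringclean(line) for line in news]
cleaned = [line for line in cleaned if line.strip()]  # drop lines that were pure RTF markup

with open("zz-hk-2013.1-2013.3-clean.txt", "w") as out:
    out.write('\n'.join(cleaned))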
### Title: Back to basics: High quality plots using base R graphics
### An interactive tutorial for the Davis R Users Group meeting on April 24, 2015
###
### Date created: 20150418
### Last updated: 20150423
###
### Author: Michael Koontz
### Email: mikoontz@gmail.com
### Twitter: @michaeljkoontz
###
require(igraph)
# generate a social graph with the Barabasi-Albert preferential attachment model
node_number = 100
g = barabasi.game(node_number); plot(g)
# pick one seed node to start the diffusion from
seeds_num = 1
set.seed(2014); diffusers = sample(V(g), seeds_num); diffusers
# infected[[t]] stores the set of infected nodes at step t
infected = list()
infected[[1]] = diffusers
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 13 21:28:10 2014
@author: v_chjwang
"""
from os import listdir
import glob
from collections import defaultdict
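The rest of this gist is not in the listing, so its purpose is unclear. Purely as a sketch of the pattern these imports usually serve, here is a loop that globs a folder of text files and tallies lines into a defaultdict; the folder path and the counting logic are assumptions, not from the gist.

import glob
from collections import defaultdict

files = glob.glob("D:/chengjun/data/*.txt")  # hypothetical folder

counts = defaultdict(int)
for name in files:
    with open(name) as f:
        for line in f:
            counts[line.strip()] += 1  # tally identical lines across all files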
#!/usr/bin/env python
# -*- coding: utf8 -*-
from weibo import APIClient
import urllib2
import urllib
import sys
import time
from time import clock
import random
from collections import defaultdict, Counter
import glob

# Python 2 workaround so Chinese text does not trip the default ASCII codec
reload(sys)
sys.setdefaultencoding('utf8')

path = "D:/chengjun/renren/"
'''
step 3: delete duplicates, sort the data, and save it
'''
import os
import glob
from collections import defaultdict

path = "D:/renren/friends_sorted/"
'''
step 2: split the data (which still contains duplicated ties) into
        roughly 2,000 files keyed by user id, to prepare for step 3
'''
from collections import defaultdict

path = "D:/renren/"
bigfile = open(path + "friends_all.txt")
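The listing cuts off right after the file is opened. A sketch of the splitting step the comment describes, assuming each line is a tab-separated uid/friend_uid pair; the prefix length used for bucketing and the output folder name are my choices, not the gist's.

buckets = defaultdict(list)
for line in bigfile:
    uid = line.split('\t')[0]
    buckets[uid[:3]].append(line)   # group ties by a short prefix of the user id
bigfile.close()

# write one file per bucket
for key, lines in buckets.items():
    with open(path + "friends_split/" + key + ".txt", "w") as out:
        out.writelines(lines)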