-
-
Save shuHelicopter/5ea5bcfca444780eb9c56e8f131627f9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract per-company account statistics from Twitter with tweepy.
# NOTE(review): assumes an authenticated tweepy API object named `api`
# exists earlier in the script — the auth code is not visible in this chunk.
import csv
import time

import pandas as pd

# 'companies_mdf.csv' is the dataset after the first preprocessing with R;
# column index 11 stores the company's Twitter screen name.
with open('companies_mdf.csv') as fil:
    name_list = [row[11] for row in csv.reader(fil)]

twt = []
# name_list[0] is the CSV header row, so start from index 1.
for k, name in enumerate(name_list[1:], start=1):
    company = {'name': name}
    try:
        user = api.get_user(screen_name=name)
    except tweepy.RateLimitError:
        # Wait out Twitter's 15-minute rate-limit window, then retry the
        # SAME account.  (The original handler slept but then read counts
        # from the stale `user` object left over from the previous
        # iteration, silently recording the wrong company's numbers.)
        print('********* Rate Limit Error *********')
        time.sleep(60 * 16)
        user = api.get_user(screen_name=name)
    except tweepy.TweepError:
        # Account missing / suspended / protected: record None for every
        # metric, matching the original's behavior.
        user = None

    if user is None:
        company['friends_num'] = None
        company['followers_num'] = None
        company['statuses_num'] = None
        company['favourites_num'] = None
    else:
        company['friends_num'] = user.friends_count
        company['followers_num'] = user.followers_count
        company['statuses_num'] = user.statuses_count
        company['favourites_num'] = user.favourites_count

    twt.append(company)
    print('############## %d ############' % k)  # crawl-progress indicator

# Persist the crawled metrics.  DataFrame.to_csv opens and encodes the
# target file itself, so the original's separate
# codecs.open('twitter.csv', 'w', 'utf-8') handle — which only truncated
# the same file redundantly and was never written through — is dropped.
pd.DataFrame(twt).to_csv('twitter.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment