Skip to content

Instantly share code, notes, and snippets.

@ajoydas
Created July 13, 2017 09:13
Show Gist options
  • Save ajoydas/f398a43b8fd70ca49f4ff841fbc0aa86 to your computer and use it in GitHub Desktop.
Save ajoydas/f398a43b8fd70ca49f4ff841fbc0aa86 to your computer and use it in GitHub Desktop.
import pandas as pd
from pandas.io.json import json_normalize
from pymongo import MongoClient
import matplotlib.pyplot as plt
import re
import time
pd.set_option('display.expand_frame_repr', False)
def _connect_mongo(host, port, db):
    """Open a MongoDB client for host/port and return the database named *db*."""
    client = MongoClient(host, port)
    database = client[db]
    return database
def read_mongo(db, collection, host, port):
    """Read matching documents from MongoDB into a flattened DataFrame.

    Only documents whose ``created_at`` field matches the regex ``2013``
    are fetched.

    Args:
        db: Name of the database to query.
        collection: Name of the collection inside that database.
        host: MongoDB server host.
        port: MongoDB server port.

    Returns:
        The DataFrame produced by ``json_normalize`` over all fetched
        documents.

    Side effects:
        Prints two elapsed times in seconds, both measured from just before
        the ``find`` call: one once the cursor exists, one once every
        document has been pulled into memory.
    """
    # Connect to MongoDB (kept under a separate name so the ``db`` argument
    # is not shadowed by the database handle).
    database = _connect_mongo(host=host, port=port, db=db)

    t0 = time.time()
    cursor = database[collection].find({'created_at': {'$regex': '2013'}},
                                       no_cursor_timeout=True)
    try:
        print(time.time() - t0)
        documents = list(cursor)
        print(time.time() - t0)
    finally:
        # PyMongo documents that a cursor opened with no_cursor_timeout=True
        # must be closed explicitly, so release it even if fetching fails.
        cursor.close()

    return json_normalize(documents)
# Connection settings. Alternative database/collection names kept for
# reference:
# db = 'twittersmall'
# collection='twitterdata'
db = 'twitter'
collection = 'twitterCol'
host = 'localhost'
port = 27017

# Load the documents; single-argument print(...) calls produce the same
# output on Python 2 and also parse on Python 3.
var = read_mongo(db, collection, host, port)
print(var.head())

# Order rows by the 'user.statuses_count' column, largest first.
var.sort_values(by='user.statuses_count', inplace=True, ascending=False)
print(var.head())
print(var.columns)

# Keep the first five rows after sorting and only the two columns used by
# the chart.
# NOTE(review): each row is one fetched document, so the same screen name
# may appear more than once among these rows - confirm whether
# de-duplication is wanted.
var = var.iloc[0:5]
df2 = var[['user.screen_name', 'user.statuses_count']]
print(df2.columns)
print(df2.head())
print(df2)

# The following section indexes df2 positionally, so convert it to a plain
# array of [screen_name, statuses_count] rows.
df2 = df2.values
print(df2)
# Split the [screen_name, statuses_count] rows of df2 into two parallel lists.
# Iterating over the rows themselves, instead of a fixed range(0, 5), avoids
# an IndexError when fewer than five rows are available.
names = [row[0] for row in df2]
vals = [row[1] for row in df2]
print(names)
print(vals)
# Draw one green bar per entry in names/vals and save the figure as a PNG.
bar_width = 0.8
positions = list(range(len(names)))

fig, ax = plt.subplots()
ax.bar(positions, vals, bar_width, alpha=1, color='g')

# Axis label, title and tick placement.
ax.set_ylabel('Number of tweets', fontsize=15)
ax.set_title('Names', fontsize=10, fontweight='bold')
ax.set_xticks([pos + 0.1 * bar_width for pos in positions])
ax.set_xticklabels(names)

# Vertical tick labels.
for tick_label in ax.get_xticklabels():
    tick_label.set_fontsize(10)
    tick_label.set_rotation('vertical')

ax.grid()
plt.savefig('tweets_by_prg_language_1', format='png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment