Aniruddha Bhandari aniruddha27

## collection_doc_count.py
# Number of documents in the restaurants collection.
# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0, so count
# through the collection with an empty filter (matches every document).
db.restaurants.count_documents({})  # 25359

# Number of documents in the neighborhoods collection
db.neighborhoods.count_documents({})  # 195

## list_collections.py
# List the names of the collections in the database
db.list_collection_names()

## load_database.py
# Select the sample_restaurants database on the connected client
db = client["sample_restaurants"]

## list_indexes.py
# List the names of all databases reachable through the connected client
client.list_database_names()

## import_pymongo.py
# importing the required libraries
import pymongo
import pprint
import json
import warnings
# Silence every warning for the rest of the session
warnings.filterwarnings('ignore')
# NOTE: this rebinds the name `pprint` from the module imported above to the
# pprint() function, so `pprint.pprint(...)` is no longer available below.
from pprint import pprint

# connect to the mongoclient
# NOTE: <connection_string> is a placeholder, not valid Python -- replace it
# with your MongoDB connection URI as a quoted string before running.
client = pymongo.MongoClient(<connection_string>)

## visualize_hashtags.py
# Plotting hashtags counts

# Set the plotting context first: seaborn applies it through matplotlib's
# rcParams, so it only affects figures that are created afterwards.
sns.set_context("poster")

# Keep the 15 rows of df_pivot with the largest "Tweet_Id" values
data = df_pivot.nlargest(columns="Tweet_Id", n=15)

# Creating bar graph
plt.figure(figsize=(16, 5))
ax = sns.barplot(data=data, x="Hashtags", y="Tweet_Id", palette="Reds_d")

## hastags_pivot_table.py
# Count the rows recorded for each distinct hashtag (len of each group)
table = df_tags.pivot_table(
    index="Hashtags",
    values='Tweet_Id',
    aggfunc=len,
)

# Flatten the pivot table into a plain dataframe; the "Hashtags" index
# becomes an ordinary column via the record array.
df_pivot = pd.DataFrame(data=table.to_records())

df_pivot.head()

## sql_query_hashtags.py
# Querying hashtags from database
data_tags = DbConnect("SELECT Tweet_Id, Hashtag FROM TwitterEntity;")

# Build the dataframe in a single call. Growing it one row at a time with
# .loc enlarges the frame on every insert, which gets quadratically slower
# as the number of rows grows. Column dtypes are inferred by pandas.
df_tags = pd.DataFrame(
    [(row[0], row[1]) for row in data_tags],
    columns=['Tweet_Id', 'Hashtags'],
)


## tweet_sentiment.py
# Sentiment analysis using Textblob
def sentiment(tweet):
    """Return the sign of the TextBlob polarity of *tweet*.

    Returns 1 when the polarity is above zero, 0 when it is exactly zero
    and -1 otherwise.
    """
    polarity = TextBlob(tweet).sentiment.polarity
    if polarity > 0:
        return 1
    return 0 if polarity == 0 else -1

## wordcloud.py
# Most commonly occurring words
def keywords():
    """Show a word cloud built from every entry of df_tweet['Clean_Tweet']."""
    # One space-separated string is what WordCloud.generate() consumes here.
    corpus = ' '.join(df_tweet['Clean_Tweet'])

    cloud_builder = WordCloud(
        width=800,
        height=500,
        random_state=21,
        max_font_size=110,
    )
    cloud = cloud_builder.generate(corpus)

    # Render the cloud as an image with the axes hidden.
    plt.figure(figsize=(10, 7))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis('off')
    plt.show()
	# Number of documents in the restaurants collection.
	# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0, so count
	# through the collection with an empty filter (matches every document).
	db.restaurants.count_documents({})  # 25359

	# Number of documents in the neighborhoods collection
	db.neighborhoods.count_documents({})  # 195
	# List the names of the collections in the database
	db.list_collection_names()
	# importing the required libraries
	import pymongo
	import pprint
	import json
	import warnings
	# Silence every warning for the rest of the session
	warnings.filterwarnings('ignore')
	# NOTE: this rebinds the name `pprint` from the module imported above to the
	# pprint() function, so `pprint.pprint(...)` is no longer available below.
	from pprint import pprint

	# connect to the mongoclient
	# NOTE: <connection_string> is a placeholder, not valid Python -- replace it
	# with your MongoDB connection URI as a quoted string before running.
	client = pymongo.MongoClient(<connection_string>)
	# Plotting hashtags counts

	# Set the plotting context first: seaborn applies it through matplotlib's
	# rcParams, so it only affects figures that are created afterwards.
	sns.set_context("poster")

	# Keep the 15 rows of df_pivot with the largest "Tweet_Id" values
	data = df_pivot.nlargest(columns="Tweet_Id", n=15)

	# Creating bar graph
	plt.figure(figsize=(16, 5))
	ax = sns.barplot(data=data, x="Hashtags", y="Tweet_Id", palette="Reds_d")
	# Count the rows recorded for each distinct hashtag (len of each group)
	table = df_tags.pivot_table(
		index="Hashtags",
		values='Tweet_Id',
		aggfunc=len,
	)

	# Flatten the pivot table into a plain dataframe; the "Hashtags" index
	# becomes an ordinary column via the record array.
	df_pivot = pd.DataFrame(data=table.to_records())

	df_pivot.head()
	# Querying hashtags from database
	data_tags = DbConnect("SELECT Tweet_Id, Hashtag FROM TwitterEntity;")

	# Build the dataframe in a single call. The original loop body had lost
	# its indentation, and growing the frame one row at a time with .loc
	# gets quadratically slower as the number of rows grows. Column dtypes
	# are inferred by pandas.
	df_tags = pd.DataFrame(
		[(row[0], row[1]) for row in data_tags],
		columns=['Tweet_Id', 'Hashtags'],
	)
	# Sentiment analysis using Textblob
	def sentiment(tweet):
		"""Return the sign of the TextBlob polarity of *tweet*.

		Returns 1 when the polarity is above zero, 0 when it is exactly zero
		and -1 otherwise.
		"""
		# Body and branch indentation restored; the flattened original did
		# not parse.
		polarity = TextBlob(tweet).sentiment.polarity
		if polarity > 0:
			return 1
		elif polarity == 0:
			return 0
		else:
			return -1
	# Most commonly occurring words
	def keywords():
		"""Show a word cloud built from every entry of df_tweet['Clean_Tweet']."""
		# Function body indentation restored; the flattened original did not
		# parse.
		all_words = ' '.join(df_tweet['Clean_Tweet'])
		wordcloud = WordCloud(
			width=800,
			height=500,
			random_state=21,
			max_font_size=110,
		).generate(all_words)

		# Render the cloud as an image with the axes hidden.
		plt.figure(figsize=(10, 7))
		plt.imshow(wordcloud, interpolation="bilinear")
		plt.axis('off')
		plt.show()