Skip to content

Instantly share code, notes, and snippets.

@GlinZachariah
Created October 28, 2019 17:01
Show Gist options
  • Save GlinZachariah/808338016d445dd3ae996890ce09ebdb to your computer and use it in GitHub Desktop.
Save GlinZachariah/808338016d445dd3ae996890ce09ebdb to your computer and use it in GitHub Desktop.
from google.cloud import firestore
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import json
import datetime
import os,csv,re,emoji
# Column separator of the exported dataset.
FIELD_DELIMITER = '^'
# Layout of the 'Tweet Timestamp' strings, e.g. "Mon Oct 28 17:01:00 +0000 2019".
TIMESTAMP_FORMAT = '%a %b %d %H:%M:%S %z %Y'
# NOTE(review): the collection is fixed to the netflix dataset no matter which
# company name is entered -- confirm this is intended.
COLLECTION_NAME = u'dataset-netflix'


def dataset_filename(filekey, companyname):
    """Return the CSV file name (relative to ``output/``) for this export.

    The key ``"key2"`` selects the ``key2/`` sub-folder; any other key selects
    the ``final_dataset_*`` file.
    """
    if filekey == "key2":
        return "key2/dataset_" + companyname + ".csv"
    return "final_dataset_" + companyname + ".csv"


def clean_text(text):
    """Normalise tweet text for the dataset.

    Commas become spaces, the text is lower-cased, hashtag marks, ``&``
    separators, line breaks and tabs are collapsed, dots and digits are
    dropped, and every space-separated token containing ``https://`` is
    removed.
    """
    text = text.replace(',', ' ').lower()
    # Hashtag marks preceded by a space, " & ", line breaks and tabs -> one space.
    text = re.sub(r" #| & |\n|\t", " ", text)
    # Remaining '#' and '.' are deleted outright (no newline/tab can be left
    # at this point, so those alternatives of the old pattern were dead).
    text = re.sub(r"[#.]", "", text)
    text = re.sub(r"\d+", "", text)
    return " ".join(
        token for token in text.split(" ") if "https://" not in token
    )


def split_timestamp(raw):
    """Parse a tweet timestamp and return ``(YYYY-MM-DD, HH:MM:SS)`` strings."""
    parsed = datetime.datetime.strptime(raw, TIMESTAMP_FORMAT)
    return parsed.strftime("%Y-%m-%d"), parsed.strftime("%H:%M:%S")


def build_row(tweet_id, tweet, text):
    """Return the list of CSV fields for one tweet.

    ``tweet`` is the document dictionary and ``text`` the already cleaned
    tweet text. Every field is rendered with ``str`` and the text is wrapped
    in single quotes, as the original hand-built row did.
    """
    tweet_date, tweet_time = split_timestamp(tweet['Tweet Timestamp'])
    fields = [
        tweet_id,
        tweet['Retweet count'],
        # NOTE(review): the favorite-count column is read from 'Favorited',
        # the same key as the favorited column below -- verify the schema.
        tweet['Favorited'],
        "'" + str(text) + "'",
        tweet_date,
        tweet_time,
        tweet['Tweet Sensitivity'],
        tweet['Verified User'],
        tweet['User followers_count'],
        tweet['Favorited'],
        tweet['Retweeted'],
    ]
    return [str(field) for field in fields]


def main():
    """Export every document of the Firestore collection to a '^'-delimited CSV.

    Prompts for the key name and the company name, reads all documents of
    ``COLLECTION_NAME`` and appends one row per document to the file chosen
    by ``dataset_filename`` under ``output/``.
    """
    # The prompt is restored: ``filekey`` is read when choosing the output
    # file, and without this assignment the script died with a NameError.
    filekey = input("Enter key file name: ")
    companyname = input("Enter company name: ")

    # firestore authentication
    cred = credentials.Certificate('firebase-adminsdk-.json')
    firebase_admin.initialize_app(cred)
    db = firestore.client()
    docs = db.collection(COLLECTION_NAME).get()

    path = "output/" + dataset_filename(filekey, companyname)
    # Make sure output/ (and output/key2/) exist before appending.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Open the file once for the whole export; newline='' is what the csv
    # module expects, and utf-8 keeps non-ASCII tweet text writable everywhere.
    with open(path, mode='a', newline='', encoding='utf-8') as tweets_file:
        tweet_writer = csv.writer(
            tweets_file,
            delimiter=FIELD_DELIMITER,
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        )
        for doc in docs:
            tweet_id = doc.id
            tweet = doc.to_dict()
            print(tweet_id)
            text = clean_text(emoji.demojize(tweet['Tweet text']))
            # One list element per column: passing a single pre-joined string
            # made the writer quote the whole line as one field.
            tweet_writer.writerow(build_row(tweet_id, tweet, text))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment