Skip to content

Instantly share code, notes, and snippets.

@gdsaxton
Last active August 29, 2015 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gdsaxton/2610b0b796cb72e87d15 to your computer and use it in GitHub Desktop.
Save gdsaxton/2610b0b796cb72e87d15 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
tags_from_tweets.py - Take hashtags from tweets in SQLite database. Output to text file.
"""
import io
import re
import sqlite3
import sys
def main(database="arnova14.sqlite", output_path="all_text_HASHTAGS.txt"):
    """Export the hashtags of every stored tweet to a single text file.

    Reads every row of the ``search_tweets`` table, lower-cases its hashtag
    field, replaces embedded newlines with spaces, and writes all fields,
    joined by single spaces, to ``output_path`` encoded as UTF-8. A row
    without hashtags contributes an empty string, so consecutive spaces can
    appear in the output. Progress is echoed to standard output.

    Args:
        database: Path of the SQLite database file to read.
        output_path: Path of the text file to create or overwrite.
    """
    # Positions inside a ``SELECT *`` row.  NOTE(review): index 31 comes from
    # the original script and presumably is the hashtags column of the table
    # schema -- confirm against the actual database.
    id_column = 0
    hashtags_column = 31

    # One (possibly empty) hashtag string per tweet, in query order.
    all_text = []

    conn = sqlite3.connect(database)
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM search_tweets')
        # Iterate the cursor directly so the table is streamed instead of
        # being loaded into memory in one go.
        for row in cursor:
            tweet_id = row[id_column]
            hashtags = row[hashtags_column]
            if hashtags:
                tags = hashtags.lower()
                sys.stdout.write(tags + "\n")
            else:
                tags = u''
            all_text.append(tags.replace('\n', ' '))
            # "\r" rewinds to the start of the line so the progress message
            # overwrites itself rather than scrolling.
            sys.stdout.write("\radding content for id: %d" % tweet_id)
            sys.stdout.flush()
    finally:
        # Release the database handle even if a row fails to process.
        conn.close()

    # io.open gives an explicit-encoding text file on both Python 2 and 3;
    # the with-block guarantees the data is flushed and the file closed.
    with io.open(output_path, 'w', encoding='utf-8') as out:
        out.write(u' '.join(all_text))
# Script entry point: run the export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment