@palcu
Last active August 29, 2015 14:22
// Client-side upload: submit the form as multipart/form-data to the
// /search_trends endpoint and hand the JSON response to multiwordsSuccess.
var form_data = new FormData($('#upload-file')[0]);
$.ajax({
  type: 'POST',
  url: '/search_trends',
  data: form_data,
  success: multiwordsSuccess,
  contentType: false,  // let the browser set the multipart boundary
  processData: false   // send the FormData as-is instead of serializing it
});
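
The same upload can be exercised outside the browser. Below is a minimal test sketch using Python's requests library; the field name 'file' matches what the Flask handler below checks, while the file name tweets.txt and the localhost:5000 address are assumptions (the Flask dev server default).

import requests

# Post a file of tweets, one per line, mirroring the jQuery snippet above.
with open('tweets.txt', 'rb') as f:
    resp = requests.post('http://localhost:5000/search_trends',
                         files={'file': f})
print(resp.json())  # {phrase: score, ...}
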
import tweepy
import re
import collections
import subprocess
import json
from flask import Flask, render_template, request
from langdetect import detect

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = './'


def get_twitter_api():
    # keys.txt holds four lines: consumer key, consumer secret,
    # access token, access token secret.
    with open('keys.txt') as stream:
        keys = [line.strip() for line in stream]
    [consumer_key, consumer_secret, access_token, access_token_secret] = keys
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return tweepy.API(auth)


@app.route('/')
def home():
    return render_template('home.html')


@app.route('/trends')
def get_trends():
    api = get_twitter_api()
    US_WOEID = '23424977'  # Yahoo! Where On Earth ID for the United States
    response = api.trends_place(id=US_WOEID)
    trends_names = [x['name'] for x in response[0]['trends']]
    return json.dumps(trends_names)
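# trends_place returns a one-element list whose 'trends' value is a list of
# objects like {"name": "#SomeTag", "url": "...", "query": "..."}; only the
# names are forwarded to the client.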


@app.route('/search_trends', methods=['GET', 'POST'])
def search_trends():
    if request.files.get('file'):
        # A file of tweets, one per line, was uploaded through the form.
        tweets = [x.decode().strip()
                  for x in request.files['file'].stream.readlines()]
        tweets_file = 'tweets_upload.txt'
    else:
        # No upload: fetch recent tweets for each trend query posted in
        # the 'data' form field.
        tweets_file = 'tweets_trends.txt'
        api = get_twitter_api()
        trends_queries = json.loads(request.form['data'])
        tweets = []
        max_tweets = 20
        try:
            for i, query in enumerate(trends_queries):
                print("Getting trend {0}/{1}".format(i + 1, len(trends_queries)))
                tweets += [status.text for status in
                           tweepy.Cursor(api.search, lang='en',
                                         q=query).items(max_tweets)]
        except tweepy.TweepError:
            tweets = None
    if tweets:
        with open(tweets_file, 'w') as stream:
            stream.write("\n".join(tweets))
    # Tag the tweets with the CMU ARK Tweet NLP part-of-speech tagger.
    p = subprocess.Popen(["../ark-tweet-nlp-0.3.2/runTagger.sh",
                          "--no-confidence", "--input-format", "text",
                          "--output-format", "pretsv", "--quiet", tweets_file],
                         stdout=subprocess.PIPE)
    (output, err) = p.communicate()
    # Each tagged line has three tab-separated fields; keep the first two
    # (tokenized tweet and its tag sequence) as a [tweet, tags] pair.
    token_list_temp = re.sub("\n", "\t", output.decode()).split("\t")
    token_list = []
    for i in range(0, len(token_list_temp), 3):
        token_list.append(token_list_temp[i:i + 2])

    # Candidate phrase shapes, written in the ARK Twitter POS tagset:
    # N common noun, ^ proper noun, A adjective, V verb, R adverb,
    # D determiner, P pre/postposition, O pronoun, T verb particle.
    multiword_patterns = [
        ["^", "^"], ["N", "^"], ["^", "N"], ["N", "N"], ["A", "N"],
        ["A", "^"], ["V", "N"], ["V", "^"], ["V", "T"], ["R", "V"],
        ["V", "T", "T"], ["V", "T", "P"], ["V", "D", "N"], ["V", "D", "^"],
        ["N", "O", "N"], ["^", "O", "N"], ["N", "O", "^"], ["^", "O", "^"],
        ["D", "D", "N"], ["D", "D", "^"], ["V", "D", "N"], ["V", "D", "^"],
        ["V", "T", "P"], ["N", "N", "N"], ["N", "N", "^"], ["N", "^", "N"],
        ["^", "N", "N"], ["N", "^", "^"], ["^", "N", "^"], ["^", "^", "N"],
        ["^", "^", "^"], ["A", "N", "N"], ["A", "N", "^"], ["A", "^", "N"],
        ["A", "^", "^"], ["N", "A", "N"], ["^", "A", "^"], ["N", "A", "^"],
        ["^", "A", "N"], ["A", "A", "N"], ["A", "A", "^"], ["N", "P", "N"],
        ["^", "P", "N"], ["N", "P", "^"], ["^", "P", "^"],
        ["N", "P", "A", "N"], ["^", "P", "A", "N"], ["N", "P", "A", "^"],
        ["^", "P", "A", "^"], ["N", "P", "D", "N"], ["^", "P", "D", "N"],
        ["N", "P", "D", "^"], ["^", "P", "D", "^"], ["N", "P", "N", "N"],
        ["^", "P", "N", "N"], ["N", "P", "^", "N"], ["N", "P", "N", "^"],
        ["N", "P", "^", "^"], ["^", "P", "N", "^"], ["^", "P", "^", "N"],
        ["^", "P", "^", "^"], ["N", "N", "P", "N"], ["N", "N", "P", "^"],
        ["N", "^", "P", "N"], ["^", "N", "P", "N"], ["^", "^", "P", "N"],
        ["^", "N", "P", "^"], ["N", "^", "P", "^"], ["^", "^", "P", "^"]]

    # Count unigram and multiword frequencies, skipping consecutive
    # duplicates of the same tweet.
    dict_multiword = collections.defaultdict(int)
    dict_word = collections.defaultdict(int)
    words_total = 0
    last_tweet = ''
    for group in token_list:
        if len(group) == 2:
            [tweet, tag] = group
            if last_tweet != tweet:
                words = tweet.split()
                tags = tag.split()
                word1, word2, word3 = '', '', ''
                tag1, tag2, tag3 = '', '', ''
                for i in range(len(words)):
                    word = words[i].lower()
                    tag = tags[i]
                    # Count content words (nouns, adjectives, verbs,
                    # adverbs, prepositions, pronouns).
                    if tag in ["N", "A", "V", "R", "P", "O"]:
                        dict_word[word] += 1
                        words_total += 1
                    # Slide a window over the last two to four tags and
                    # count any window that matches a phrase pattern.
                    if [tag1, tag] in multiword_patterns:
                        multiword = word1 + " " + word
                        dict_multiword[multiword] += 1
                    if [tag2, tag1, tag] in multiword_patterns:
                        multiword = word2 + " " + word1 + " " + word
                        dict_multiword[multiword] += 1
                    if [tag3, tag2, tag1, tag] in multiword_patterns:
                        multiword = word3 + " " + word2 + " " + word1 + " " + word
                        dict_multiword[multiword] += 1
                    word3, tag3 = word2, tag2
                    word2, tag2 = word1, tag1
                    word1, tag1 = word, tag
                last_tweet = tweet

    # Score each phrase: its own count minus, for every component word,
    # that word's occurrences outside the phrase. Keep positive-scoring
    # phrases that langdetect labels as English.
    dict_multiword_score = collections.defaultdict(int)
    for key, val in dict_multiword.items():
        words = key.split()
        score = val
        for word in words:
            if word in dict_word:
                score -= dict_word[word] - val
        if score > 0:
            lang = ''
            try:
                lang = detect(key)
            except Exception:
                pass
            if lang == 'en':
                dict_multiword_score[key] = score
    ordered_dict_multiword_score = collections.OrderedDict(
        sorted(dict_multiword_score.items(), key=lambda t: t[1], reverse=True))
    return json.dumps(ordered_dict_multiword_score)
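
# The scoring above keeps a phrase only when its words rarely occur outside
# it. A small self-contained check of the same arithmetic, with made-up
# counts (illustration only, not used by the app):
def _phrase_score(phrase_count, word_counts):
    # score = phrase count minus each word's occurrences outside the phrase
    return phrase_count - sum(count - phrase_count for count in word_counts)

assert _phrase_score(10, [12, 25]) == -7  # one word is common elsewhere: dropped
assert _phrase_score(10, [12, 13]) == 5   # words mostly co-occur: kept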


if __name__ == '__main__':
    app.debug = True
    app.run()
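
End to end, the intended flow is to fetch the current trend names from /trends and post them back in the 'data' form field (with no file upload), which makes the app pull the tweets itself. A minimal sketch with the requests library, assuming the default Flask development server on localhost:5000:

import json
import requests

BASE = 'http://localhost:5000'  # assumed address of the Flask dev server

trends = requests.get(BASE + '/trends').json()           # list of US trend names
resp = requests.post(BASE + '/search_trends',
                     data={'data': json.dumps(trends)})  # no file: app fetches tweets
print(resp.json())  # phrases ordered by descending score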