@palcu
Last active August 29, 2015 14:22
// Client-side upload: submit the form as multipart/form-data to the
// /search_trends endpoint and hand the JSON response to multiwordsSuccess.
var form_data = new FormData($('#upload-file')[0]);
$.ajax({
  type: 'POST',
  url: '/search_trends',
  data: form_data,
  success: multiwordsSuccess,
  contentType: false,  // let the browser set the multipart boundary
  processData: false   // send the FormData as-is instead of serializing it
});
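
The same upload can be exercised outside the browser. Below is a minimal test sketch using Python's requests library; the field name 'file' matches what the Flask handler below checks, while the file name tweets.txt and the localhost:5000 address are assumptions (the Flask dev server default).

import requests

# Post a file of tweets, one per line, mirroring the jQuery snippet above.
with open('tweets.txt', 'rb') as f:
    resp = requests.post('http://localhost:5000/search_trends',
                         files={'file': f})
print(resp.json())  # {phrase: score, ...}
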
import tweepy
import re
import collections
import subprocess
import json
from flask import Flask, render_template, request
from langdetect import detect

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = './'


def get_twitter_api():
    # keys.txt holds four lines: consumer key, consumer secret,
    # access token, access token secret.
    with open('keys.txt') as stream:
        keys = [line.strip() for line in stream]
    [consumer_key, consumer_secret, access_token, access_token_secret] = keys
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return tweepy.API(auth)


@app.route('/')
def home():
    return render_template('home.html')


@app.route('/trends')
def get_trends():
    api = get_twitter_api()
    US_WOEID = '23424977'  # Yahoo! Where On Earth ID for the United States
    response = api.trends_place(id=US_WOEID)
    trends_names = [x['name'] for x in response[0]['trends']]
    return json.dumps(trends_names)
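# trends_place returns a one-element list whose 'trends' value is a list of
# objects like {"name": "#SomeTag", "url": "...", "query": "..."}; only the
# names are forwarded to the client.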


@app.route('/search_trends', methods=['GET', 'POST'])
def search_trends():
    if request.files.get('file'):
        # A file of tweets, one per line, was uploaded through the form.
        tweets = [x.decode().strip()
                  for x in request.files['file'].stream.readlines()]
        tweets_file = 'tweets_upload.txt'
    else:
        # No upload: fetch recent tweets for each trend query posted in
        # the 'data' form field.
        tweets_file = 'tweets_trends.txt'
        api = get_twitter_api()
        trends_queries = json.loads(request.form['data'])
        tweets = []
        max_tweets = 20
        try:
            for i, query in enumerate(trends_queries):
                print("Getting trend {0}/{1}".format(i + 1, len(trends_queries)))
                tweets += [status.text for status in
                           tweepy.Cursor(api.search, lang='en',
                                         q=query).items(max_tweets)]
        except tweepy.TweepError:
            tweets = None
    if tweets:
        with open(tweets_file, 'w') as stream:
            stream.write("\n".join(tweets))
    # Tag the tweets with the CMU ARK Tweet NLP part-of-speech tagger.
    p = subprocess.Popen(["../ark-tweet-nlp-0.3.2/runTagger.sh",
                          "--no-confidence", "--input-format", "text",
                          "--output-format", "pretsv", "--quiet", tweets_file],
                         stdout=subprocess.PIPE)
    (output, err) = p.communicate()
    # Each tagged line has three tab-separated fields; keep the first two
    # (tokenized tweet and its tag sequence) as a [tweet, tags] pair.
    token_list_temp = re.sub("\n", "\t", output.decode()).split("\t")
    token_list = []
    for i in range(0, len(token_list_temp), 3):
        token_list.append(token_list_temp[i:i + 2])

    # Candidate phrase shapes, written in the ARK Twitter POS tagset:
    # N common noun, ^ proper noun, A adjective, V verb, R adverb,
    # D determiner, P pre/postposition, O pronoun, T verb particle.
    multiword_patterns = [
        ["^", "^"], ["N", "^"], ["^", "N"], ["N", "N"], ["A", "N"],
        ["A", "^"], ["V", "N"], ["V", "^"], ["V", "T"], ["R", "V"],
        ["V", "T", "T"], ["V", "T", "P"], ["V", "D", "N"], ["V", "D", "^"],
        ["N", "O", "N"], ["^", "O", "N"], ["N", "O", "^"], ["^", "O", "^"],
        ["D", "D", "N"], ["D", "D", "^"], ["V", "D", "N"], ["V", "D", "^"],
        ["V", "T", "P"], ["N", "N", "N"], ["N", "N", "^"], ["N", "^", "N"],
        ["^", "N", "N"], ["N", "^", "^"], ["^", "N", "^"], ["^", "^", "N"],
        ["^", "^", "^"], ["A", "N", "N"], ["A", "N", "^"], ["A", "^", "N"],
        ["A", "^", "^"], ["N", "A", "N"], ["^", "A", "^"], ["N", "A", "^"],
        ["^", "A", "N"], ["A", "A", "N"], ["A", "A", "^"], ["N", "P", "N"],
        ["^", "P", "N"], ["N", "P", "^"], ["^", "P", "^"],
        ["N", "P", "A", "N"], ["^", "P", "A", "N"], ["N", "P", "A", "^"],
        ["^", "P", "A", "^"], ["N", "P", "D", "N"], ["^", "P", "D", "N"],
        ["N", "P", "D", "^"], ["^", "P", "D", "^"], ["N", "P", "N", "N"],
        ["^", "P", "N", "N"], ["N", "P", "^", "N"], ["N", "P", "N", "^"],
        ["N", "P", "^", "^"], ["^", "P", "N", "^"], ["^", "P", "^", "N"],
        ["^", "P", "^", "^"], ["N", "N", "P", "N"], ["N", "N", "P", "^"],
        ["N", "^", "P", "N"], ["^", "N", "P", "N"], ["^", "^", "P", "N"],
        ["^", "N", "P", "^"], ["N", "^", "P", "^"], ["^", "^", "P", "^"]]

    # Count unigram and multiword frequencies, skipping consecutive
    # duplicates of the same tweet.
    dict_multiword = collections.defaultdict(int)
    dict_word = collections.defaultdict(int)
    words_total = 0
    last_tweet = ''
    for group in token_list:
        if len(group) == 2:
            [tweet, tag] = group
            if last_tweet != tweet:
                words = tweet.split()
                tags = tag.split()
                word1, word2, word3 = '', '', ''
                tag1, tag2, tag3 = '', '', ''
                for i in range(len(words)):
                    word = words[i].lower()
                    tag = tags[i]
                    # Count content words (nouns, adjectives, verbs,
                    # adverbs, prepositions, pronouns).
                    if tag in ["N", "A", "V", "R", "P", "O"]:
                        dict_word[word] += 1
                        words_total += 1
                    # Slide a window over the last two to four tags and
                    # count any window that matches a phrase pattern.
                    if [tag1, tag] in multiword_patterns:
                        multiword = word1 + " " + word
                        dict_multiword[multiword] += 1
                    if [tag2, tag1, tag] in multiword_patterns:
                        multiword = word2 + " " + word1 + " " + word
                        dict_multiword[multiword] += 1
                    if [tag3, tag2, tag1, tag] in multiword_patterns:
                        multiword = word3 + " " + word2 + " " + word1 + " " + word
                        dict_multiword[multiword] += 1
                    word3, tag3 = word2, tag2
                    word2, tag2 = word1, tag1
                    word1, tag1 = word, tag
                last_tweet = tweet

    # Score each phrase: its own count minus, for every component word,
    # that word's occurrences outside the phrase. Keep positive-scoring
    # phrases that langdetect labels as English.
    dict_multiword_score = collections.defaultdict(int)
    for key, val in dict_multiword.items():
        words = key.split()
        score = val
        for word in words:
            if word in dict_word:
                score -= dict_word[word] - val
        if score > 0:
            lang = ''
            try:
                lang = detect(key)
            except Exception:
                pass
            if lang == 'en':
                dict_multiword_score[key] = score
    ordered_dict_multiword_score = collections.OrderedDict(
        sorted(dict_multiword_score.items(), key=lambda t: t[1], reverse=True))
    return json.dumps(ordered_dict_multiword_score)
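
# The scoring above keeps a phrase only when its words rarely occur outside
# it. A small self-contained check of the same arithmetic, with made-up
# counts (illustration only, not used by the app):
def _phrase_score(phrase_count, word_counts):
    # score = phrase count minus each word's occurrences outside the phrase
    return phrase_count - sum(count - phrase_count for count in word_counts)

assert _phrase_score(10, [12, 25]) == -7  # one word is common elsewhere: dropped
assert _phrase_score(10, [12, 13]) == 5   # words mostly co-occur: kept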


if __name__ == '__main__':
    app.debug = True
    app.run()
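
End to end, the intended flow is to fetch the current trend names from /trends and post them back in the 'data' form field (with no file upload), which makes the app pull the tweets itself. A minimal sketch with the requests library, assuming the default Flask development server on localhost:5000:

import json
import requests

BASE = 'http://localhost:5000'  # assumed address of the Flask dev server

trends = requests.get(BASE + '/trends').json()           # list of US trend names
resp = requests.post(BASE + '/search_trends',
                     data={'data': json.dumps(trends)})  # no file: app fetches tweets
print(resp.json())  # phrases ordered by descending score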