Requirements — Windows system (tested on Windows 10):
- `pdflatex` — TeX Live (with the `standalone` package)
- `magick` — ImageMagick (`choco install imagemagick`)
- `gswin32c` — Ghostscript (x86)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class AttentionWithContext(Layer): | |
""" | |
Attention operation, with a context/query vector, for temporal data. | |
Supports Masking. | |
Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf] | |
"Hierarchical Attention Networks for Document Classification" | |
by using a context vector to assist the attention | |
# Input shape | |
3D tensor with shape: `(samples, steps, features)`. | |
# Output shape |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! encoding=UTF-8 | |
""" | |
kernel canonical correlation analysis | |
""" | |
import numpy as np | |
from scipy.linalg import svd | |
from sklearn.metrics.pairwise import pairwise_kernels, euclidean_distances | |
class KCCA(object): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read a CSV file, dump it to line-delimited JSON, then re-read the JSON
# as a sanity check that the round trip preserves the records.
import pandas as pd

data = pd.read_csv("file.csv", sep=",")
print(data.head(2))

# One JSON object per line ("JSON Lines" layout).
records_json = data.to_json(orient='records', lines=True)
with open('file.json', 'w') as f:
    f.write(records_json)

# check
data = pd.read_json("file.json", lines=True)
print(data.head(2))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ekphrasis.classes.preprocessor import TextPreProcessor | |
from ekphrasis.classes.tokenizer import SocialTokenizer | |
from ekphrasis.dicts.emoticons import emoticons | |
import numpy as np | |
import re | |
import io | |
# Mapping between integer class ids and emotion names; the reverse map is
# derived from the forward one so the two can never drift out of sync.
label2emotion = {0: "others", 1: "happy", 2: "sad", 3: "angry"}
emotion2label = {emotion: label for label, emotion in label2emotion.items()}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the small Portuguese spaCy model and parse a local text file.
import spacy
from spacy.lang.pt.stop_words import STOP_WORDS
from sklearn.feature_extraction.text import CountVectorizer
import pt_core_news_sm

# pt_core_news_sm is the Portuguese news model installed as a package;
# .load() returns the spaCy Language pipeline object.
nlp = pt_core_news_sm.load()

# NOTE(review): readlines() keeps each line's trailing newline, and the
# join inserts an extra space between lines — presumably intentional so
# sentence segmentation is not confused by hard line breaks; confirm.
with open("original_text.txt", "r", encoding="utf-8") as f:
    text = " ".join(f.readlines())

# Run the full pipeline (tokenization, sentence segmentation, tagging).
doc = nlp(text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build per-word total counts over the document's sentences using a
# bag-of-words model. `doc` is the spaCy Doc parsed earlier.
corpus = [sent.text.lower() for sent in doc.sents]

# CountVectorizer wants a list of stop words, not spaCy's frozenset.
cv = CountVectorizer(stop_words=list(STOP_WORDS))
cv_fit = cv.fit_transform(corpus)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in
# 1.2; use the new accessor, falling back for old installations.
try:
    word_list = cv.get_feature_names_out()
except AttributeError:  # scikit-learn < 1.0
    word_list = cv.get_feature_names()

# Total occurrences of each vocabulary word across all sentences.
count_list = cv_fit.toarray().sum(axis=0)

# zip(word_list, count_list) pairs each word with its total count; a later
# (not shown here) step turns those pairs into a {word: frequency} dict.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report the highest-frequency words, then rescale every count in
# `word_frequency` ({word: count}, built earlier) to a value relative to
# the maximum count.
val = sorted(word_frequency.values())

# Words whose count equals one of the three largest counts (membership
# test, so ties are included).
higher_word_frequencies = [word for word, freq in word_frequency.items() if freq in val[-3:]]
print("\nWords with higher frequencies: ", higher_word_frequencies)

# gets relative frequencies of words
# Guard: on an empty vocabulary val[-1] would raise IndexError; fall back
# to 1 so the (empty) loop below is still well-defined.
higher_frequency = val[-1] if val else 1
for word in word_frequency.keys():
    word_frequency[word] = (word_frequency[word]/higher_frequency)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Score each sentence by summing the (normalised) frequencies of the
# known words it contains; `doc` and `word_frequency` come from earlier.
sentence_rank = {}
for sent in doc.sents:
    for word in sent:
        token_text = word.text.lower()
        if token_text in word_frequency:
            sentence_rank[sent] = sentence_rank.get(sent, 0) + word_frequency[token_text]

# Sentence scores in descending order.
top_sentences = sorted(sentence_rank.values(), reverse=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keep the sentences whose score made the top cut (`top_sent` is defined
# earlier in the script) and print them in document order.
summary = [sent for sent, strength in sentence_rank.items() if strength in top_sent]

for sentence in summary:
    print(sentence, end=" ")