Skip to content

Instantly share code, notes, and snippets.

View luisfredgs's full-sized avatar
🎯
Focusing

Luís Gonçalves luisfredgs

🎯
Focusing
View GitHub Profile
@luisfredgs
luisfredgs / AttentionWithContext.py
Created September 9, 2018 20:27 — forked from rmdort/AttentionWithContext.py
Keras Layer that implements an Attention mechanism, with a context/query vector, for temporal data. Supports Masking. Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf] "Hierarchical Attention Networks for Document Classification"
class AttentionWithContext(Layer):
"""
Attention operation, with a context/query vector, for temporal data.
Supports Masking.
Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf]
"Hierarchical Attention Networks for Document Classification"
by using a context vector to assist the attention
# Input shape
3D tensor with shape: `(samples, steps, features)`.
# Output shape
@luisfredgs
luisfredgs / kcca.py
Created January 4, 2019 12:55 — forked from yuyay/kcca.py
kernel canonical correlation analysis in python
#! encoding=UTF-8
"""
kernel canonical correlation analysis
"""
import numpy as np
from scipy.linalg import svd
from sklearn.metrics.pairwise import pairwise_kernels, euclidean_distances
class KCCA(object):
@luisfredgs
luisfredgs / pandas_csv_to_json.py
Last active September 27, 2022 14:39
Convert pandas dataframe from CSV to JSON
import pandas as pd

# Load the CSV and peek at the first rows as a sanity check.
frame = pd.read_csv("file.csv", sep=",")
print(frame.head(2))

# Serialize to JSON Lines format: one record object per line.
with open('file.json', 'w') as out:
    out.write(frame.to_json(orient='records', lines=True))

# Round-trip: re-read the JSON file and confirm it matches.
frame = pd.read_json("file.json", lines=True)
print(frame.head(2))
@luisfredgs
luisfredgs / latex2png_win.md
Created July 30, 2019 13:28 — forked from retorillo/latex2png_win.md
LaTeX to PNG on Windows

LaTeX to PNG on Windows

Prerequisite

  • Windows System (Tested on Windows 10)
  • pdflatex TexLive (/w standalone package)
  • magick Image Magick
    • choco install imagemagick
  • gswin32c Ghost Script (x86)
from ekphrasis.classes.preprocessor import TextPreProcessor
from ekphrasis.classes.tokenizer import SocialTokenizer
from ekphrasis.dicts.emoticons import emoticons
import numpy as np
import re
import io
# Integer label -> emotion name for the 4-way emotion classification task.
label2emotion = {0: "others", 1: "happy", 2: "sad", 3: "angry"}
# Inverse lookup, derived from the forward map so the two never drift apart.
emotion2label = {name: idx for idx, name in label2emotion.items()}
@luisfredgs
luisfredgs / imports.py
Last active April 18, 2020 19:57
Extractive text summarization spacy
import spacy
from spacy.lang.pt.stop_words import STOP_WORDS
from sklearn.feature_extraction.text import CountVectorizer
import pt_core_news_sm

# Extractive summarization for Portuguese text: score each sentence by the
# relative frequency of the words it contains, then print the top sentences.

nlp = pt_core_news_sm.load()
with open("original_text.txt", "r", encoding="utf-8") as f:
    text = " ".join(f.readlines())
doc = nlp(text)

# One lower-cased string per sentence — the bag-of-words corpus.
corpus = [sent.text.lower() for sent in doc.sents]

cv = CountVectorizer(stop_words=list(STOP_WORDS))
cv_fit = cv.fit_transform(corpus)
# NOTE(review): get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2 — switch to get_feature_names_out() on newer versions.
word_list = cv.get_feature_names()
count_list = cv_fit.toarray().sum(axis=0)

"""The zip(*iterables) function takes iterables as arguments and returns an iterator.
This iterator generates a series of tuples containing elements from each iterable.
Let's convert these tuples to {word:frequency} dictionary"""
# FIX: this line was missing, so word_frequency raised a NameError below.
word_frequency = dict(zip(word_list, count_list))

val = sorted(word_frequency.values())
# Words whose counts fall among the three highest distinct count values.
higher_word_frequencies = [word for word, freq in word_frequency.items() if freq in val[-3:]]
print("\nWords with higher frequencies: ", higher_word_frequencies)

# gets relative frequencies of words (normalize by the maximum count).
higher_frequency = val[-1]
for word in word_frequency.keys():
    word_frequency[word] = (word_frequency[word] / higher_frequency)

# Score each sentence as the sum of its words' relative frequencies.
sentence_rank = {}
for sent in doc.sents:
    for word in sent:
        if word.text.lower() in word_frequency.keys():
            if sent in sentence_rank.keys():
                sentence_rank[sent] += word_frequency[word.text.lower()]
            else:
                sentence_rank[sent] = word_frequency[word.text.lower()]

# All sentence scores, highest first.
top_sentences = sorted(sentence_rank.values())[::-1]
# FIX: `top_sent` was an undefined name; keep only the 3 best scores so the
# membership test below actually selects a summary instead of every sentence.
top_sent = top_sentences[:3]

summary = []
for sent, strength in sentence_rank.items():
    if strength in top_sent:
        summary.append(sent)

for i in summary:
    print(i, end=" ")