Rodrigo Loredo rloredo

## accrual_v1.py
import pandas as pd
from pandas.tseries.offsets import MonthEnd

def assign_to_months(start_date:pd.Timestamp, end_date:pd.Timestamp, total:float, year:int) -> pd.Series:
        """
        start_date: start date of the contract
        end_date: end date of the contract
        total: total amount of the contract
        year: year to be accrued, everything outside this year will not be accrued
        """

## poisson_timeseries_generator.py
import math
import random
from datetime import datetime, timedelta

def poisson_timeseries_generator(tau, start_time, end_time):
    """
    Generates a time series with a Poisson distribution

    tau: interval length, i.e. the mean of the expected time between events
    start_time: starting date of the time series.  %Y-%m-%d %H:%M:%S format

## stanza.py
import stanza
import pandas as pd

#Load a dataframe with text in one column
#(single-row example: a 'label' column and a 'text' column holding the raw string)
df = pd.DataFrame({'label':[1], 'text' : ['Hi Juan Carlos'] })

#Initialize the engine. In this case in Portuguese (lang='pt').
#Processors requested, in order: tokenize (tokenization), mwt (multi-word token
#expansion), pos (part-of-speech tagging) and lemma (lemmatization).
nlp_pt = stanza.Pipeline(lang='pt', processors='tokenize,mwt,pos,lemma')

#Tokenize, lemmatize and POS

## LDA_sklearn.py
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
#For cvectorizer
def do_nothing(x):
    """Return ``x`` unchanged (identity function).

    NOTE(review): the preceding comment marks this as a helper for the
    CountVectorizer; presumably it is passed as its tokenizer/preprocessor so
    that already-tokenized documents go through untouched -- confirm at the
    call site, which is not visible here.
    """
    return x

#Create CV matrix
#Use max_df to drop words that appear in more than a given share of documents (a float is a proportion in [0.0, 1.0])
#Use min_df to drop words that appear in fewer than x documents (an int is an absolute document count)
#Use ngram_range to create ngrams and use them as extra features

## gensim_doc2vec.py
import gensim

#split train/test if necessary
#end is a negative slice bound: everything before the last 500 documents is
#used for training, and the last 500 documents are held out as the test split
end = -500
#docs is a pd.Series with lists of tokens representing each document
#don't forget to normalize tokens (to lower, strip accents, etc)
#wrap every training document in a TaggedDocument tagged with its position i
train = [gensim.models.doc2vec.TaggedDocument(d, [i]) for i, d in enumerate(docs.values[:end])]
#test documents are kept as plain entries of docs.values (they are not tagged)
test = docs.values[end:]

#doc2vec needs tagged docs

## cosine_similarity.js
function cosinesim(A,B){
    var dotproduct=0;
    var mA=0;
    var mB=0;
    for(i = 0; i < A.length; i++){
        dotproduct += (A[i] * B[i]);
        mA += (A[i]*A[i]);
        mB += (B[i]*B[i]);
    }
    mA = Math.sqrt(mA);
	import pandas as pd
	from pandas.tseries.offsets import MonthEnd

	def assign_to_months(start_date:pd.Timestamp, end_date:pd.Timestamp, total:float, year:int) -> pd.Series:
	"""
	start_date: start date of the contract
	end_date: end date of the contract
	total: total amount of the contract
	year: year to be accrued, everything outside this year will not be accrued
	"""
	import math
	import random
	from datetime import datetime, timedelta

	def poisson_timeseries_generator(tau, start_time, end_time):
	"""
	Generates a time series with a Poisson distribution

	tau: interval length, i.e. the mean of the expected time between events
	start_time: starting date of the time series. %Y-%m-%d %H:%M:%S format
	import stanza
	import pandas as pd

	#Load a dataframe with text in one column
	#(single-row example: a 'label' column and a 'text' column holding the raw string)
	df = pd.DataFrame({'label':[1], 'text' : ['Hi Juan Carlos'] })

	#Initialize the engine. In this case in Portuguese (lang='pt').
	#Processors requested, in order: tokenize (tokenization), mwt (multi-word token
	#expansion), pos (part-of-speech tagging) and lemma (lemmatization).
	nlp_pt = stanza.Pipeline(lang='pt', processors='tokenize,mwt,pos,lemma')

	#Tokenize, lemmatize and POS
	from sklearn.decomposition import LatentDirichletAllocation
	from sklearn.feature_extraction.text import CountVectorizer
	#For cvectorizer
	def do_nothing(x):
	    """Return ``x`` unchanged (identity function)."""
	    # The body must be indented under the def; the flattened original had
	    # `return x` at the same level, which is an IndentationError.
	    return x

	#Create CV matrix
	#Use max_df to drop words that appear in more than a given share of documents (a float is a proportion in [0.0, 1.0])
	#Use min_df to drop words that appear in fewer than x documents (an int is an absolute document count)
	#Use ngram_range to create ngrams and use them as extra features
	import gensim

	#split train/test if necessary
	#end is a negative slice bound: everything before the last 500 documents is
	#used for training, and the last 500 documents are held out as the test split
	end = -500
	#docs is a pd.Series with lists of tokens representing each document
	#don't forget to normalize tokens (to lower, strip accents, etc)
	#wrap every training document in a TaggedDocument tagged with its position i
	train = [gensim.models.doc2vec.TaggedDocument(d, [i]) for i, d in enumerate(docs.values[:end])]
	#test documents are kept as plain entries of docs.values (they are not tagged)
	test = docs.values[end:]

	#doc2vec needs tagged docs
	function cosinesim(A,B){
	var dotproduct=0;
	var mA=0;
	var mB=0;
	for(i = 0; i < A.length; i++){
	dotproduct += (A[i] * B[i]);
	mA += (A[i]*A[i]);
	mB += (B[i]*B[i]);
	}
	mA = Math.sqrt(mA);