Skip to content

Instantly share code, notes, and snippets.

View duarteocarmo's full-sized avatar
💭

Duarte OC duarteocarmo

💭
View GitHub Profile
from Project_Import_data import raw
from Project_Import_data import header
import numpy as np
import math
import random
import pandas as pd
# Keep the header entries at positions 26 and 27 under short names.
# NOTE(review): presumably these are the "since first ..." / "since last ..."
# column labels of the imported data set; confirm against Project_Import_data.
since_first = header[26]
since_last = header[27]
from Project_Clean_data import raw
from Project_Clean_data import header
import numpy as np
import xlsxwriter
# We could also define binary headers and only calculate there.
# build statistics function
def __get_basic_stats__(x):
@duarteocarmo
duarteocarmo / output.sh
Created October 9, 2017 08:31
Query for pages that start with A.
/Users/duarteocarmo/anaconda/bin/python /Users/duarteocarmo/PycharmProjects/Big_Data_Good/Challenge_1/test_letter.py
For the pattern "cat" [0, 16] "are" [2, 6] "to" we found the following matches in 0:00:38.24:
cations are used to
cations== arrays are used to
cation modalities are audito
cation software|deskto
cation software|deskto
cation factors are used to
cations when compared to
@duarteocarmo
duarteocarmo / 1.txt
Created October 9, 2017 12:03
Output of the whole-Wikipedia query
For the pattern "cat" [0, 16] "are" [2, 6] "to" we found 6363 matches in 0:15:43.31:
['cations are used to']
['cations== arrays are used to']
['cation modalities are audito']
['catalogue == compared to']
['cations are the to']
['cations since they are easy to']
['cations are used to']
['catalan has a shared histo']
['cations) are able to']
@duarteocarmo
duarteocarmo / Final_NeuralNetwork.py
Created November 14, 2017 08:22
Neural Network Regression Problem.
# exercise 8.2.6
from matplotlib.pyplot import figure, plot, subplot, title, show, bar, legend, scatter
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
import neurolab as nl
from sklearn import model_selection
from scipy import stats
from Project_Clean_data import raw, header, is_binary
@duarteocarmo
duarteocarmo / Project_2_DecisionTree.py
Created November 14, 2017 08:25
Classification - Decision Tree
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 11 09:16:48 2017
@author: sviglios
"""
from sklearn import cross_validation, tree
from sklearn.metrics import confusion_matrix
from Project_Clean_data import raw, header, is_binary
@duarteocarmo
duarteocarmo / Project_2_LogisticRegression.py
Created November 14, 2017 08:27
Classification - Logistic Regression
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 12 09:54:58 2017
@author: sviglios
"""
from Project_Clean_data import raw, header, is_binary
from sklearn import cross_validation, tree
import sklearn.linear_model as lm
# exercise 11.4.1
import numpy as np
from matplotlib.pyplot import (figure, imshow, bar, title, xticks, yticks, cm,
subplot, show, legend, hold)
from matplotlib.pyplot import (figure, hold, subplot, plot, xlabel, ylabel,
xticks, yticks,legend,show)
from scipy.io import loadmat
from toolbox_02450 import gausKernelDensity
from sklearn.neighbors import NearestNeighbors
from Project_Clean_data import raw, header, standardize_this
import numpy as np
from subprocess import run
import re
import os
import time
from sys import platform
import matplotlib.pyplot as plt
import seaborn as sns
from Project_Clean_data import raw, header, standardize_this
from matplotlib.pyplot import figure, show, savefig, close
from toolbox_02450 import clusterplot
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram
from Project_Clean_data import raw, header, standardize_this
import numpy as np
import seaborn as sns
# Select the attribute to predict by name, then look up its position in
# `header` (converted to a list first so that `.index` is available).
# `list.index` raises ValueError if 'Dx' is not present in `header`.
target_attribute_name = 'Dx'
target_index = list(header).index(target_attribute_name)