This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: UTF-8 -*- | |
import numpy as np | |
import sys | |
class lp_score(): | |
"""docstring for laplacian_score""" | |
def __init__(self, num_of_topic, sigma): | |
# self.fmat = covariance matrix which is the feature |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Implementation of pairwise ranking using scikit-learn LinearSVC | |
Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich, | |
T. Graepel, K. Obermayer. | |
Authors: Fabian Pedregosa <fabian@fseoane.net> | |
Alexandre Gramfort <alexandre.gramfort@inria.fr> | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import numpy as np | |
from sklearn.linear_model import SGDClassifier, SGDRanking | |
from sklearn import metrics | |
from minirank.compat import RankSVM as MinirankSVM | |
from scipy import stats | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
import numpy as np | |
# Tally how many samples carry each distinct label in y.
counter = Counter(y)
# Dict views are not subscriptable on Python 3, so materialise the keys
# before indexing (this form also works on Python 2).
# NOTE(review): which label lands in position 0 vs 1 depends on Counter's key
# order -- confirm that the first key really is the negative class.
num_neg = counter[list(counter)[0]]
num_pos = counter[list(counter)[1]]
n_samples = len(y)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# crf.py (by Graham Neubig) | |
# This script trains conditional random fields (CRFs) | |
# stdin: A corpus of WORD_POS WORD_POS WORD_POS sentences | |
# stdout: Feature vectors for emission and transition properties | |
from collections import defaultdict | |
from math import log, exp | |
import sys |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'sean' | |
from bs4 import BeautifulSoup | |
import os | |
import cPickle as pickle | |
path = '/Users/sean/ml/dataset/pubmed-bioinfo-abstracts/paperAbstracts/' | |
filenames = os.listdir(path) | |
txt_corpus = list() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# List the unique values in a DataFrame column.
# (Series.ravel() is deprecated; pd.unique accepts the Series directly.)
pd.unique(df.column_name)

# Convert a Series to numeric, turning any non-numeric value into NaN.
# convert_objects() was removed from pandas; pd.to_numeric with
# errors='coerce' is the supported replacement.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

# Keep only the DataFrame rows whose column holds one of the listed values.
valuelist = ['value1', 'value2', 'value3']
# Use the name defined on the previous line; the earlier spelling
# ("value_list") was never defined and raised NameError.
df = df[df.column.isin(valuelist)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Keras (keras.io) implementation of Recurrent Weighted Average, as described in https://arxiv.org/abs/1703.01253. Follows original implementation in Tensorflow from https://github.com/jostmey/rwa. Works with fixed batch sizes, requires "batch_shape" parameter in input layer. Outputs proper config, should save and restore properly. You are welcome to use/edit or subclass at your will, with advised reference to this gist. | |
''' | |
from keras.layers import Recurrent | |
import keras.backend as K | |
from keras import activations | |
from keras import initializers | |
from keras import regularizers | |
from keras import constraints | |
from keras.engine import Layer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from sklearn.feature_extraction import text | |
stopwords = list(text.ENGLISH_STOP_WORDS) | |
def preprocess(ss): | |
# no email | |
ss = re.sub(r"([a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*(@)(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(\.|\sdot\s))+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)", '', ss) | |
# take care of seat | |
ss = re.sub('SEATS[ ]{0,1}\d{1,2}[A-K]{2,3}', ' ', ss, flags=re.I) | |
ss = re.sub('SEAT[ ]{0,1}\d{1,2}[A-K]', ' ', ss, flags=re.I) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Set every value in one column to NULL.
-- Replace table_name and column_name with the real identifiers; names that
-- contain spaces must be quoted, so the placeholders use underscores.
-- There is no WHERE clause: this updates every row in the table.
UPDATE table_name SET column_name = NULL;
OlderNewer