Skip to content

Instantly share code, notes, and snippets.

@aurora1625
aurora1625 / laplacian_score.py
Created May 8, 2013 00:27
Python implementation of Laplacian score
# -*- coding: UTF-8 -*-
import numpy as np
import sys
class lp_score():
"""docstring for laplacian_score"""
def __init__(self, num_of_topic, sigma):
# self.fmat = covariance matrix which is the feature
"""
Implementation of pairwise ranking using scikit-learn LinearSVC
Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
T. Graepel, K. Obermayer.
Authors: Fabian Pedregosa <fabian@fseoane.net>
Alexandre Gramfort <alexandre.gramfort@inria.fr>
"""
import itertools
import numpy as np
from sklearn.linear_model import SGDClassifier, SGDRanking
from sklearn import metrics
from minirank.compat import RankSVM as MinirankSVM
from scipy import stats
from collections import Counter
import numpy as np
# Tally how many samples carry each label in y.
counter = Counter(y)
# dict views cannot be indexed on Python 3, so materialise the keys first.
# NOTE(review): assumes y holds exactly two distinct labels, and treats the
# first key in the Counter's iteration order as "negative" -- confirm.
labels = list(counter)
num_neg = counter[labels[0]]
num_pos = counter[labels[1]]
n_samples = len(y)
@aurora1625
aurora1625 / crf.py
Created November 8, 2013 02:28 — forked from neubig/crf.py
#!/usr/bin/python
# crf.py (by Graham Neubig)
# This script trains conditional random fields (CRFs)
# stdin: A corpus of WORD_POS WORD_POS WORD_POS sentences
# stdout: Feature vectors for emission and transition properties
from collections import defaultdict
from math import log, exp
import sys
@aurora1625
aurora1625 / extract_pubmed_abstract.py
Created November 20, 2013 23:46
Extract only the abstract of PUBMED raw data
__author__ = 'sean'
from bs4 import BeautifulSoup
import os
import cPickle as pickle
# Directory holding the raw PubMed abstract files.
path = '/Users/sean/ml/dataset/pubmed-bioinfo-abstracts/paperAbstracts/'
# Names of every entry found in that directory.
filenames = os.listdir(path)
# Starts empty; nothing in the visible code fills it yet.
txt_corpus = []
@aurora1625
aurora1625 / useful_pandas_snippets.py
Created September 24, 2015 05:04 — forked from bsweger/useful_pandas_snippets.md
Useful Pandas Snippets
# List unique values in a DataFrame column
pd.unique(df.column_name.ravel())

# Convert Series datatype to numeric, turning any non-numeric values into NaN.
# Series.convert_objects() was removed in pandas 1.0; pd.to_numeric with
# errors='coerce' is the replacement.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

# Grab DataFrame rows where column has certain values
valuelist = ['value1', 'value2', 'value3']
# Use the name defined just above (the original referenced an undefined
# `value_list`, which raised NameError).
df = df[df.column.isin(valuelist)]
@aurora1625
aurora1625 / rwa.py
Last active November 29, 2020 01:59 — forked from shamatar/rwa.py
Recurrent Weighted Average
'''
Keras (keras.io) implementation of Recurrent Weighted Average, as described in https://arxiv.org/abs/1703.01253. Follows the original TensorFlow implementation from https://github.com/jostmey/rwa. Works with fixed batch sizes and requires the "batch_shape" parameter in the input layer. Outputs a proper config, so it should save and restore properly. You are welcome to use, edit, or subclass it at will, with an advised reference to this gist.
'''
from keras.layers import Recurrent
import keras.backend as K
from keras import activations
from keras import initializers
from keras import regularizers
from keras import constraints
from keras.engine import Layer
@aurora1625
aurora1625 / preprocessing.py
Last active March 27, 2022 23:26
#text preprocessing
import re
from sklearn.feature_extraction import text
stopwords = list(text.ENGLISH_STOP_WORDS)
def preprocess(ss):
# no email
ss = re.sub(r"([a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*(@)(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(\.|\sdot\s))+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)", '', ss)
# take care of seat
ss = re.sub('SEATS[ ]{0,1}\d{1,2}[A-K]{2,3}', ' ', ss, flags=re.I)
ss = re.sub('SEAT[ ]{0,1}\d{1,2}[A-K]', ' ', ss, flags=re.I)
@aurora1625
aurora1625 / sql_snippet
Last active February 19, 2020 02:38
SQL snippets
-- Set every value in one column to NULL.
-- Replace table_name and column_name with the real identifiers.
UPDATE table_name SET column_name = NULL;