Zhibo Xiao aurora1625

## laplacian_score.py
# -*- coding: UTF-8 -*-

import numpy as np
import sys


class lp_score():
    """docstring for laplacian_score"""
    def __init__(self, num_of_topic, sigma):
        # self.fmat = covariance matrix which is the feature

## ranking.py
"""
Implementation of pairwise ranking using scikit-learn LinearSVC

Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
    T. Graepel, K. Obermayer.

Authors: Fabian Pedregosa <fabian@fseoane.net>
         Alexandre Gramfort <alexandre.gramfort@inria.fr>
"""

## bench_rocsgd.py
import itertools

import numpy as np

from sklearn.linear_model import SGDClassifier, SGDRanking
from sklearn import metrics
from minirank.compat import RankSVM as MinirankSVM
from scipy import stats


## undersample.py
from collections import Counter
import numpy as np


# Tally how many samples carry each label.
# NOTE(review): `y` is not defined in the visible part of this script; it is
# assumed to be an iterable of class labels with at least two distinct
# values -- confirm against the code that builds it.
counter = Counter(y)
# Dict views are not subscriptable on Python 3, so materialise the keys
# first. On Python 2 this yields the same order as counter.keys().
_labels = list(counter)
# The first key is treated as the negative class and the second as the
# positive class; an IndexError is raised if fewer than two labels exist.
num_neg = counter[_labels[0]]
num_pos = counter[_labels[1]]

n_samples = len(y)

## crf.py
#!/usr/bin/python

# crf.py (by Graham Neubig)
#  This script trains conditional random fields (CRFs)
#  stdin: A corpus of WORD_POS WORD_POS WORD_POS sentences
#  stdout: Feature vectors for emission and transition properties

from collections import defaultdict
from math import log, exp
import sys

## extract_pubmed_abstract.py
__author__ = 'sean'

from bs4 import BeautifulSoup
import os
import cPickle as pickle

# Hard-coded directory that is scanned for input files
# (presumably the PubMed abstract dump, judging by the path -- verify).
path = '/Users/sean/ml/dataset/pubmed-bioinfo-abstracts/paperAbstracts/'
# Names of every entry found directly inside that directory.
filenames = os.listdir(path)

# Starts out empty; nothing in the visible part of the script fills it.
txt_corpus = []

## useful_pandas_snippets.py
# List unique values in a DataFrame column
pd.unique(df.column_name.ravel())

# Convert Series datatype to numeric, getting rid of any non-numeric values.
# Series.convert_objects() no longer exists in modern pandas (1.0+);
# pd.to_numeric with errors='coerce' turns unparseable entries into NaN.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

# Grab DataFrame rows where column has certain values
# (the list must be defined under the same name that isin() receives).
value_list = ['value1', 'value2', 'value3']
df = df[df.column.isin(value_list)]

## rwa.py
'''
Keras (keras.io) implementation of Recurrent Weighted Average, as described in https://arxiv.org/abs/1703.01253. Follows original implementation in Tensorflow from https://github.com/jostmey/rwa. Works with fixed batch sizes, requires "batch_shape" parameter in input layer. Outputs proper config, should save and restore properly. You are welcome to use/edit or subclass at your will, with advised reference to this gist.
'''
from keras.layers import Recurrent
import keras.backend as K
from keras import activations
from keras import initializers
from keras import regularizers
from keras import constraints
from keras.engine import Layer

## preprocessing.py
import re
from sklearn.feature_extraction import text

stopwords = list(text.ENGLISH_STOP_WORDS)
def preprocess(ss):
    """Strip e-mail addresses and seat references from *ss*.

    Args:
        ss: The text to clean.

    Returns:
        The cleaned text. Each e-mail address is deleted, and each seat
        reference (``SEAT 12A``, ``SEATS 12AB``, in any letter case) is
        replaced by a single space.
    """
    # no email ("name@host.tld", including the spelled-out "host dot tld" form)
    ss = re.sub(r"([a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*(@)(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(\.|\sdot\s))+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)", '', ss)
    # take care of seat: plural form (two or three seat letters) first, then
    # the singular form. Raw strings keep "\d" from being parsed as an
    # (invalid) string escape; the patterns themselves are unchanged.
    ss = re.sub(r'SEATS[ ]{0,1}\d{1,2}[A-K]{2,3}', ' ', ss, flags=re.I)
    ss = re.sub(r'SEAT[ ]{0,1}\d{1,2}[A-K]', ' ', ss, flags=re.I)
    # Hand the cleaned text back; without this the caller only ever gets None.
    return ss

## sql_snippet
-- Set every value of one column to NULL (empty).
-- Replace table_name / column_name with the real identifiers; names that
-- contain spaces must be quoted according to your database's rules.
UPDATE table_name SET column_name = NULL;
	# -- coding: UTF-8 --

	import numpy as np
	import sys


	class lp_score():
	"""docstring for laplacian_score"""
	def __init__(self, num_of_topic, sigma):
	# self.fmat = covariance matrix which is the feature
	"""
	Implementation of pairwise ranking using scikit-learn LinearSVC

	Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
	T. Graepel, K. Obermayer.

	Authors: Fabian Pedregosa <fabian@fseoane.net>
	Alexandre Gramfort <alexandre.gramfort@inria.fr>
	"""
	import itertools

	import numpy as np

	from sklearn.linear_model import SGDClassifier, SGDRanking
	from sklearn import metrics
	from minirank.compat import RankSVM as MinirankSVM
	from scipy import stats
	from collections import Counter
	import numpy as np


	# Tally how many samples carry each label.
	# NOTE(review): `y` is not defined in the visible part of this script; it
	# is assumed to be an iterable of class labels with at least two distinct
	# values -- confirm against the code that builds it.
	counter = Counter(y)
	# Dict views are not subscriptable on Python 3, so materialise the keys
	# first. On Python 2 this yields the same order as counter.keys().
	_labels = list(counter)
	# The first key is treated as the negative class and the second as the
	# positive class; an IndexError is raised if fewer than two labels exist.
	num_neg = counter[_labels[0]]
	num_pos = counter[_labels[1]]

	n_samples = len(y)
	#!/usr/bin/python

	# crf.py (by Graham Neubig)
	# This script trains conditional random fields (CRFs)
	# stdin: A corpus of WORD_POS WORD_POS WORD_POS sentences
	# stdout: Feature vectors for emission and transition properties

	from collections import defaultdict
	from math import log, exp
	import sys
	__author__ = 'sean'

	from bs4 import BeautifulSoup
	import os
	import cPickle as pickle

	# Hard-coded directory that is scanned for input files
	# (presumably the PubMed abstract dump, judging by the path -- verify).
	path = '/Users/sean/ml/dataset/pubmed-bioinfo-abstracts/paperAbstracts/'
	# Names of every entry found directly inside that directory.
	filenames = os.listdir(path)

	# Starts out empty; nothing in the visible part of the script fills it.
	txt_corpus = []
	# List unique values in a DataFrame column
	pd.unique(df.column_name.ravel())

	# Convert Series datatype to numeric, getting rid of any non-numeric values.
	# Series.convert_objects() no longer exists in modern pandas (1.0+);
	# pd.to_numeric with errors='coerce' turns unparseable entries into NaN.
	df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

	# Grab DataFrame rows where column has certain values
	# (the list must be defined under the same name that isin() receives).
	value_list = ['value1', 'value2', 'value3']
	df = df[df.column.isin(value_list)]
	'''
	Keras (keras.io) implementation of Recurrent Weighted Average, as described in https://arxiv.org/abs/1703.01253. Follows original implementation in Tensorflow from https://github.com/jostmey/rwa. Works with fixed batch sizes, requires "batch_shape" parameter in input layer. Outputs proper config, should save and restore properly. You are welcome to use/edit or subclass at your will, with advised reference to this gist.
	'''
	from keras.layers import Recurrent
	import keras.backend as K
	from keras import activations
	from keras import initializers
	from keras import regularizers
	from keras import constraints
	from keras.engine import Layer
	import re
	from sklearn.feature_extraction import text

	stopwords = list(text.ENGLISH_STOP_WORDS)
	def preprocess(ss):
		"""Strip e-mail addresses and seat references from *ss*.

		Args:
			ss: The text to clean.

		Returns:
			The cleaned text. Each e-mail address is deleted, and each seat
			reference (``SEAT 12A``, ``SEATS 12AB``, in any letter case) is
			replaced by a single space.
		"""
		# no email ("name@host.tld", including the spelled-out "host dot tld"
		# form). The pattern in this copy had lost its "*" quantifiers and had
		# its "|" alternation escaped, so it could not match ordinary
		# addresses; the intact pattern is restored here.
		ss = re.sub(r"([a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*(@)(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(\.|\sdot\s))+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)", '', ss)
		# take care of seat: plural form (two or three seat letters) first,
		# then the singular form. Raw strings keep "\d" from being parsed as
		# an (invalid) string escape; the patterns themselves are unchanged.
		ss = re.sub(r'SEATS[ ]{0,1}\d{1,2}[A-K]{2,3}', ' ', ss, flags=re.I)
		ss = re.sub(r'SEAT[ ]{0,1}\d{1,2}[A-K]', ' ', ss, flags=re.I)
		# Hand the cleaned text back; without this the caller only gets None.
		return ss
	-- Set every value of one column to NULL (empty).
	-- Replace table_name / column_name with the real identifiers; names that
	-- contain spaces must be quoted according to your database's rules.
	UPDATE table_name SET column_name = NULL;