Created June 21, 2014 23:49
Pasted from IPython
from numpy import matrix
matrix('1908 January 4.5 -5.6')
matrix("1908 'January' 4.5 -5.6")
matrix([4.5 -5.6],[2, 4])
matrix([[4.5 -5.6],[2, 4]])
matrix([[4.5 5.6],[2, 4]])
matrix([[4.5, -5.6],[2, 4]])
matrix([4.5, -5.6],[2, 4])
matrix([[4.5, -5.6],[2, 4]])
matrix([[4.5, -5.6],[2, 4]], header=['ss','b'])
matrix([[4.5, -5.6],[2, 4]], dtype=['ss','b'])
matrix([[4.5, -5.6],[2, 4]], dtype=('ss','b'))
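# Note on the string attempts above: numpy.matrix parses a MATLAB-style string only when
# every token is numeric, with spaces/commas separating columns and semicolons separating
# rows, so 'January' cannot appear in it. A minimal working sketch:
from numpy import matrix
matrix('1908 4.5 -5.6')    # 1x3 matrix from a numeric-only string
matrix('4.5 -5.6; 2 4')    # semicolon starts a new row -> 2x2 matrix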
import numpy
matrix([[4.5, -5.6],[2, 4]])
matrix([[4.5, -5.6,6],[2, 4,5]])
numpy.mat?
numpy.matrix?
matrix([[4.5, -5.6,6],[2, 4,5]])
numpy.array([[4.5, -5.6,6],[2, 4,5]])
x = numpy.array([[4.5, -5.6,6],[2, 4,5]])
y = matrix([[4.5, -5.6,6],[2, 4,5]])
y[0]
x[0]
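# Difference between the two objects probed above: indexing a matrix always returns a
# 2-D matrix, while indexing an array drops a dimension. A short sketch:
import numpy
y = numpy.matrix([[4.5, -5.6, 6], [2, 4, 5]])
x = numpy.array([[4.5, -5.6, 6], [2, 4, 5]])
y[0]    # matrix([[ 4.5, -5.6,  6. ]])  -- still 2-D
x[0]    # array([ 4.5, -5.6,  6. ])     -- 1-D row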
numpy.array([[4.5, -5.6],[2, 4]], dtype=[('a','float'),('b','float')])
numpy.array([(4.5, -5.6),(2, 4)], dtype=[('a','float'),('b','float')])
numpy.array([(4.5, -5.6),(2, 4)], dtype=[('a','float'),('b','float')])
numpy.array([(4.5, -5.6),(2, 4)], dtype=[('a','float'),('b','float')])
g = numpy.array([(4.5, -5.6),(2, 4)], dtype=[('a','float'),('b','float')])
g['a']
g
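# The pattern that works above: a structured array is built from a list of tuples (not
# lists) plus a dtype of (name, type) pairs, and a named column is pulled out by indexing
# with the field name. A minimal sketch:
import numpy
g = numpy.array([(4.5, -5.6), (2, 4)], dtype=[('a', 'float'), ('b', 'float')])
g['a']    # array([ 4.5,  2. ])   -- the 'a' field of every record
g[0]      # (4.5, -5.6)           -- the first record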
numpy.hstack(g,[2,4])
numpy.hstack(g)
numpy.hstack((g,[2,4]))
numpy.hstack((g,numpy.array([2,4])))
numpy.array([2,4])
g = numpy.array([(4.5, -5.6),(2, 4)], dtype=[('a','float'),('b','float')])
f = numpy.array([(5, -6),(-2, 4)], dtype=[('a','float'),('b','float')])
g
f
numpy.array(g,f)
numpy.hstack(g,f)
numpy.hstack((g,f))
numpy.hstack((g,f))[0]
numpy.hstack((g,f))]
numpy.hstack((g,f))['a']
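# On the hstack attempts: numpy.hstack takes a single sequence of arrays, so hstack(g, [2,4])
# is a signature error, and the stacked arrays must share a dtype; stacking a structured array
# with a plain numeric list fails, while two structured arrays with the same fields concatenate
# into a longer record array whose fields stay addressable. A sketch:
import numpy
dt = [('a', 'float'), ('b', 'float')]
g = numpy.array([(4.5, -5.6), (2, 4)], dtype=dt)
f = numpy.array([(5, -6), (-2, 4)], dtype=dt)
h = numpy.hstack((g, f))    # 4 records, same two fields
h['a']                      # array([ 4.5,  2. ,  5. , -2. ])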
numpy.array([('a', -6),('b', 4)], dtype=[('a','string'),('b','float')])
numpy.array([('a', -6),('b', 4)], dtype=[('gg','string'),('b','float')])
numpy.array([('a', -6),('b', 4)], dtype=[('gg','char'),('b','float')])
numpy.array([('a', -6),('b', 4)], dtype=[('gg'),('b','float')])
numpy.array([('a', -6),('b', 4)], dtype=[('gg','string;),('b','float')])
numpy.array([('a', -6),('b', 4)], dtype=[('gg','string'),('b','float')])
numpy.array([(a, -6),(b, 4)], dtype=[('gg','string'),('b','float')])
numpy.array([('a', -6),('b', 4)], dtype=[('gg','string'),('b','float')])
numpy.array([('a', -6),('b', 4)], dtype=[('gg','string'),('b','float')])[0]
numpy.array([('a', -6),('b', 4)], dtype=[('gg','object'),('b','float')])[0]
numpy.array([('a', -6),('b', 4)], dtype=[('gg','object'),('b','float')])
numpy.array([('a', -6),('b', 4)], dtype=[('gg','object'),('b','float')])['gg']
numpy.array([('a', -6),('b', 4)], dtype=[('gg','object'),('b','float')])['gg'][0]
numpy.array([('a', -6),('b', 4)], dtype=[('gg','object'),('b','float')])['gg']
numpy.array([('a', -6),('b', 4)], dtype=object)['gg']
numpy.array([('a', -6),('b', 4)], dtype=object)
numpy.array([('a', -6),('b', 4)], dtype=object)[0]
numpy.array([('a', -6),('b', 4)], dtype=[('gg','float64'),('b','float')])['gg']
numpy.array([('a', -6),('b', 4)], dtype=[('gg','float64'),('b','float')])['gg']
numpy.array([('a', -6),('b', 4)], dtype=[('gg','|S14'),('b','float')])['gg']
numpy.array([('a', -6),('b', 4)], dtype=[('gg','|S14'),('ff','float')])['gg']
numpy.array([('a', -6),('b', 4)], dtype=[('gg','|S10'),('ff','float')])['gg']
numpy.array([('a', -6),('b', 4)], dtype=[('gg','|S10'),('ff','float')])
numpy.array([('a', -6),('b', 4)], dtype=[('gg','|S10'),('ff','float')])
numpy.array([('a', '-6'),('b', 4)], dtype=[('gg','|S10'),('ff','float')])
numpy.array([('a', '-6', 3),('b', 4, 5)], dtype=[('gg','|S10'),('ff','float'), ('hh','float')])
numpy.array([('a', '-6', 3),('b', 4, 5)], dtype=[('gg','|S10'),('ff','float'), ('hh','float')])['hh']
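# What the string-field trial and error above settles on: a bare 'string' or 'char' spec does
# not give a usable fixed-width field here, while a sized code such as '|S10' declares a
# 10-byte string field that sits alongside the numeric ones. A minimal sketch:
import numpy
rec = numpy.array([('a', -6, 3), ('b', 4, 5)],
                  dtype=[('gg', '|S10'), ('ff', 'float'), ('hh', 'float')])
rec['gg']    # array(['a', 'b'], dtype='|S10')
rec['hh']    # array([ 3.,  5.])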
ls
%load temperature_prediction.py
from itertools import combinations
from operator import and_
import sys
from numpy import array, hstack
# libraries enabled: numpy, scipy, sklearn, nltk
# testcases/minima.txt
class PredictTemp:
    '''
    Apply AND operator on subsets with given conditions
    '''
    def __init__(self, size, heads, ip):
        self.N = size
        self.data = array(ip, dtype=[(heads[0], 'int'), (heads[1], '|S10'),
                                     (heads[2], 'float'), (heads[3], 'float')])
        self.subsets = []   # combinations built by predict_already()
        self.results = []   # AND-reduced value of each combination
        print self.data
    def predict_already(self):
        # NOTE: self.parent is never assigned in this paste; it is assumed
        # to hold the records that get combined here.
        [self.subsets.extend(list(combinations(self.parent, x)))
         for x in xrange(2, self.N + 1)]
        #print self.subsets
        [self.results.append(reduce(and_, subset))
         for subset in self.subsets]
        return min(self.results)

if __name__ == '__main__':
    N = int(raw_input())
    assert 1 <= N <= 1500
    COL_HEADS = raw_input().split()   # column names as a list, e.g. ['yyyy', 'month', 'tmax', 'tmin']
    ans = []
    ip = []
    for i in xrange(N):
        temp = raw_input().split()
        assert len(temp) == 4
        #temp[0], temp[2], temp[3] = int(temp[0]), float(temp[2]), float(temp[3])
        #assert 1908<=int(temp[0])<=2013 and -75<=int(temp[2])<=75 and -75<=int(temp[3])<=75
        ip.append(temp)
    PT = PredictTemp(N, COL_HEADS, ip)
    # ans.append(PT.predict_already())
    # for i in ans: print
PredictTemp.predict_already()
ip
array(ip, dtype=[(heads[0],'int'), (heads[1],'|S10'), (heads[2], 'float'), (heads[3], 'float')])
heads = 'yyyy month tmax tmin'.split()
heads
array(ip, dtype=[(heads[0],'int'), (heads[1],'|S10'), (heads[2], 'float'), (heads[3], 'float')])
ip
array(ip, dtype=[(heads[0],'int'), (heads[1],'|S10'), (heads[2], 'float'), (heads[3], 'float')])
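# The failures just above are about the row container, not the dtype: a structured array
# wants each record as a tuple, and the string tokens from raw_input().split() still need to
# be converted to the field types. A hedged sketch of the conversion the script above is
# aiming at, assuming every tmax/tmin token parses as a plain number:
from numpy import array
heads = 'yyyy month tmax tmin'.split()
dt = [(heads[0], 'int'), (heads[1], '|S10'), (heads[2], 'float'), (heads[3], 'float')]
rows = [(int(r[0]), r[1], float(r[2]), float(r[3])) for r in ip]
data = array(rows, dtype=dt)
data['tmax']    # the tmax column as a float array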
array(ip)
array(ip)[0]
x = array(ip)
x[:,]
x[:,0]
x[:,1]
x[:,2]
x[:,3]
x[:,4]
x[:,3]
x[:,2]
x[:,2]
x.reshape(2,2)
x.reshape(4,2)
x.reshape(2,4)
x.reshape(4)
x.reshape()
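# On the reshape attempts: reshape only rearranges the existing elements, so the new shape
# must multiply out to the same total size, and it needs an explicit shape (reshape() with no
# argument is an error). A sketch:
import numpy
a = numpy.arange(8).reshape(2, 4)   # 8 elements
a.reshape(4, 2)                     # fine: 4*2 == 8
a.reshape(-1)                       # flatten; -1 lets numpy infer the length
# a.reshape(2, 2)                   # ValueError: total size mismatch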
x[:,[1,2]]
x[:,[2,3]]
x[:4,[2,3]]
x[,[2,3]]
x[:,[2,3]]
print str(x)
x
x.clip(0,1)
x[:,[2,3]].clip(0,1)
x[:,[2,3]]
x = array(ip)
x
x[:,[2,3]].clip(0,1)
str(x[:,[2,3]]).clip(0,1)
x[:,[2,3]].clip(0,1)
x[:,[2,3]]
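# The clip calls above run on a string array, because array(ip) infers a string dtype from the
# raw tokens, so the comparison against 0 and 1 is not numeric. Converting the numeric columns
# to float first is what makes clip meaningful; a sketch, assuming x is the string array built
# from ip above and those tokens are numbers:
cols = x[:, [2, 3]].astype(float)   # tmax/tmin columns as floats
cols.clip(0, 1)                     # now clips numerically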
from sklearn.preprocessing import Imputer
x[:,[2,3]]
Imputer(x[:,[2,3]])
Imputer(x)
x[:,[2,3]]
x
Imputer(x)
c = Imputer(x)
c.missing_values
Imputer?
Imputer(x[:,[2,3]])
x
Imputer(x)
x.clip(0,1)
x.clip?
x[:,[2,3]].clip(0,1)
x[:,[2,3]].clip([0,1])
x[:,[2,3]].clip(0,1)
x[:3,[2,3]].clip(0,1)
x[:3,[2,3]]
x[:3,[2,3]].clip(0,1)
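# On the Imputer calls above: Imputer(x) does not impute anything -- the first constructor
# argument is missing_values, not the data. In this sklearn version the constructor only takes
# configuration (missing_values, strategy, axis) and the data goes through fit_transform.
# A minimal sketch with assumed placeholder data X:
import numpy as np
from sklearn.preprocessing import Imputer
X = np.array([[1.0, 2.0], [np.nan, 3.0], [7.0, 6.0]])
imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
imp.fit_transform(X)    # NaN in column 0 replaced by the column mean (4.0)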
from sklearn.ensemble import RandomForestRegressor
x.shape[0]
x
x.shape[1]
x.shape[2]
x.shape[2][0]
x.shape[2]
x[2].shape[2]
x
x[:3,[2,3]].shape[1]
x.shape?
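# On the shape probing above: .shape is a plain tuple with one entry per axis, so a 2-D array
# only has shape[0] (rows) and shape[1] (columns); shape[2] raises IndexError. A sketch:
import numpy
x = numpy.array([[1, 2, 3, 4], [5, 6, 7, 8]])
x.shape       # (2, 4)
x.shape[0]    # 2 rows
x.shape[1]    # 4 columns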
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import cross_val_score
from sklearn.datasets import load_boston
dataset = load_boston()
dataset
import numpy as np
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.cross_validation import cross_val_score
rng = np.random.RandomState(0)
rng
X_full, y_full = dataset.data, dataset.target
n_samples = X_full.shape[0]
n_features = X_full.shape[1]
X_full
n_ss
n_samples
%paste
missing_rate = 0.75
n_missing_samples = np.floor(n_samples * missing_rate)
n_missing_samples
%paste
missing_samples
missing_features = rng.randint(0, n_features, n_missing_samples)
missing_features
%paste
score
import sklearn
sklearn.__version__
%paste
@arcolife commented:

import numpy as np

from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.cross_validation import cross_val_score

rng = np.random.RandomState(0)

dataset = load_boston()
X_full, y_full = dataset.data, dataset.target
n_samples = X_full.shape[0]
n_features = X_full.shape[1]

# Estimate the score on the entire dataset, with no missing values
estimator = RandomForestRegressor(random_state=0, n_estimators=100)
score = cross_val_score(estimator, X_full, y_full).mean()
print("Score with the entire dataset = %.2f" % score)

# Add missing values in 75% of the lines
missing_rate = 0.75
n_missing_samples = int(np.floor(n_samples * missing_rate))
missing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,
                                      dtype=np.bool),
                             np.ones(n_missing_samples,
                                     dtype=np.bool)))
rng.shuffle(missing_samples)
missing_features = rng.randint(0, n_features, n_missing_samples)

# Estimate the score without the lines containing missing values
X_filtered = X_full[~missing_samples, :]
y_filtered = y_full[~missing_samples]
estimator = RandomForestRegressor(random_state=0, n_estimators=100)
score = cross_val_score(estimator, X_filtered, y_filtered).mean()
print("Score without the samples containing missing values = %.2f" % score)

# Estimate the score after imputation of the missing values
X_missing = X_full.copy()
X_missing[np.where(missing_samples)[0], missing_features] = 0
y_missing = y_full.copy()
estimator = Pipeline([("imputer", Imputer(missing_values=0,
                                          strategy="mean",
                                          axis=0)),
                      ("forest", RandomForestRegressor(random_state=0,
                                                       n_estimators=100))])
score = cross_val_score(estimator, X_missing, y_missing).mean()
print("Score after imputation of the missing values = %.2f" % score)
