undarmaa

## understanding-word-vectors.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                undarmaa
                / understanding-word-vectors.ipynb
            
            
              Created
              March 13, 2019 00:36
                — forked from aparrish/understanding-word-vectors.ipynb
            
              
                Understanding word vectors: A tutorial for "Reading and Writing Electronic Text," a class I teach at ITP. (Python 2.7) Code examples released under CC0 https://creativecommons.org/choose/zero/, other text released under CC BY 4.0 https://creativecommons.org/licenses/by/4.0/
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## text_cluster.py
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# based on http://scikit-learn.org/stable/auto_examples/document_clustering.html

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.metrics.pairwise import pairwise_distances
import numpy as np
from time import time
from collections import defaultdict

## TensorFlow 시작하기.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                undarmaa
                / TensorFlow 시작하기.md
            
            
              Created
              May 11, 2018 00:05
                — forked from haje01/TensorFlow 시작하기.md
            
              
                TensorFlow 시작하기
              
          
    텐서플로우 시작하기

글쓴이: 김정주(haje01@gmail.com)
이 문서는 텐서플로우 공식 페이지 내용을 바탕으로 만들어졌습니다.

소개

텐서플로우(TensorFlow)는 기계 학습과 딥러닝을 위해 구글에서 만든 오픈소스 라이브러리입니다. 데이터 플로우 그래프(Data Flow Graph) 방식을 사용하였습니다.

  
## canopy.py
from sklearn.metrics.pairwise import pairwise_distances
import numpy as np

# X should be a numpy matrix, very likely a sparse matrix: http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.sparse.csr_matrix.html#scipy.sparse.csr_matrix
# T1 > T2 for overlapping clusters
# T1 = Distance to centroid point to not include in other clusters
# T2 = Distance to centroid point to include in cluster
# T1 > T2 for overlapping clusters
# T1 < T2 will have points which reside in no clusters
# T1 == T2 will cause all points to reside in mutually exclusive clusters

## Dockerfile
# IPython Notebook with per-user storage and config
#
# Based on crosbymichael/ipython
# Creates a Docker image with IPython Notebook installed.
#
# It expects to be run like this:
#
# docker run -v /home/eduard/notebooks/eduard:/notebooks benthoo/ipython-user
#
# You provide a folder per user on the host system. This folder will hold the user's notebooks and also needs to contain the

## compressed_lasso_example.py
# Fit a Lasso model on a synthetic regression problem that has far more
# features (5000) than samples (200).
#
# ``make_regression`` is imported from the public ``sklearn.datasets`` package:
# the ``sklearn.datasets.samples_generator`` module was deprecated in
# scikit-learn 0.22 and removed in 0.24, so the old import path raises
# ImportError on current releases. The public path works on old versions too.
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
import numpy as np

# A fixed random_state keeps the generated data identical between runs.
X, y = make_regression(n_samples=200, n_features=5000, random_state=0)

# alpha is forwarded unchanged to the Lasso estimator below.
alpha = 1
model = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)

model.fit(X, y)

## knn_wine.py
# Load the wine dataset and split it at random into training and test sets.
import numpy as np  # previously missing: np.random.uniform raised NameError
import pandas as pd
import pylab as pl
from sklearn.neighbors import KNeighborsClassifier


df = pd.read_csv("https://s3.amazonaws.com/demo-datasets/wine.csv")

# Boolean mask with one entry per row; a row is flagged when its uniform draw
# from [0, 1) is <= 0.3, i.e. roughly 30% of the rows.
test_idx = np.random.uniform(0, 1, len(df)) <= 0.3
# Flagged rows form the test set and the remaining rows the training set.
# The assignment used to be the other way round, which contradicted the
# mask's name and left only ~30% of the data for training.
train = df[~test_idx]
test = df[test_idx]

## cluster.py
import numpy as np
import numpy
import theano
import theano.tensor as T
from theano import function, config, shared, sandbox
from theano import ProfileMode
from sklearn import cluster, datasets
import matplotlib.pyplot as plt

def rsom(data, cluster_num, alpha, epochs = -1, batch = 1, verbose = False):
	#!/usr/bin/env python
	# -- encoding: utf-8 --
	# based on http://scikit-learn.org/stable/auto_examples/document_clustering.html

	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.cluster import KMeans, MiniBatchKMeans
	from sklearn.metrics.pairwise import pairwise_distances
	import numpy as np
	from time import time
	from collections import defaultdict
	from sklearn.metrics.pairwise import pairwise_distances
	import numpy as np

	# X shoudl be a numpy matrix, very likely sparse matrix: http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.sparse.csr_matrix.html#scipy.sparse.csr_matrix
	# T1 > T2 for overlapping clusters
	# T1 = Distance to centroid point to not include in other clusters
	# T2 = Distance to centroid point to include in cluster
	# T1 > T2 for overlapping clusters
	# T1 < T2 will have points which reside in no clusters
	# T1 == T2 will cause all points to reside in mutually exclusive clusters
	# iPython Notebook with per-user storage and config
	#
	# Based on crosbymichael/ipython
	# Creates a Docker image with IPython Notebook installed.
	#
	# It expects to be run like this:
	#
	# docker run -v /home/eduard/notebooks/eduard:/notebooks benthoo/ipython-user
	#
	# You provide a folder per user on the host system. This folder will hold the users notebooks and also needs to contain the
	from sklearn.datasets.samples_generator import make_regression
	from sklearn.linear_model import Lasso
	import numpy as np

	X, y = make_regression(n_samples=200, n_features=5000, random_state=0)

	alpha = 1
	model = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)

	model.fit(X, y)
	import pandas as pd
	import pylab as pl
	from sklearn.neighbors import KNeighborsClassifier


	df = pd.read_csv("https://s3.amazonaws.com/demo-datasets/wine.csv")

	test_idx = np.random.uniform(0, 1, len(df)) <= 0.3
	train = df[test_idx==True]
	test = df[test_idx==False]
	import numpy as np
	import numpy
	import theano
	import theano.tensor as T
	from theano import function, config, shared, sandbox
	from theano import ProfileMode
	from sklearn import cluster, datasets
	import matplotlib.pyplot as plt

	def rsom(data, cluster_num, alpha, epochs = -1, batch = 1, verbose = False):