jenjenjiang

## logistic_regression.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jenjenjiang
                / logistic_regression.ipynb
            
            
              Created
              August 20, 2019 09:26
            
              
                Logistic regression library in Python
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## csv_linear_regression.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jenjenjiang
                / csv_linear_regression.ipynb
            
            
              Last active
              August 20, 2019 09:16
            
              
                Read a CSV file and build a regression model. Make a prediction as an example.
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## google_map.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jenjenjiang
                / google_map.ipynb
            
            
              Last active
              August 20, 2019 09:15
            
              
                Use the Google Places API to request data and store it in Python format
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## dictionary.ipynb

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jenjenjiang
                / dictionary.ipynb
            
            
              Last active
              August 20, 2019 09:17
            
              
                pandas examples
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## read a file and give weights of each term
# Extract weighted keywords from each line of a lyrics file with jieba.
#
# NOTE: `pip install jieba` is a shell / notebook command, not a Python
# statement. Run it once in a terminal (or as `%pip install jieba` in a
# notebook cell) *before* the imports below are executed.
from os import path
import jieba
import jieba.analyse as ja
from gensim.test.utils import common_texts, get_tmpfile

with open('../JJ/lyric.txt', 'r') as handle:
    # Prints the file object's repr, not the file contents.
    print(handle)
    for line in handle:
        # Ask jieba for up to 10 keywords of this line. With withWeight=True
        # each entry is expected to be a (term, weight) pair -- see the
        # jieba.analyse documentation. `tags` is overwritten on every
        # iteration, so only the last line's result survives the loop.
        tags = ja.extract_tags(line, topK=10, withWeight=True)

## DecisionTreeClassifier predict
# DecisionTreeClassifier predict
# `dev_test` and `clf` are defined outside this snippet (presumably the
# evaluation DataFrame and an already-fitted classifier -- verify upstream).
from sklearn.metrics import accuracy_score

# Feature matrix: the first 46 columns of dev_test, minus 'Client_MYOL_Statut'.
x_test = dev_test.iloc[:, 0:46]
x_test_1 = x_test.drop(['Client_MYOL_Statut'], axis=1, inplace=False)
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# `.values` returns the same NumPy array on both old and new versions.
x_t1_m = x_test_1.values
test_pred = clf.predict(x_t1_m)
# Values of the Client_Abo_1819 column (presumably the labels to compare
# against test_pred).
y_test = dev_test.Client_Abo_1819.values

## Silhouette Analysis for the value of k
# Silhouette analysis: fit K-Means on data_std for k = 2..9 and report the
# average silhouette score obtained for each k.
# `KMeans` and `data_std` are expected to be defined before this snippet runs.
from sklearn.metrics import silhouette_samples, silhouette_score

range_n_clusters = list(range(2, 10))
for n_clusters in range_n_clusters:
    # Fixed random_state so repeated runs give the same clustering.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0)
    kmeans.fit(data_std)
    labels = kmeans.labels_
    silhouette_avg = silhouette_score(data_std, labels)
    print(
        "For n_clusters =", n_clusters,
        "The average silhouette_score is :", silhouette_avg,
    )

## data clean.txt
import pandas as pd
import numpy as np
import sys

def get_size(total_size, percentage, mean):
    """Return the two group sizes for a percentage-sized sample.

    The sample size is ``percentage`` percent of ``total_size``, truncated
    to an int. That sample is then divided according to ``mean``: the second
    entry gets the ``mean`` share and the first entry gets the remaining
    ``1 - mean`` share, each truncated to an int independently.

    Args:
        total_size: Number of rows available.
        percentage: Share of ``total_size`` to sample, on a 0-100 scale.
        mean: Fraction of the sample assigned to the second group
            (presumably the positive-class rate -- confirm with the caller).

    Returns:
        A two-element list ``[size_for_1_minus_mean, size_for_mean]``.
    """
    sample_size = int(percentage * total_size / 100)
    second_group = int(sample_size * mean)
    first_group = int(sample_size * (1 - mean))
    return [first_group, second_group]

def populate(data, indexs, sizes):

## Retrain the linear and polynomial models
#1 Retrain the linear and polynomial models
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score

# Split X / Y into train and test parts: test_size=0.3 puts 30% of the
# samples in the test set, and random_state=0 makes the split repeatable.
# `train_test_split`, `X` and `Y` must already be in scope.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

## plot.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jenjenjiang
                / plot.ipynb
            
            
              Last active
              August 20, 2019 09:23
            
              
                Plot the average distances and minimum distances with respect to dimensions
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	# Extract weighted keywords from each line of a lyrics file with jieba.
	#
	# NOTE: `pip install jieba` is a shell / notebook command, not a Python
	# statement. Run it once in a terminal (or as `%pip install jieba` in a
	# notebook cell) *before* the imports below are executed.
	from os import path
	import jieba
	import jieba.analyse as ja
	from gensim.test.utils import common_texts, get_tmpfile

	with open('../JJ/lyric.txt', 'r') as handle:
	    # Prints the file object's repr, not the file contents.
	    print(handle)
	    for line in handle:
	        # Up to 10 keywords for this line; with withWeight=True each entry
	        # is expected to be a (term, weight) pair -- see the jieba.analyse
	        # documentation. `tags` is overwritten on every iteration.
	        tags = ja.extract_tags(line, topK=10, withWeight=True)
	# DecisionTreeClassifier predict
	# `dev_test` and `clf` are defined outside this snippet (presumably the
	# evaluation DataFrame and an already-fitted classifier -- verify upstream).
	from sklearn.metrics import accuracy_score

	# Feature matrix: the first 46 columns of dev_test, minus 'Client_MYOL_Statut'.
	x_test = dev_test.iloc[:, 0:46]
	x_test_1 = x_test.drop(['Client_MYOL_Statut'], axis=1, inplace=False)
	# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
	# `.values` returns the same NumPy array on both old and new versions.
	x_t1_m = x_test_1.values
	test_pred = clf.predict(x_t1_m)
	# Values of the Client_Abo_1819 column (presumably the labels to compare
	# against test_pred).
	y_test = dev_test.Client_Abo_1819.values
	# Silhouette analysis: fit K-Means on data_std for k = 2..9 and report the
	# average silhouette score obtained for each k.
	# `KMeans` and `data_std` are expected to be defined before this snippet runs.
	from sklearn.metrics import silhouette_samples, silhouette_score
	range_n_clusters = [2, 3, 4, 5, 6, 7, 8, 9]
	for n_clusters in range_n_clusters:
	    # Loop body re-indented: the pasted copy had lost its nesting.
	    # Fixed random_state so repeated runs give the same clustering.
	    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(data_std)
	    labels = kmeans.labels_
	    silhouette_avg = silhouette_score(data_std, labels)
	    print("For n_clusters =", n_clusters,
	          "The average silhouette_score is :", silhouette_avg)
	import pandas as pd
	import numpy as np
	import sys

	def get_size(total_size, percentage, mean):
	    """Return the two group sizes for a percentage-sized sample.

	    The sample size is ``percentage`` percent of ``total_size``, truncated
	    to an int. That sample is then divided according to ``mean``: the
	    second entry gets the ``mean`` share and the first entry gets the
	    remaining ``1 - mean`` share, each truncated to an int independently.

	    Args:
	        total_size: Number of rows available.
	        percentage: Share of ``total_size`` to sample, on a 0-100 scale.
	        mean: Fraction of the sample assigned to the second group
	            (presumably the positive-class rate -- confirm with the caller).

	    Returns:
	        A two-element list ``[size_for_1_minus_mean, size_for_mean]``.
	    """
	    size_train = int(percentage * total_size / 100)
	    size_client_abo = [int(size_train * (1 - mean)), int(size_train * mean)]
	    return size_client_abo

	def populate(data, indexs, sizes):
	#1 Retrain the linear and polynomial models
	import pandas as pd
	from sklearn.linear_model import LinearRegression
	from sklearn import metrics
	from sklearn.metrics import mean_squared_error
	from sklearn.preprocessing import PolynomialFeatures
	from sklearn.metrics import r2_score

	# Split X / Y into train and test parts: test_size=0.3 puts 30% of the
	# samples in the test set, and random_state=0 makes the split repeatable.
	# `train_test_split`, `X` and `Y` must already be in scope.
	X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)