wtberry

## NameClassifier1.py
import pandas as pd
import os
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

# Point PATH at the 'data' directory that sits next to this script
# (symlinks in the script location are resolved first), then echo it.
PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
print(PATH)

## NameClassifier_data_load.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                wtberry
                / NameClassifier_data_load.ipynb
            
            
              Created
              June 23, 2019 21:53
            
              
                medium/NameClassifier/dataload
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## NameClassifer_label_encode.py
from sklearn.preprocessing import LabelEncoder

# Build the mapping from unique label texts to unique integers.
# LabelEncoder is the scikit-learn encoder meant for a 1-D target column;
# the fitted encoder can be reused later to encode new labels or to decode
# predictions back to text with encoder.inverse_transform(...).
encoder = LabelEncoder().fit(df['code'])

# Encode the label column of the entire dataset (the data, not the encoder
# object itself, is what gets transformed).
y = encoder.transform(df['code'])

## NameClassifier_CountVectorizer.py
from sklearn.feature_extraction.text import CountVectorizer

# Create the vectorizer and learn its vocabulary from the name column.
vectorizer = CountVectorizer()
vectorizer.fit(df['name'])

# Convert every name into a row of token counts (returned as a sparse matrix).
word_mat = vectorizer.transform(df['name'])

## NameClassifier_model_instantiate.py
from sklearn.naive_bayes import MultinomialNB

# Create the multinomial naive Bayes classifier (clf) and train it on the
# training split in a single expression; fit() returns the estimator itself.
clf = MultinomialNB().fit(x_train, y_train)

## NameClassifier_train_test_split.py
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the remaining 70% is used for training.
x_train, x_test, y_train, y_test = train_test_split(
    word_mat,
    y,
    test_size=0.3,
)

## NameClassifier_train_test_split.py
from sklearn.model_selection import train_test_split

# Split features and encoded labels into train/test parts (30% goes to test).
x_train, x_test, y_train, y_test = train_test_split(
    word_mat,
    y,
    test_size=0.3,
)

## English_Egyptian_name.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                wtberry
                / English_Egyptian_name.ipynb
            
            
              Created
              July 7, 2019 01:58
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Faker_names.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                wtberry
                / Faker_names.ipynb
            
            
              Created
              July 7, 2019 03:15
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Faker_names.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                wtberry
                / Faker_names.ipynb
            
            
              Created
              July 7, 2019 03:15
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	import os

	import numpy as np
	import pandas as pd
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.preprocessing import LabelEncoder

	# setting up path to the data file: the 'data' folder next to this script
	PATH = os.path.dirname(os.path.realpath(__file__))
	PATH = os.path.join(PATH, 'data')
	print(PATH)

	# NOTE(review): `df` is not defined anywhere in this listing. It is
	# presumably a DataFrame with 'name' and 'code' columns loaded from a file
	# under PATH (the data-load notebook is not shown) -- confirm and load it
	# before the steps below.

	# creating mapping from unique label texts to unique integers
	# note this can be re-used to encode and decode the labels after as well
	# (LabelEncoder is the scikit-learn encoder intended for a 1-D label column)
	encoder = LabelEncoder().fit(df['code'])

	# using the encoder to encode the label column of the entire dataset
	y = encoder.transform(df['code'])

	# Initialize and fit CountVectorizer with given text documents
	vectorizer = CountVectorizer().fit(df['name'])

	# use the vectorizer to transform the document into word count vectors (Sparse)
	word_mat = vectorizer.transform(df['name'])

	# split BEFORE training: the classifier below needs x_train / y_train,
	# and 30% of the samples are held out for testing
	x_train, x_test, y_train, y_test = train_test_split(word_mat, y, test_size=0.3)

	# instantiate the model as clf(classifier) and train it
	clf = MultinomialNB()
	clf.fit(x_train, y_train)