wtberry

## Faker_names.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              1 star
            
          
                wtberry
                / Faker_names.ipynb
            
            
              Created
              July 7, 2019 03:15
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Faker_names.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              1 star
            
          
                wtberry
                / Faker_names.ipynb
            
            
              Created
              July 7, 2019 03:15
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## English_Egyptian_name.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              1 star
            
          
                wtberry
                / English_Egyptian_name.ipynb
            
            
              Created
              July 7, 2019 01:58
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## NameClassifier_train_test_split.py
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples (test_size=0.3) as a test set; the rest is used for training.
# No random_state is passed, so the split is not reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(word_mat, y, test_size=0.3)

## NameClassifier_train_test_split.py
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples (test_size=0.3) as a test set; the rest is used for training.
# No random_state is passed, so the split is not reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(word_mat, y, test_size=0.3)

## NameClassifier_model_instantiate.py
from sklearn.naive_bayes import MultinomialNB

# build the classifier (clf) and train it in a single expression;
# fit() returns the fitted estimator itself, so clf is the trained model
clf = MultinomialNB().fit(x_train, y_train)

## NameClassifier_CountVectorizer.py
from sklearn.feature_extraction.text import CountVectorizer

# Create the vectorizer, then learn its vocabulary from the name column
vectorizer = CountVectorizer()
vectorizer.fit(df['name'])

# Turn every name into a sparse vector of token counts using that vocabulary
word_mat = vectorizer.transform(df['name'])

## NameClassifer_label_encode.py
from sklearn.preprocessing import LabelEncoder

# Build a mapping from each unique label string to a unique integer.
# The fitted encoder can be reused afterwards: transform() encodes labels,
# inverse_transform() decodes integers back to the original label text.
encoder = LabelEncoder().fit(df['code'])

# Encode the label column of the entire dataset into integer class ids
# (the column itself must be passed to transform(), not the encoder object)
y = encoder.transform(df['code'])

## NameClassifier_data_load.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                wtberry
                / NameClassifier_data_load.ipynb
            
            
              Created
              June 23, 2019 21:53
            
              
                medium/NameClassifier/dataload
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## NameClassifier1.py
import pandas as pd
import os
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

# Point PATH at the "data" directory located next to this script
# (realpath resolves symlinks before the directory is taken)
PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
print(PATH)
	from sklearn.model_selection import train_test_split

	# Hold out 30% of the samples (test_size=0.3) as a test set; the rest is used for training.
	# No random_state is passed, so the split is not reproducible from run to run.
	x_train, x_test, y_train, y_test = train_test_split(word_mat, y, test_size=0.3)
	from sklearn.naive_bayes import MultinomialNB

	# build the classifier (clf) and train it in a single expression;
	# fit() returns the fitted estimator itself, so clf is the trained model
	clf = MultinomialNB().fit(x_train, y_train)
	from sklearn.feature_extraction.text import CountVectorizer

	# Create the vectorizer, then learn its vocabulary from the name column
	vectorizer = CountVectorizer()
	vectorizer.fit(df['name'])

	# Turn every name into a sparse vector of token counts using that vocabulary
	word_mat = vectorizer.transform(df['name'])
	from sklearn.preprocessing import LabelEncoder

	# Build a mapping from each unique label string to a unique integer.
	# The fitted encoder can be reused afterwards: transform() encodes labels,
	# inverse_transform() decodes integers back to the original label text.
	encoder = LabelEncoder().fit(df['code'])

	# Encode the label column of the entire dataset into integer class ids
	# (the column itself must be passed to transform(), not the encoder object)
	y = encoder.transform(df['code'])
	import pandas as pd
	import os
	import numpy as np
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.model_selection import train_test_split

	# Point PATH at the "data" directory located next to this script
	# (realpath resolves symlinks before the directory is taken)
	PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
	print(PATH)