Alex Mitrani amitrani6

## scamming_the_scammer_snippet_1.py
#The following code is from 'Engineer Man's' YouTube video titled 'Showing a Craigslist scammer who's boss using Python'
import requests
import os
import random
import string
import json

#Character pool used when building random passwords: letters, digits and a few symbols
chars = string.ascii_letters + string.digits + '!@#%^&()'
#Seed the generator with 1024 bytes of OS entropy. random.seed has to be *called*:
#assigning to it would replace the function on the module and seed nothing.
random.seed(os.urandom(1024))

## scamming_the_scammer_snippet_2.py
#Build a string out of a random count (1 to 6) of random integers
def get_random_numbers():
    """Return between one and six random integers concatenated into a string.

    Each integer is drawn with random.randint(0, 11), so a single draw
    contributes either one or two characters to the result.
    """
    count = random.randint(1, 6)  #How many integers to draw
    return ''.join(str(random.randint(0, 11)) for _ in range(count))

## mongo_iii_creation_of_database.py
#This code creates the mongo collection once you initialize a mongo
#shell in a terminal window by simply typing the command 'mongo'

import pymongo

#Connect to the MongoDB server at the default local URI, which is
#printed when a mongo shell is started.
#If the database lives elsewhere, replace the host and port in the URI.
client = pymongo.MongoClient("mongodb://127.0.0.1:27017/")

## mongo_iii_add_links_to_collection.py
#This code inserts a list of dictionaries representing television urls
#into a mongo collection.

#'first_page_tv_links' is a variable storing the list of dictionaries,
#each element in the list follows the format: {'url': tv_link}

#The collection is called 'product_info_collection'
#Neither name is defined in this snippet.

#NOTE: with pymongo, insert_many returns an InsertManyResult rather than a bare
#list; the ids of the inserted documents are in its 'inserted_ids' attribute.
insertion_results = product_info_collection.insert_many(first_page_tv_links)

## mongo_iii_update_record.py
#This block of code updates each television in the Mongo collection with
#the product information found on its specific page, which is accessed
#by the url obtained earlier
#(only the query step is present in this snippet)

#Query every document in the collection (empty filter) and project only
#the 'url' field: 'url': 1 includes it, '_id': 0 leaves out the '_id'
#field, which is not needed here.
#The url key-value pair is the unique identifier for the televisions
url_query = product_info_collection.find({}, {'_id': 0, 'url': 1})

#This for loop iterates through each document found in the query

## photo_program.py
#This function is adapted from user derricw's answer on Stack Overflow
#https://stackoverflow.com/questions/34588464/python-how-to-capture-image-from-webcam-on-click-using-opencv/34588758#34588758

#The function takes in a file name and then takes a picture of the user while
#displaying a camera. Once the photo is taken (by pressing the space bar) the
#photo is saved as the file name given as a parameter of the function. The
#escape key closes the camera window. If the escape key is pressed before the
#picture is taken then no new file is created.

import cv2

## naive_bayes_classifier.py
# The following code is adapted from Learn.Co's
# Naive Bayes Classifier lessons and labs

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB

#Feature matrix taken from the 'countries_classify' data frame: all rows,
#columns from position 3 up to (but not including) the last column.
#.values turns the selection into an array (assuming a pandas data frame)
X = countries_classify.iloc[:, 3:-1].values

## cleaning_the_text.py
# Read a text file and return its contents as a single line
def open_file(file_path):
    """Return the text stored at file_path with newlines turned into spaces.

    The file is opened in text mode with the platform's default encoding,
    and every newline character is replaced by a single space.
    """
    with open(file_path, 'r') as handle:
        contents = handle.read()

    return contents.replace('\n', ' ')


# A function that takes in the raw text of a string, removes special characters and

## process_text.py
#Import the necessary libraries
import nltk
from nltk.stem import WordNetLemmatizer

#Initialize the Wordnet Lemmatizer once at module level so that
#lemmatize_text can reuse the same instance for every token
lemmatizer = WordNetLemmatizer()

#A function to lemmatize tokenized text, returns the lemmas joined into one string
def lemmatize_text(tokenized_text):
    """Lemmatize every token and return the lemmas as a space-separated string.

    tokenized_text is an iterable of tokens; each one is passed to the
    module-level lemmatizer and the results are joined with single spaces.
    """
    return ' '.join(lemmatizer.lemmatize(token) for token in tokenized_text)

## NBC_NLP.py
#The code for creating a Naive Bayes Classifier from text data stored in a pandas data frame

#Train Test Split The Data Frame: df.lemmatize_text is passed as the input text
#and df.show_name as the labels. test_size=0.2 holds out 20% of the rows for
#testing and random_state=42 fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(df.lemmatize_text, df.show_name, test_size=0.2, random_state=42)

#Create a Scikit-Learn pipeline for Naive Bayes Classification. The steps run in
#order: token counts, then tf-idf weighting of those counts (the step named
#'tfidf_vectorizer' is a TfidfTransformer), then the multinomial classifier.
#NOTE(review): Pipeline, CountVectorizer, TfidfTransformer and MultinomialNB are
#not imported in the visible code - confirm they are imported elsewhere.
text_clf = Pipeline([('count_vectorizer', CountVectorizer()),
                     ('tfidf_vectorizer', TfidfTransformer()),
                     ('clf', MultinomialNB())
                    ])
	#The following code is from 'Engineer Man's' YouTube video titled 'Showing a Craigslist scammer who's boss using Python'
	import requests
	import os
	import random
	import string
	import json

	#Character pool used when building random passwords: letters, digits and a few symbols
	chars = string.ascii_letters + string.digits + '!@#%^&()'
	#Seed the generator with 1024 bytes of OS entropy. random.seed has to be *called*:
	#assigning to it would replace the function on the module and seed nothing.
	random.seed(os.urandom(1024))
	#Build a string out of a random count (1 to 6) of random integers
	def get_random_numbers():
		"""Return between one and six random integers concatenated into a string.

		Each integer is drawn with random.randint(0, 11), so a single draw
		contributes either one or two characters to the result.
		"""
		count = random.randint(1, 6)  #How many integers to draw
		return ''.join(str(random.randint(0, 11)) for _ in range(count))
	#This code creates the mongo collection once you initialize a mongo
	#shell in a terminal window by simply typing the command 'mongo'

	import pymongo

	#Connect to the MongoDB server at the default local URI, which is
	#printed when a mongo shell is started.
	#If the database lives elsewhere, replace the host and port in the URI.
	client = pymongo.MongoClient("mongodb://127.0.0.1:27017/")
	#This code inserts a list of dictionaries representing television urls
	#into a mongo collection.

	#'first_page_tv_links' is a variable storing the list of dictionaries,
	#each element in the list follows the format: {'url': tv_link}

	#The collection is called 'product_info_collection'
	#Neither name is defined in this snippet.

	#NOTE: with pymongo, insert_many returns an InsertManyResult rather than a bare
	#list; the ids of the inserted documents are in its 'inserted_ids' attribute.
	insertion_results = product_info_collection.insert_many(first_page_tv_links)
	#This block of code updates each television in the Mongo collection with
	#the product information found on its specific page, which is accessed
	#by the url obtained earlier
	#(only the query step is present in this snippet)

	#Query every document in the collection (empty filter) and project only
	#the 'url' field: 'url': 1 includes it, '_id': 0 leaves out the '_id'
	#field, which is not needed here.
	#The url key-value pair is the unique identifier for the televisions
	url_query = product_info_collection.find({}, {'_id': 0, 'url': 1})

	#This for loop iterates through each document found in the query
	#This function is adapted from user derricw's answer on Stack Overflow
	#https://stackoverflow.com/questions/34588464/python-how-to-capture-image-from-webcam-on-click-using-opencv/34588758#34588758

	#The function takes in a file name and then takes a picture of the user while
	#displaying a camera. Once the photo is taken (by pressing the space bar) the
	#photo is saved as the file name given as a parameter of the function. The
	#escape key closes the camera window. If the escape key is pressed before the
	#picture is taken then no new file is created.

	import cv2
	# The following code is adapted from Learn.Co's
	# Naive Bayes Classifier lessons and labs

	from sklearn.preprocessing import LabelEncoder
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.naive_bayes import GaussianNB

	#Feature matrix taken from the 'countries_classify' data frame: all rows,
	#columns from position 3 up to (but not including) the last column.
	#.values turns the selection into an array (assuming a pandas data frame)
	X = countries_classify.iloc[:, 3:-1].values
	# A function to open the raw text and return it as a single line
	def open_file(file_path):
		"""Return the text stored at file_path with newlines turned into spaces.

		The file is opened in text mode with the platform's default encoding,
		and every newline character is replaced by a single space.
		"""
		with open(file_path, 'r') as file:
			raw_text = file.read().replace('\n', ' ')

		return raw_text


	# A function that takes in the raw text of a string, removes special characters and
	#Import the necessary libraries
	import nltk
	from nltk.stem import WordNetLemmatizer

	#Initialize the Wordnet Lemmatizer once at module level so that
	#lemmatize_text can reuse the same instance for every token
	lemmatizer = WordNetLemmatizer()

	#A function to lemmatize tokenized text, returns the lemmas joined into one string
	def lemmatize_text(tokenized_text):
		"""Lemmatize every token and return the lemmas as a space-separated string.

		tokenized_text is an iterable of tokens; each one is passed to the
		module-level lemmatizer and the results are joined with single spaces.
		"""
		return ' '.join([lemmatizer.lemmatize(w) for w in tokenized_text])
	#The code for creating a Naive Bayes Classifier from text data stored in a pandas data frame

	#Train Test Split The Data Frame: df.lemmatize_text is passed as the input text
	#and df.show_name as the labels. test_size=0.2 holds out 20% of the rows for
	#testing and random_state=42 fixes the shuffle so the split is reproducible.
	X_train, X_test, y_train, y_test = train_test_split(df.lemmatize_text, df.show_name, test_size=0.2, random_state=42)

	#Create a Scikit-Learn pipeline for Naive Bayes Classification. The steps run in
	#order: token counts, then tf-idf weighting of those counts (the step named
	#'tfidf_vectorizer' is a TfidfTransformer), then the multinomial classifier.
	#NOTE(review): Pipeline, CountVectorizer, TfidfTransformer and MultinomialNB are
	#not imported in the visible code - confirm they are imported elsewhere.
	text_clf = Pipeline([('count_vectorizer', CountVectorizer()),
	('tfidf_vectorizer', TfidfTransformer()),
	('clf', MultinomialNB())
	])