BlightViolation_ComplianceModel.py
# coding: utf-8
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from datetime import datetime

# load the blight-violation training data
df = pd.read_csv("train.csv")
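The file is cut off right after the data load, but the imports already sketch the intended pipeline: a train/test split, a random forest, and ROC AUC scoring. A minimal continuation in that spirit; the `compliance` target column and the numeric-features-only selection are assumptions, not taken from the original script.

target = df["compliance"]  # assumed label column, not confirmed by the original
features = df.select_dtypes(include=[np.number]).drop(columns=["compliance"], errors="ignore")

X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.25, random_state=0)

model = RandomForestClassifier(n_estimators=100, random_state=0)
model.fit(X_train, y_train)

# score with the probability of the positive class, as roc_auc_score expects
probs = model.predict_proba(X_test)[:, 1]
print("ROC AUC:", roc_auc_score(y_test, probs))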
RandomForest_final.py
import glob, struct, os
import pandas as pd
import numpy as np

# names of the firewall log columns
names = ["Timestamp", "Customer ID", "Host", "Log file", "Log sequence no.",
         "Entry type", "Entry identifier", "User, if", "Reporting IP/host",
         "Source IP, if", "Source port, if", "Destination IP, if",
         "Destination port, if", "Text field 1", "Text field 2",
         "Text field 3", "Numeric field 1", "Numeric field 2"]

# path to the dataset folder
path = r'C:/Users/PARVATHY SARAT/Desktop/FIREWALL'
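The excerpt stops before any file is read, but glob plus the path and column list point at the obvious next step: concatenating every log in the folder into one DataFrame. A minimal sketch; the *.csv pattern and the headerless-CSV format are assumptions about the log files, not something the original confirms.

frames = []
for filename in glob.glob(os.path.join(path, "*.csv")):  # assumed file pattern
    frames.append(pd.read_csv(filename, names=names, header=None))
logs = pd.concat(frames, ignore_index=True)
print(logs.shape)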
Scraping.py
import requests
import pandas as pd

key = " #key "  # API key placeholder, left elided as in the original

# Google restricts scraping to the first three pages of search results,
# so iterate three times to collect all 60 results (20 per page),
# gathering the result ids along the way.
i = 0
while i <= 2:
    if i == 0:
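The loop body is cut off here, but the three-page, 60-result cap matches the Google Places Text Search API (20 results per page, further pages fetched via next_page_token). A minimal sketch of that pattern; the endpoint is the documented Places one, while the query string and the pause before a fresh page token becomes valid are assumptions.

import time

url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
params = {"query": "restaurants in Chennai", "key": key}  # query is a stand-in

ids = []
for page in range(3):  # Google caps text search at three pages
    response = requests.get(url, params=params).json()
    ids.extend(result["place_id"] for result in response.get("results", []))
    token = response.get("next_page_token")
    if not token:
        break
    time.sleep(2)  # a new page token takes a moment to become valid
    params = {"pagetoken": token, "key": key}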
Income Classification_DecisionTreeClassifier.py
import pandas as pd

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
train.dtypes

# summary statistics for the continuous variables
train.describe()

# the categorical variables are the columns with dtype "object"
categorical = train.dtypes.loc[train.dtypes == "object"].index
categorical
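The excerpt ends at the exploratory step, but the filename names the model. A minimal sketch of the likely continuation; the `Income` target column and the label-encoding choice are assumptions, not taken from the original file.

from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

# encode every object-typed column, including the assumed "Income" target
for col in categorical:
    train[col] = LabelEncoder().fit_transform(train[col].astype(str))

X = train.drop(columns=["Income"])  # assumed target name
y = train["Income"]

clf = DecisionTreeClassifier(max_depth=5, random_state=0)
clf.fit(X, y)
print("training accuracy:", clf.score(X, y))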
Spot Checking Classifier
# CART Classification
import pandas as pd
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier

dataframe = pd.read_csv("data.csv",
                        names=['ID', 'No.', 'Smth', 'Number', 'Count', 'Count2',
                               'UDP/TCP', 'RandomNo', 'IP', 'AUDIT/ALLOW/BLOCK'])
array = dataframe.values
X = array[:, 0:9]  # the first nine columns are the features
Y = array[:, 9]    # AUDIT/ALLOW/BLOCK is the class label
seed = 7
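The snippet sets a seed but stops before the spot check itself. A minimal sketch of the standard recipe with 10-fold cross-validation; the fold count and accuracy metric are assumptions, recent scikit-learn requires shuffle=True alongside random_state in KFold, and the string-valued columns (UDP/TCP, IP) would need encoding before the tree can fit.

kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
model = DecisionTreeClassifier()
results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring="accuracy")
print("mean accuracy: %.3f (%.3f)" % (results.mean(), results.std()))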