View Scraping.py
import requests
import pandas as pd
# API key literal (looks like a redacted placeholder -- confirm before use).
key = " #key "
# Counter for the id-fetching loop; starts at zero.
i = 0
#iterate twice, get all the data of 60 search results from 3 pages
while(i<=2) :
if (i==0):
View Income Classification_DecisionTreeClassifier.py
import pandas as pd
# Load the training set first, then the test set, from the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

# Per-column dtypes (a bare expression: only shown when run interactively).
train.dtypes

# Summary statistics for the continuous variables.
train.describe()

# Labels of the categorical variables, i.e. the columns whose dtype is "object".
categorical = train.dtypes[train.dtypes == "object"].index
categorical
View RandomForest.py
# -*- coding: utf-8 -*-
import glob,struct,os
import pandas as pd
import numpy as np
# Column labels for the header-less log CSVs; assigned to the combined frame
# once all files have been concatenated.
names = [
    "Timestamp",
    "Customer ID",
    "Host",
    "Log file",
    "Log sequence no.",
    "Entry type",
    "Entry identifier",
    "User,if",
    "Reporting IP/host",
    "Source IP,if",
    "Source port,if",
    "Destination IP, if",
    "Destination Port, if",
    "Text field1",
    "Text field2",
    "Text field3",
    "Numeric field1",
    "Numeric field2",
]

# Directory holding the CSV logs. In a raw string a backslash is already
# literal, so single separators are used; escaping them as well (r'C:\\...')
# would put doubled backslashes into the path and into every globbed filename.
path = r'C:\Users\A\Desktop\data'

# Every *.csv file directly inside `path`.
all_files = glob.glob(os.path.join(path, "*.csv"))

# Lazily read each file without treating its first row as a header.
df_each = (pd.read_csv(f, header=None) for f in all_files)

# Stack all files row-wise, then label the columns.
# NOTE: pd.concat raises ValueError when `all_files` is empty.
result = pd.concat(df_each)
result.columns = names
View RandomForestmodel.py
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
# Column labels applied to both CSV reads below.
column_names = [
    'ID',
    'No.',
    'Smth',
    'Number',
    'Count',
    'Count2',
    'UDP/TCP',
    'RandomNo',
    'IP',
    'AUDIT/ALLOW/BLOCK',
]

# Original note: 8999 rows x 10 columns.
# NOTE(review): this reads "test.csv", the same file as `test` below, even
# though the two notes quote different row counts -- confirm the intended
# training file.
data = pd.read_csv("test.csv", names=column_names)

# The first nine column labels are the features; the tenth is left out.
features = data.columns[:9]

# Original note: 256 rows x 10 columns.
test = pd.read_csv("test.csv", names=column_names)
View Spot Checking Classifier
# CART Classification
import pandas as pd
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier
# Column labels for the CSV, which is read with these names supplied explicitly.
column_names = [
    'ID',
    'No.',
    'Smth',
    'Number',
    'Count',
    'Count2',
    'UDP/TCP',
    'RandomNo',
    'IP',
    'AUDIT/ALLOW/BLOCK',
]
dataframe = pd.read_csv("data.csv", names=column_names)

# Work on the underlying array: columns 0-8 are the inputs, column 9 the target.
array = dataframe.values
X, Y = array[:, :9], array[:, 9]

# Seed value (presumably for the model-selection step -- its use is not shown here).
seed = 7
View rf_iris.py
#loading libraries
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
# Load the iris dataset via sklearn.datasets.load_iris and bind the result to `iris`.
iris = load_iris()
#create a dataframe with the 4 feature variables