Skip to content

Instantly share code, notes, and snippets.

# Training the data and predicting
# Logistic regression: fit on the scaled training features, then produce both
# hard class predictions and class probabilities for the scaled test features.
from sklearn.linear_model import LogisticRegression
# fit a model
lrclf = LogisticRegression(solver='lbfgs')
lrclf.fit(X_train_scaled, y_train)
y_pred_lr = lrclf.predict(X_test_scaled)
# predict probabilities (use the estimator fitted above; `model` was never defined)
lr_probs = lrclf.predict_proba(X_test_scaled)
# Split the combined frame back into train/test rows using the 'train' marker
# column, then remove the marker. A non-inplace drop avoids mutating a slice of
# `combined` (which raises SettingWithCopyWarning).
X_train = combined[combined['train'] == 1].drop(columns=['train'])
X_test = combined[combined['train'] == 0].drop(columns=['train'])

# Standardize features. The scaler is fitted on the training rows only and the
# same fitted statistics are applied to the test rows, so both sets share one
# scale and no test-set information leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)
# Mark each frame with its origin (1 = train, 0 = test) so the rows can be
# separated again after encoding.
X_train['train'], X_test['train'] = 1, 0
# Stack both frames and one-hot encode them together so train and test end up
# with the same dummy columns.
combined = pd.get_dummies(pd.concat([X_train, X_test], axis=0), prefix_sep='_')
combined.head()
# Loading and preprocessing testing data
raw_data_test = pd.read_csv('au_test.csv')
raw_data_test.head()
# Discard the 'fnlwgt' and 'education' columns.
raw_data_test = raw_data_test.drop(columns=['fnlwgt', 'education'])
# First 12 columns are the features; the last column is the label.
X_test = raw_data_test.iloc[:, :12]
le = LabelEncoder()
# Integer-encode the label column and wrap it in a one-column DataFrame.
y_test = pd.DataFrame(
    le.fit_transform(raw_data_test.iloc[:, -1]),
    columns=['Output_class'],
)
# Discard the 'fnlwgt' and 'education' columns from the training data.
raw_data_train = raw_data_train.drop(columns=['fnlwgt', 'education'])
# First 12 columns are the features; the last column is the label.
X_train = raw_data_train.iloc[:, :12]
le = LabelEncoder()
# Integer-encode the label column and wrap it in a one-column DataFrame.
y_train = pd.DataFrame(
    le.fit_transform(raw_data_train.iloc[:, -1]),
    columns=['Output_class'],
)
# Preview the first rows of the training data.
print(raw_data_train.head())
# Count the object-typed (categorical) and int64/float64 (numeric) feature columns.
cat = X_train.select_dtypes(include=['object']).shape[1]
num = X_train.select_dtypes(include=['int64', 'float64']).shape[1]
# Take the first 14 columns as features and integer-encode the last column as
# the label.
X_train = raw_data_train.iloc[:, :14]
le = LabelEncoder()
y_train = le.fit_transform(raw_data_train.iloc[:, -1])
# Exploratory data analysis
# Put the encoded labels in their own one-column frame, then place that column
# side by side with the raw training data.
df = pd.DataFrame(data=y_train, columns=['Output_class'])
test = pd.concat([df, raw_data_train], axis='columns')
# Third-party imports: data handling (pandas, numpy), plotting (matplotlib,
# seaborn), and scikit-learn preprocessing / metric helpers.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import seaborn as sns
import numpy as np
# Read the training CSV into a DataFrame.
raw_data_train = pd.read_csv('au_train.csv')
# Third-party imports: data handling (pandas, numpy), plotting (matplotlib,
# seaborn), and scikit-learn preprocessing / metric helpers.
# NOTE(review): this snippet is identical to the import/load snippet directly
# before it — confirm whether the repetition is intentional.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import seaborn as sns
import numpy as np
# Read the training CSV into a DataFrame.
raw_data_train = pd.read_csv('au_train.csv')
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Download the artist index page, then visit every link whose href contains
# "abovebeyond" and extract the text of that page.
# NOTE(review): `requests`, `BeautifulSoup` and `html` (presumably lxml.html)
# are not imported in the visible part of the file — confirm they are in scope.
page = requests.get('https://www.azlyrics.com/a/abovebeyond.html')
soup = BeautifulSoup(page.text, "lxml")
all_lyrics = ""
for link in soup.select("a[href*=abovebeyond]"):
    # Build an absolute URL: drop the first two characters of the href and
    # prefix the site root.
    urllink = link.get('href')
    urllink = 'https://www.azlyrics.com' + urllink[2:]
    page = requests.get(urllink)
    tree = html.fromstring(page.content)
    # Collect every text node under the selected <div>; the first node is skipped.
    data = tree.xpath('/html/body/div[3]/div/div[2]/div[5]//text()')
    lyrics = ''.join(data[1:])
    # NOTE(review): `all_lyrics` is not updated in the visible lines — confirm
    # whether the accumulation step follows this snippet.