Skip to content

Instantly share code, notes, and snippets.

View CharlyWargnier's full-sized avatar
🎈

Charly Wargnier CharlyWargnier

🎈
View GitHub Profile
# Peel the remaining fields off the front of the raw 'header' string, one
# field per step. After every split, 'header' holds only the unparsed tail.
# NOTE: `n` is passed by keyword because pandas >= 2.0 no longer accepts it
# positionally in Series.str.split.
df[['url', 'header']] = df['header'].str.split(' ', n=1, expand=True)
# Remove the slash (/) at the start of each URL. The pattern is anchored so a
# value that does not begin with a slash keeps all of its inner slashes.
df['url'] = df['url'].str.replace(r'^/', '', n=1, regex=True)
# The protocol field ends at the closing quote followed by a space.
df[['httpProto', 'header']] = df['header'].str.split('" ', n=1, expand=True)
df[['httpCode', 'header']] = df['header'].str.split(' ', n=1, expand=True)
df[['timeTaken', 'header']] = df['header'].str.split(' ', n=1, expand=True)
df[['urlRef', 'header']] = df['header'].str.split(' ', n=1, expand=True)
# Strip the double quotes from the referrer (literal match, not a regex).
df['urlRef'] = df['urlRef'].str.replace('"', '', regex=False)
df[['userAgent', 'header']] = df['header'].str.split('" ', n=1, expand=True)
# This multi-character pattern is interpreted by pandas as a regular
# expression: the text before the match goes to 'remainder' (dropped by the
# column selection below), the text after it stays in 'header'.
df[['remainder', 'header']] = df['header'].str.split('".*LOCATION=30.-', n=1, expand=True)
df['header'] = df['header'].str.replace('"', '', regex=False)
# Keep only the parsed columns and expose what is left of 'header' as
# 'redirURL'. Using the non-inplace rename yields a new frame rather than
# mutating a column selection in place, which avoids chained-assignment
# warnings on later column assignments.
df = df[['IP', 'Date', 'Domain', 'Method', 'url', 'httpProto', 'httpCode',
         'timeTaken', 'urlRef', 'userAgent', 'header']].rename(
    columns={'header': 'redirURL'})
# Classify each response by the first character of its status code.
# Codes starting with 1-4 map to their class; everything else falls through
# to the 'Server errors (5XX)' default. A missing code also lands in that
# default instead of being counted as 'Info (1XX)'.
# This is a plain pandas lookup because the `pd.np` alias was removed in
# pandas 2.0.
df['httpCodeClass'] = (
    df['httpCode']
    .str[:1]
    .map({
        '1': 'Info (1XX)',
        '2': 'Success (2XX)',
        '3': 'Redirects (3XX)',
        '4': 'Client errors (4XX)',
    })
    .fillna('Server errors (5XX)')
)
# Label each row with the search-engine bot named in its user agent.
# Pairs are (substring to look for, label); earlier pairs take priority when
# a user agent contains more than one of the substrings.
_bot_markers = [
    ("YandexBot", "YandexBot"),
    ("bingbot", "BingBot"),
    ("DuckDuckBot", "DuckDuckGo"),
    ("Baiduspider", "Baidu"),
    ("Googlebot/2.1", "GoogleBot"),
]
# Start from the fallback label and overwrite from lowest to highest
# priority, so the first pair in the list wins.
_bot_class = pd.Series("Else", index=df.index)
for _marker, _label in reversed(_bot_markers):
    # regex=False: the markers are literal text (the '.' in "Googlebot/2.1"
    # must not match any character). na=False: a missing user agent matches
    # nothing and keeps the "Else" label.
    _bot_class = _bot_class.mask(
        df['userAgent'].str.contains(_marker, regex=False, na=False), _label)
df['SEBotClass'] = _bot_class