This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import requests | |
import urllib | |
import time | |
import re | |
# Data Visualization | |
from plotly import tools | |
import chart_studio |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from time import sleep | |
from random import randint | |
from tqdm import notebook as tqdm | |
### Import Modules and Set Perimeters for Pytrends ### | |
from pytrends.request import TrendReq | |
### For Ticker Information | |
import yfinance as yf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Import Libraries ###
import urllib
import requests
import pandas as pd
from tqdm.notebook import tqdm
from urllib.parse import urlparse
### Load API Key
# SEMrush API key, intentionally blank in source control — populate before
# running (consider loading from an environment variable instead).
api_key = ''
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Build a three-layer keyword dataset from a user-supplied seed term ###
term = input('What keyword would you like to explore?')

# First layer: query SEMrush for the seed term and parse the raw response
# into a DataFrame (build_seo_urls / parse_response are defined elsewhere).
response = requests.get(build_seo_urls(phrase=term))
df1 = pd.DataFrame(parse_response(response.content))

try:
    # Second layer: crawl the result URLs to harvest related keywords.
    keyword_list = secondary_layer(crawl_urls=df1['Url'])
except KeyError as e:
    # Missing 'Url' column means SEMrush returned no usable rows for the
    # term. Chain the original KeyError so the root cause stays visible.
    raise Exception("The keyword you have inputted is either not in SEMrush's database or your input was incorrectly submitted. Please rerun and try again.") from e

# Third layer: expand the second-layer keywords, then re-attach each
# keyword's volume/CPC/competition metrics via a left merge.
third_layer = third_layer_setup(second_layer_kw=keyword_list)
third_layer = third_layer.merge(keyword_list[['Keyword','Search Volume', 'CPC', 'Competition']], on="Keyword", how='left')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def log_parse(data):
    """Extract fields from one raw access-log line using regexes.

    NOTE(review): this definition is truncated in the visible chunk — the
    final try block continues beyond these lines.
    """
    # Response Size: first 1-4 digit group that follows a digit and a space
    # (presumably the byte count after the status code — TODO confirm;
    # sizes of 5+ digits would be cut short by the {1,4} quantifier).
    try:
        size = re.search(r'[0-9] (\d{1,4})', data).group(1)
    except AttributeError as e:
        # re.search returned None (no match), so .group raised — no size.
        size = 'n/a'
    # Server Response: the quoted token starting with 'http', quotes removed.
    try:
        server_response = re.search(r'http.*?[\"]', data).group(0).replace('"', '')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Approach takes into consideration multiple log files stored locally with a .gz format
# Gather the gzip-compressed log archives from the local analysis folder.
log_file_slugs = os.listdir(r'C:\Users\Derek.Hawkins\Log File Analysis Folder')
log_file_slugs = [x for x in log_file_slugs if '.gz' in x]
main = []  # accumulates one parsed record per log line
for i in log_file_slugs:
    # NOTE(review): this read path ('...Log File Analysis') differs from the
    # listing path above ('...Log File Analysis Folder') — confirm which
    # directory is correct, otherwise the open will fail.
    with gzip.open(r'C:\Users\Derek.Hawkins\Log File Analysis\{}'.format(i),'r') as fin:
        for line in tqdm(fin):
            try:
                main.append(log_parse(line.decode()))
            except AttributeError as e:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import socket
# Verify crawler identity via reverse DNS on each logged IP address.
log_file = pd.read_pickle('log.pkl')
# NOTE(review): this rebinds log_file from a DataFrame to a Series of
# hostnames — later code reading log_file.user_agent would then break;
# confirm whether the result should be stored in a new column instead.
log_file = log_file.ip_address.apply(lambda ip: socket.gethostbyaddr(ip)[0])
# Alternative: classify user-agent strings with the crawlerdetect package.
from crawlerdetect import CrawlerDetect
crawler_detect = CrawlerDetect()
validate = []  # one dict per log row with the crawler-detection verdict
for crawl in log_file.user_agent:
    data = {'valid':crawler_detect.isCrawler(crawl),
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.graph_objects as go

# Tally log entries per HTTP status code and show each code's share of
# total traffic as a pie chart.
status_counts = log_file.pivot_table(
    index='status_code',
    values='server_response',
    aggfunc={'server_response': 'count'},
)
fig = go.Figure(
    data=[go.Pie(labels=status_counts.index, values=status_counts.server_response)]
)
fig.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count pings per requested file and present them in a plotly table,
# sorted with the most-pinged files first.
pivot = log_file.pivot_table(index='request_type', values='server_response',
                    aggfunc={'server_response':'count'})\
                    .sort_values(by='server_response', ascending=False).reset_index().\
                    rename(columns={'request_type':'file_name', 'server_response':'number_of_pings'})
# NOTE(review): the go.Table(...) call is truncated in this view — the
# cells=dict(...) argument and closing parentheses continue past these lines.
fig = go.Figure(data=[go.Table(
    header=dict(values=list(pivot.columns),
                fill_color='blue',
                font=dict(color='white', size=12),
                align='left'),
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Daily crawl volume: number of log entries recorded per date, rendered
# as a time-series line chart.
pivot = log_file.pivot_table(
    index='date',
    values='server_response',
    aggfunc={'server_response': 'count'},
).rename(columns={'server_response': 'crawls'})

fig = go.Figure(
    data=go.Scatter(x=pivot.index, y=pivot.crawls, mode='lines')
)
fig.update_layout(
    title='Crawl Rate: example.com',
    xaxis_title='Date',
    yaxis_title='Number of Pings by Search Engine',
)
fig.show()