Aleksandr Ostaenkov alexost66

# This script crawls the internal links of a web site
# Adapted for Python 3
# Original link: http://xahlee.info/perl-python/python_simple_web_crawler.html
# 15.10.2016
import requests
import urllib.parse as parse
import pprint
from bs4 import BeautifulSoup
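
Only the imports survive in this preview. Below is a minimal sketch of the idea, reusing the imports above; the breadth-first traversal, the max_pages cap, and the example start URL are illustrative assumptions, not the gist's actual code.

from collections import deque

def crawl_inner_links(start_url, max_pages=50):
    # Breadth-first crawl that only follows links on the start URL's own domain.
    domain = parse.urlparse(start_url).netloc
    seen, frontier = {start_url}, deque([start_url])
    while frontier and len(seen) <= max_pages:
        url = frontier.popleft()
        try:
            html = requests.get(url, timeout=5).text
        except requests.RequestException:
            continue
        for a in BeautifulSoup(html, "html.parser").find_all("a", href=True):
            link = parse.urljoin(url, a["href"])
            if parse.urlparse(link).netloc == domain and link not in seen:
                seen.add(link)
                frontier.append(link)
    return seen

# Example: pprint.pprint(crawl_inner_links("https://example.com/"))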
@alexost66
alexost66 / multi-thread.py
Last active August 29, 2017 17:37
Site mapping with multi-threading
# Source: http://www.craigaddyman.com/python-queues-and-multi-threading/
from datetime import datetime
import queue
from threading import Thread
from bs4 import BeautifulSoup
import requests
startTime = datetime.now()
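
The preview stops after the preamble. The following is a rough sketch of the queue-and-worker-threads pattern the description refers to, reusing the imports and startTime above; the seed URL list, the worker count, and the title/status bookkeeping are assumptions for illustration.

url_queue = queue.Queue()
results = []

def worker():
    while True:
        url = url_queue.get()
        try:
            resp = requests.get(url, timeout=5)
            title = BeautifulSoup(resp.text, "html.parser").title
            results.append((url, resp.status_code, title.text if title else ""))
        except requests.RequestException:
            results.append((url, None, ""))
        finally:
            url_queue.task_done()

urls = ["https://example.com/", "https://example.com/about"]  # hypothetical seed list
for u in urls:
    url_queue.put(u)
for _ in range(4):                        # start four daemon worker threads
    Thread(target=worker, daemon=True).start()
url_queue.join()                          # block until every queued URL is processed

print(results)
print("Elapsed:", datetime.now() - startTime)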
@alexost66
alexost66 / google-images-save.py
Created August 30, 2017 18:17
Save Google images
from bs4 import BeautifulSoup
import requests
import os
import urllib.parse
from os.path import basename
import json
query = "chinatown"
query = query.split()
query = '+'.join(query)
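
The snippet ends after building the query string. A hedged sketch of one possible continuation: fetch the image-search results page and save whatever externally hosted <img> thumbnails appear in the static HTML. Google's markup changes frequently and most results are injected by JavaScript, so the URL parameters and the parsing below are assumptions rather than the gist's actual logic.

url = "https://www.google.com/search?q=" + query + "&tbm=isch"   # tbm=isch selects image search
headers = {"User-Agent": "Mozilla/5.0"}   # Google blocks the default requests user agent

os.makedirs(query, exist_ok=True)
soup = BeautifulSoup(requests.get(url, headers=headers).text, "html.parser")

for i, img in enumerate(soup.find_all("img")):
    src = img.get("src", "")
    if not src.startswith("http"):        # skip inline/base64 placeholders
        continue
    try:
        data = requests.get(src, headers=headers, timeout=5).content
    except requests.RequestException:
        continue
    with open(os.path.join(query, "%s_%d.jpg" % (query, i)), "wb") as f:
        f.write(data)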
#!/bin/bash
# Check a target domain's email addresses against known data breaches
rm -f found-emails.txt hacked-emails.txt   # clear results from a previous run
clear
echo -e "Enter Target Domain:"
echo -e ""
read hname
clear
echo -e "[+] Email Recon Started"
@alexost66
alexost66 / wget-urls-with-responces
Last active May 8, 2018 06:24
URLs and HTTP responses using wget and curl
# Collect all links from the site with wget, then get the HTTP response status for each link
wget --tries=3 --spider -r -l 10 -w .1 -nc -np -nd https://yandex.com.tr/company/ -R bmp,css,gif,ico,jpg,jpeg,js,mp3,mp4,pdf,png,swf,txt,xml,xls,zip 2>&1 | tee wglog
- spider only check that each page exists, do not save it to disk
- tries=3 number of retries per URL
- r crawl recursively
- l 10 limit the recursion depth to 10 levels
- w .1 wait 0.1 seconds between requests
- nc no-clobber, skip files that have already been fetched
- np no-parent, do not ascend to the parent directory
- nd no-directories, do not recreate the site's directory structure
- R reject files with the listed extensions instead of downloading them
- tee wglog copy all output to the file wglog
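
The description mentions curl for the status check; a Python equivalent that reads the wglog file produced above might look like this. The regex simply collects every URL printed in the log, which is an assumption about the log contents rather than a parse of wget's exact output format.

import re
import requests

# Collect every URL mentioned in the wget log, then re-check each one's HTTP status.
with open("wglog", encoding="utf-8", errors="ignore") as f:
    urls = sorted(set(re.findall(r"https?://[^\s'\"]+", f.read())))

for url in urls:
    try:
        status = requests.head(url, allow_redirects=True, timeout=5).status_code
    except requests.RequestException:
        status = "error"
    print(status, url)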
@alexost66
alexost66 / rotate_proxy.py
Last active May 21, 2018 12:13
Rotating proxy crawler
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import random
ua = UserAgent()
proxies = []
def main():
    proxies_req = Request('https://www.sslproxies.org/')
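
The preview stops at the first line of main(). Below is a hedged sketch of how the function might continue: scrape the free proxy list, then route requests to a target page through a randomly chosen proxy. The table layout on sslproxies.org (IP and port in the first two columns) and the httpbin.org target URL are assumptions, not the gist's actual code.

    # Fetch the free proxy list with a random User-Agent.
    proxies_req.add_header('User-Agent', ua.random)
    soup = BeautifulSoup(urlopen(proxies_req).read().decode('utf8'), 'html.parser')

    # Assumption: the first table lists IP and port in its first two columns.
    for row in soup.find('table').find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) >= 2:
            proxies.append({'ip': cells[0].text, 'port': cells[1].text})

    # Rotate: pick a different random proxy for each request to the target.
    target = 'https://httpbin.org/ip'    # hypothetical target URL
    for _ in range(3):
        proxy = random.choice(proxies)
        req = Request(target)
        req.add_header('User-Agent', ua.random)
        req.set_proxy(proxy['ip'] + ':' + proxy['port'], 'http')
        try:
            print(urlopen(req, timeout=5).read().decode('utf8'))
        except Exception as err:
            print('Proxy failed, rotating:', err)

if __name__ == '__main__':
    main()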