Lakshay lakshay-arora

## multiprocessing_apply_1.py
%%time
data['Number_of_divisor'] = data.Number.apply(countDivisors)

## multiprocessing_pool_1.py
%%time

pool = mp.Pool(processes = (mp.cpu_count() - 1))
answer = pool.map(countDivisors,random_data)
pool.close()
pool.join()

## goibibo_data_request.py
"""
Web Scraping - Beautiful Soup
"""

# importing required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

# target URL to scrape

## goibibo_data_filter.py
# find all the sections with the specified class name
# (attrs must be a dict mapping attribute name -> value; a {'class', '...'}
# literal is a set, so 'class' itself would be treated as a value to match
# rather than as the attribute name)
cards_data = data.find_all('div', attrs={'class': 'width100 fl htlListSeo hotel-tile-srp-container hotel-tile-srp-container-template new-htl-design-tile-main-block'})

# total number of cards
print('Total Number of Cards Found : ', len(cards_data))

# source code of hotel cards
for card in cards_data:
    print(card)

## goibibo_data_filter_2.py
# extract the hotel name and price per room
for card in cards_data:

    # get the hotel name (first <p> tag inside the card)
    hotel_name = card.find('p')

    # get the room price
    room_price = card.find('li', attrs={'class': 'htl-tile-discount-prc'})

    # find() returns None when the tag is absent; skip incomplete cards
    # instead of crashing the whole loop on `.text`
    if hotel_name is None or room_price is None:
        continue

    print(hotel_name.text, room_price.text)

## goibibo_data_scraped_with_csv.py
# create a list to store the data
# NOTE(review): this excerpt ends before card_details is filled or appended
# to scraped_data - presumably the rest of the loop body lives in the full
# script; confirm against the original source.
scraped_data = []

for card in cards_data:

    # initialize the dictionary (a fresh one for every card)
    card_details = {}

    # get the hotel name: first <p> tag found inside the card
    hotel_name = card.find('p')

## scrap_image_part_1.py
"""
Web Scraping - Scrape Images
"""

# importing required libraries
import requests
from bs4 import BeautifulSoup

# target URL
url = "https://www.goibibo.com/hotels/hotels-in-shimla-ct/"

## scrap_image_part_2.py
# select the src attribute of every image tag
# (.get() returns None for tags that have no src attribute, instead of the
# KeyError raised by tag['src'])
image_src = [img.get('src') for img in images]

# select only jpg format images, dropping tags that had no src at all
image_src = [src for src in image_src if src and src.endswith('.jpg')]

for image in image_src:
    print(image)

## scrap_image_part_3.py
# download every image URL and save it as image_<n>.jpg, where <n> is the
# 1-based position of the URL in image_src
image_count = 1
for image in image_src:
    # fetch before opening the file so a failed request never leaves an
    # empty file behind; the timeout keeps a stalled server from hanging
    # the loop forever
    res = requests.get(image, timeout=30)

    if res.ok:
        with open('image_' + str(image_count) + '.jpg', 'wb') as f:
            f.write(res.content)
    else:
        # do not store an HTTP error page under a .jpg name
        print('Skipping', image, '- HTTP status', res.status_code)

    image_count = image_count + 1

## lazy_1.py
# build a sample list holding the integers 1 through 9,999,999
my_list = list(range(1, 10000000))

# hand the list to Spark, split into 3 slices
rdd_0 = sc.parallelize(my_list, numSlices=3)

# show the RDD object
rdd_0
	%%time
	data['Number_of_divisor'] = data.Number.apply(countDivisors)
	%%time

	pool = mp.Pool(processes = (mp.cpu_count() - 1))
	answer = pool.map(countDivisors,random_data)
	pool.close()
	pool.join()
	"""
	Web Scraping - Beautiful Soup
	"""

	# importing required libraries
	import requests
	from bs4 import BeautifulSoup
	import pandas as pd

	# target URL to scrape
	# find all the sections with the specified class name
	# (attrs must be a dict mapping attribute name -> value; a {'class', '...'}
	# literal is a set, so 'class' itself would be treated as a value to match
	# rather than as the attribute name)
	cards_data = data.find_all('div', attrs={'class': 'width100 fl htlListSeo hotel-tile-srp-container hotel-tile-srp-container-template new-htl-design-tile-main-block'})

	# total number of cards
	print('Total Number of Cards Found : ', len(cards_data))

	# source code of hotel cards
	for card in cards_data:
		print(card)
	# extract the hotel name and price per room
	for card in cards_data:

		# get the hotel name (first <p> tag inside the card)
		hotel_name = card.find('p')

		# get the room price
		room_price = card.find('li', attrs={'class': 'htl-tile-discount-prc'})

		# find() returns None when the tag is absent; skip incomplete cards
		# instead of crashing the whole loop on `.text`
		if hotel_name is None or room_price is None:
			continue

		print(hotel_name.text, room_price.text)
	# create a list to store the data
	# NOTE(review): this excerpt ends before card_details is filled or appended
	# to scraped_data - presumably the rest of the loop body lives in the full
	# script; confirm against the original source.
	scraped_data = []

	for card in cards_data:

		# initialize the dictionary (a fresh one for every card)
		card_details = {}

		# get the hotel name: first <p> tag found inside the card
		hotel_name = card.find('p')
	"""
	Web Scraping - Scrape Images
	"""

	# importing required libraries
	import requests
	from bs4 import BeautifulSoup

	# target URL
	url = "https://www.goibibo.com/hotels/hotels-in-shimla-ct/"
	# select the src attribute of every image tag
	# (.get() returns None for tags that have no src attribute, instead of the
	# KeyError raised by tag['src'])
	image_src = [img.get('src') for img in images]

	# select only jpg format images, dropping tags that had no src at all
	image_src = [src for src in image_src if src and src.endswith('.jpg')]

	for image in image_src:
		print(image)
	# download every image URL and save it as image_<n>.jpg, where <n> is the
	# 1-based position of the URL in image_src
	image_count = 1
	for image in image_src:
		# fetch before opening the file so a failed request never leaves an
		# empty file behind; the timeout keeps a stalled server from hanging
		# the loop forever
		res = requests.get(image, timeout=30)

		if res.ok:
			with open('image_' + str(image_count) + '.jpg', 'wb') as f:
				f.write(res.content)
		else:
			# do not store an HTTP error page under a .jpg name
			print('Skipping', image, '- HTTP status', res.status_code)

		image_count = image_count + 1
	# build a sample list holding the integers 1 through 9,999,999
	my_list = list(range(1, 10000000))

	# hand the list to Spark, split into 3 slices
	rdd_0 = sc.parallelize(my_list, numSlices=3)

	# show the RDD object
	rdd_0