# Skip to content

# Instantly share code, notes, and snippets.

# @pishangujeniya
# Last active October 24, 2020 06:04
# Show Gist options
# Save pishangujeniya/666f0a746398ca32add94b6cb2184a3e to your computer and use it in GitHub Desktop.
import sys
import pandas as pd
from os import path, makedirs
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
def current_milli_time():
    """Return the current Unix time as a whole number of milliseconds."""
    milliseconds = time.time() * 1000
    return int(round(milliseconds))
# Directory that receives the generated CSV; created just below if missing.
OUTPUT_FILE_DIRECTORY = "C:/ask_laftan_alnamaz/"
# Filesystem location of the chromedriver executable handed to Selenium.
CHROME_DRIVER_PATH = "C:/chrome_driver/chromedriver.exe"
# Value passed to driver.implicitly_wait() below (Selenium interprets it as
# seconds).
PAGE_LOAD_WAIT_TIME = 60
makedirs(OUTPUT_FILE_DIRECTORY, exist_ok=True)
# Start the Chrome session as a module-level side effect; the scraping loop
# later in this file drives this same `driver` object.
driver = webdriver.Chrome(CHROME_DRIVER_PATH)
# NOTE(review): despite the constant's name, an implicit wait governs how long
# element lookups keep retrying, not page loading itself - confirm intent.
driver.implicitly_wait(PAGE_LOAD_WAIT_TIME)
# Page codes appended to the end of each episode URL by the scraping loop.
# A code's position in this list (plus one) is recorded as its episode number,
# so the order of the entries matters.
episodelist = [
"23629",
"23759",
"23907",
"24131",
"24267",
"24378",
"24472",
"24587",
"24803",
"24970",
"25244",
"25438",
"25639",
"25844",
"26068",
"26242",
"26492",
"26656",
"26823",
"26990",
"27150",
"27317",
"27474",
"27636",
"27909",
"28072",
"28230",
"28518",
"28926",
"29430",
"29895"
]
# Nominal vertical resolution of the wanted rendition.
# NOTE(review): nothing below reads this value - the 1920x1080 suffix is
# written out literally in the loop. Kept so the module-level name survives.
quality = 1080
# One dict per scraped episode part; each dict becomes a CSV row.
allEpisodePageLinks = []
# CSV column order. Passing it to the DataFrame also guarantees a header row
# when no part could be scraped at all.
keys = [
    "episodeCode",
    "episodeNumber",
    "episodePartNumber",
    "episodeName",
    "episodePageLink",
    "episodeVideoLink",
]
try:
    for episodeCodeIndex, singleEpisodeCode in enumerate(episodelist):
        # Parts 1..8 are requested for every episode.
        # NOTE(review): the first failing part aborts the whole run (the rows
        # collected so far are still saved) - confirm this is intended for
        # episodes that have fewer than 8 parts.
        for partIndex in range(1, 9):
            # NOTE(review): the slug always says "bolum-1"; presumably the
            # trailing page code is what selects the episode - confirm.
            showtvlink = (
                'http://www.showtv.com.tr/dizi/tum_bolumler/ask-laftan-anlamaz-sezon-1-bolum-1-parca-'
                + str(partIndex) + '-izle/' + str(singleEpisodeCode)
            )
            driver.get(showtvlink)

            elem = driver.find_element_by_class_name("vjs-tech")
            cdnVideoLink = elem.get_attribute("src")
            print(cdnVideoLink)

            # Swap everything from the first underscore onwards for the
            # 1080p rendition suffix. partition() (unlike find(), which
            # returns -1) lets us leave the link untouched when it contains
            # no underscore instead of chopping its last character.
            linkPrefix, separator, _ = cdnVideoLink.partition("_")
            if separator:
                cdnVideoLink = linkPrefix + "_1920x1080.mp4"
            print(cdnVideoLink)

            episodeNameElem = driver.find_element_by_class_name("label")
            episodeOfficialName = driver.execute_script(
                "return arguments[0].innerText;", episodeNameElem)

            allEpisodePageLinks.append({
                "episodeCode": singleEpisodeCode,
                "episodeNumber": (episodeCodeIndex + 1),
                "episodePartNumber": partIndex,
                "episodeName": episodeOfficialName,
                "episodePageLink": showtvlink,
                "episodeVideoLink": cdnVideoLink,
            })
except Exception as exc:
    # Only ordinary errors are reported here; KeyboardInterrupt/SystemExit
    # propagate, but the finally clause below still saves partial results.
    print("Oops!", sys.exc_info()[0], "occured.", exc)
finally:
    try:
        print(allEpisodePageLinks)
        output_path_final = path.join(
            OUTPUT_FILE_DIRECTORY,
            'ask_laftan_alnamaz_' + str(current_milli_time()) + '_.csv')
        df = pd.DataFrame(allEpisodePageLinks, columns=keys)
        df.to_csv(output_path_final)
    finally:
        # quit() ends the whole browser session and the chromedriver process
        # (close() would only close the current window); nesting it in its
        # own finally makes it run even if writing the CSV fails.
        driver.quit()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment