Skip to content

Instantly share code, notes, and snippets.

@CanNuhlar
Created August 2, 2018 19:57
Show Gist options
  • Save CanNuhlar/3cd82676c8772ce7076fc3aa183cf846 to your computer and use it in GitHub Desktop.
Save CanNuhlar/3cd82676c8772ce7076fc3aa183cf846 to your computer and use it in GitHub Desktop.
Created from terminal
# -*- coding: utf-8 -*-
import requests
from HTMLParser import HTMLParser
import re
class PageNumParser(HTMLParser):
    """Locate the page count in the pager text and start the crawl.

    The parser is fed the text of the pager element; when a text node
    contains the marker word "sayfanın", the first run of digits in that
    node is taken as the total page count and handed to ``getContent``.
    """

    def handle_data(self, data):
        """Call ``getContent`` with the first integer found in *data*.

        Text nodes without the marker word are ignored, as are nodes that
        contain the marker but no digits.
        """
        if "sayfanın" not in data:
            return
        match = re.search(r'\d+', data)
        if match is None:
            # Marker present but no number: nothing to crawl. Calling
            # .group() on the missing match would raise AttributeError.
            return
        getContent(int(match.group()))
class wordParser(HTMLParser):
    """Print the word carried in the link target of result-table anchors.

    Only ``<a>`` tags with exactly one attribute are considered; the word
    is whatever sits between ``kelime=`` and ``&cesit`` in that attribute's
    value.
    """

    def handle_starttag(self, tag, attrs):
        """Print the ``kelime`` value of a single-attribute ``<a>`` tag.

        *attrs* is the list of ``(name, value)`` pairs supplied by
        HTMLParser. Anchors whose only attribute has no value, or whose
        value does not match the expected pattern, are skipped.
        """
        if tag != "a" or len(attrs) != 1:
            return
        value = attrs[0][1]
        if value is None:
            # Valueless attribute (e.g. <a name>): re.search would raise
            # TypeError on None.
            return
        match = re.search(r"kelime=(.*)&cesit", value)
        if match is not None:
            # Parenthesised single argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(match.group(1))
def getContent(totalPageCount):
    """Fetch every result page and feed its word table to ``wordParser``.

    One request is made per page, with the ``konts`` query parameter set to
    0, 60, 120, ... for ``totalPageCount`` pages in total. From each
    response, the markup between the opening result-table tag and the next
    ``</table>`` is passed to the module-level ``wordParser`` instance.

    A response that does not contain the result table is skipped, so one
    bad page does not abort the rest of the crawl. A ``totalPageCount`` of
    zero or less performs no requests.
    """
    base_url = "http://tdk.gov.tr/index.php?option=com_yazimkilavuzu&view=yazimkilavuzu&kelime1=z&kategori1=yazim_listeli&ayn1=bas&konts="
    headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}
    table_open = '<table border="1" cellspacing="0" width="100%">'

    # Same offsets as counting currentPage up by 60 while
    # totalPageCount > currentPage / 60.
    for offset in range(0, totalPageCount * 60, 60):
        response = requests.get(base_url + str(offset), headers=headers, timeout=15)
        _, marker, after_open = response.content.partition(table_open)
        if not marker:
            # No result table in this response; indexing split(...)[1]
            # here would raise IndexError.
            continue
        wordParser.feed(after_open.partition("</table>")[0])
# One parser instance of each kind for the whole run.
pageNumParser = PageNumParser()
# NOTE: this rebinds the class name to an instance; getContent() uses the
# instance through this module-level name.
wordParser = wordParser()

# Fetch the first result page (konts=0).
pageURL = "http://tdk.gov.tr/index.php?option=com_yazimkilavuzu&view=yazimkilavuzu&kelime1=z&kategori1=yazim_listeli&ayn1=bas&konts=0"
response = requests.get(
    pageURL,
    headers={'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'},
    timeout=15,
)

# Hand the contents of the first <span class="comicm"> element to the
# page-count parser, which calls getContent() once it finds the count.
pageNumParser.feed(
    response.content
    .split('<span class="comicm">')[1]
    .split("</span>")[0]
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment