Skip to content

Instantly share code, notes, and snippets.

@98yejin
Created May 19, 2021 05:21
Show Gist options
  • Save 98yejin/7bd25371b2b493941f86ed2f146be933 to your computer and use it in GitHub Desktop.
Save 98yejin/7bd25371b2b493941f86ed2f146be933 to your computer and use it in GitHub Desktop.
티스토리 게시글 업데이트 날짜 확인하기
import scrapy
import re
class TistorySpider(scrapy.Spider):
    """Print the date stamps found on a Tistory category listing.

    The spider requests a fixed set of listing pages of the ``Algorithm``
    category on ``hello-i-t.tistory.com`` and, for every element matched
    under ``#mArticle``, prints only the digits, dots and colons contained
    in that element's markup.
    """

    name = 'tistory_update_dates'
    allowed_domains = ['tistory.com']
    # One listing URL per page: range(first_page, last_page + 1).
    start_urls = [
        'https://hello-i-t.tistory.com/category/Algorithm?page=' + str(n)
        for n in range(1, 8)
    ]

    # Matches every character that is NOT a digit, '.' or ':'. Compiled once
    # here so the per-element loop in parse() does not repeat the lookup.
    _NON_DATE_CHARS = re.compile(r'[^0-9.:]')

    def parse(self, response):
        """Print the date-like characters of each matched element.

        Args:
            response: The response for one listing page; its ``url`` is
                printed first so the output can be attributed to a page.

        Returns:
            None. Results are written to standard output only.
        """
        print("processing:" + response.url)
        fragments = response.xpath("//*[@id='mArticle']/div/div").extract()
        for fragment in fragments:
            # NOTE(review): the filter runs over the whole extracted string,
            # so digits, '.' and ':' that appear anywhere in it (presumably
            # including tag attributes) are kept too -- confirm against the
            # page markup.
            print(self._NON_DATE_CHARS.sub('', fragment))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment