wonsolution/dafont_href scraping

## dafont_href scraping
from urllib.request import urlopen
from urllib.parse import quote_plus
from bs4 import BeautifulSoup
from selenium import webdriver
import time

#dafont에서 폰트 url가져오기

n=1
while 1:

    url= 'https://www.dafont.com/new.php?page={}&nup=3'.format(str(n))
                             #특정페이지부터 시작할 경우 : .format(str(324+n)) -->325페이지부터 시작

    for s in url:
        url

    n += 1

    driver = webdriver.Chrome()
    driver.get(url)

    time.sleep(5)

    html = driver.page_source
    soup = BeautifulSoup(html)

    dafont = soup.select('.preview')


    for i in dafont:
        fList = 'https://www.dafont.com/' + i.a['href'] +('\n')
        print(fList)
        # print()

        # 파일에 덮어쓰지 않고, 이어쓰는 경우 'w' 옵션 대신, 'a' 옵션을 주면 된다.
        f = open("dafont.txt","a")
        f.write(fList)
        f.close()

    driver.close()

    time.sleep(60)

## instagram scraping
from urllib.request import urlopen
from urllib.parse import quote_plus
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import datetime

while 1:
    # while문에 적합하지 않음.
    # baseUrl ='https://www.instagram.com/quanhaha79'
    # pUrl = input('좌표 : ')
    # url = baseUrl +pUrl

    # 반복문일 경우 바로 접속 할 수 있도록 변수를 넣지않고...
    # 크롤링을 하고 싶은 사람 혹은 페이지의 주소/ID를 알 경우
    url = 'https://www.instagram.com/quanhaha79'

    # 키워드를 검색해 찾아 올 때
    pUrl = 'quanhaha79'
    # url = 'https://www.instagram.com/explore/tags/{}/'.format(pUrl)

    driver = webdriver.Chrome()
    driver.get(url)

    time.sleep(1)

    html = driver.page_source
    soup = BeautifulSoup(html)

    insta = soup.select('.v1Nh3.kIKUG._bz0w')


    n = 1
    nowTime = datetime.datetime.now().strftime("%y%m%d_%H%M%S")
    for i in insta:
        print('https://www.instagram.com/'+i.a['href'])
        imgUrl = i.select_one('.KL4Bh').img['src']
        with urlopen(imgUrl) as f:
            with open('./haha/' + nowTime + '_' + pUrl + str(n) + '.jpg', 'wb') as h:
                img = f.read()
                h.write(img)
        n +=1
        print(imgUrl)
        print()

    driver.close()

    print('완료')

    time.sleep(60)
	from urllib.request import urlopen
	from urllib.parse import quote_plus
	from bs4 import BeautifulSoup
	from selenium import webdriver
	import time

	#dafont에서 폰트 url가져오기

	n=1
	while 1:

	url= 'https://www.dafont.com/new.php?page={}&nup=3'.format(str(n))
	#특정페이지부터 시작할 경우 : .format(str(324+n)) -->325페이지부터 시작

	for s in url:
	url

	n += 1

	driver = webdriver.Chrome()
	driver.get(url)

	time.sleep(5)

	html = driver.page_source
	soup = BeautifulSoup(html)

	dafont = soup.select('.preview')


	for i in dafont:
	fList = 'https://www.dafont.com/' + i.a['href'] +('\n')
	print(fList)
	# print()

	# 파일에 덮어쓰지 않고, 이어쓰는 경우 'w' 옵션 대신, 'a' 옵션을 주면 된다.
	f = open("dafont.txt","a")
	f.write(fList)
	f.close()

	driver.close()

	time.sleep(60)