@yumaueno
Created August 14, 2019 08:48
Web scraping with Python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Import libraries
import requests
from bs4 import BeautifulSoup
from time import sleep

# Scraping class: collects the text of all <p> elements from each URL
class Scr():
    def __init__(self, urls):
        self.urls = urls

    def geturl(self):
        all_text = []
        for url in self.urls:
            # Fetch the page and parse the HTML
            r = requests.get(url)
            c = r.content
            soup = BeautifulSoup(c, "html.parser")
            # Collect every paragraph and join them into one string per URL
            article1_content = soup.find_all("p")
            temp = []
            for con in article1_content:
                out = con.text
                temp.append(out)
            text = ''.join(temp)
            all_text.append(text)
            sleep(1)  # pause between requests to avoid hammering the server
        return all_text

sc = Scr(["https://toukei-lab.com/conjoint", "https://toukei-lab.com/correspondence"])
print(sc.geturl())
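
Below is a minimal sketch, not part of the original gist, showing the same requests + BeautifulSoup pattern with a request timeout and basic error handling so that one unreachable URL does not abort the whole run. The helper name fetch_paragraphs and the delay/timeout values are assumptions for illustration.

# Hedged sketch (not from the original gist): same scraping pattern with a
# timeout and error handling. fetch_paragraphs is a hypothetical helper.
import requests
from bs4 import BeautifulSoup
from time import sleep

def fetch_paragraphs(urls, delay=1):
    results = []
    for url in urls:
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
        except requests.RequestException as e:
            # Skip URLs that fail to load instead of crashing the whole run
            print(f"Failed to fetch {url}: {e}")
            results.append("")
            continue
        soup = BeautifulSoup(r.content, "html.parser")
        # Join the text of all <p> elements into one string per URL
        results.append(''.join(p.text for p in soup.find_all("p")))
        sleep(delay)  # be polite: pause between requests
    return results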