Skip to content

Instantly share code, notes, and snippets.

@shivankgtm
Created September 25, 2019 04:34
Show Gist options
  • Save shivankgtm/0a083aeef637b4018afed6e9c3225a0c to your computer and use it in GitHub Desktop.
Save shivankgtm/0a083aeef637b4018afed6e9c3225a0c to your computer and use it in GitHub Desktop.
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
from lxml import html
import requests
import urllib
import numpy as np
import pandas as pd
# Scrape startup listings out of a saved HTML page ("win1.html") and export
# one row per listing (company name, market, pitch, website) to "D2.csv".
#
# Each listing is a <div class="base startup"> containing child <div>s with
# the classes "name", "pitch", "market" and "website"; the website text is
# taken from the first <a> inside the "website" div.


def _text_of(tag):
    """Return the text content of *tag*, or "" when the tag is missing.

    ``find`` returns ``None`` when nothing matches; mapping that to an empty
    string keeps one incomplete listing from aborting the whole export and
    keeps all output columns the same length.
    """
    return tag.text if tag is not None else ""


# Context manager guarantees the file handle is closed once it is parsed.
# NOTE(review): the file is read with the platform default encoding --
# confirm whether an explicit encoding (e.g. utf-8) is appropriate.
with open("win1.html") as f:
    # Name the parser explicitly: without it bs4 emits a warning and picks
    # whichever parser happens to be installed. lxml is already a dependency
    # of this file (see the imports) and is bs4's first choice when present.
    document = BeautifulSoup(f.read(), "lxml")

Names = []      # company names
Pitches = []    # pitch text, or a placeholder when empty/missing
Market = []     # market descriptions
Website = []    # link text of the first anchor in the website div

# Single pass: every listing contributes exactly one entry to each column.
for startup in document.find_all('div', {'class': 'base startup'}):
    Names.append(_text_of(startup.find('div', {'class': 'name'})))

    pitch = _text_of(startup.find('div', {'class': 'pitch'}))
    Pitches.append(pitch if pitch else 'No Pitch Available')

    Market.append(_text_of(startup.find('div', {'class': 'market'})))

    # The website div may be absent, or present without a link.
    website_div = startup.find('div', {'class': 'website'})
    link = website_div.find('a') if website_div is not None else None
    Website.append(_text_of(link))

data = {'Company': Names, 'Market': Market, 'Pitches': Pitches, 'Website': Website}
D2 = pd.DataFrame(data)
D2.to_csv('D2.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment