Skip to content

Instantly share code, notes, and snippets.

@Kyu
Last active May 15, 2017 20:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Kyu/aa34f7f02b639ebd8e2892eb3f2900ac to your computer and use it in GitHub Desktop.
Save Kyu/aa34f7f02b639ebd8e2892eb3f2900ac to your computer and use it in GitHub Desktop.
Crawls Bloc Alliances for Economy and Military info.
from bs4 import BeautifulSoup
import aiohttp
import asyncio
from datetime import datetime
import csv
# Event loop used to run main() and to back the HTTP session below.
loop = asyncio.get_event_loop()
# One aiohttp session shared by the login request and every page fetch.
client = aiohttp.ClientSession(loop=loop)
# Browser-like User-Agent header sent with the login and main-page requests.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'}
# Base URL of the crawled site (no trailing slash); paths are appended to it.
BLOC = 'http://blocgame.com'
# Form fields posted to /login.php.
# NOTE(review): 'USERNAME' / 'PASSWORD' look like placeholders to be replaced.
DATA = {'username': 'USERNAME', 'password': 'PASSWORD', 'login': ''}
class Nation:
    """Economy and military figures parsed from one nation's stats page.

    Attributes set by the constructor:
        name: text of the element whose id is ``nationtitle``.
        gdp: integer parsed from the 'Gross Domestic Product:' cell.
        universities, factories, wells, mines: integer counts
            (``0`` when the page shows ``'None'``).
        region: text of the 'Region:' cell.
        troops: integer parsed from the 'Army Size:' cell.
        tech: text of the 'Equipment:' cell.
        airforce, navy: last text fragment of the respective cell.
    """

    def __init__(self, soup):
        """Parse every field out of ``soup``.

        Args:
            soup: parsed nation page; must support ``find(id=...)`` and
                ``find(text=...)`` the way a BeautifulSoup tree does.

        Raises:
            AttributeError: if a label is missing from the page.
            ValueError: if a numeric cell cannot be parsed, or if the
                airforce/navy cell contains no text at all.
        """
        self.name = soup.find(id='nationtitle').text
        # NOTE(review): skips the first character (presumably a currency
        # sign) and keeps the next four; a longer figure would be truncated
        # and a shorter one fails int() -- TODO confirm the page format.
        self.gdp = int(self._cell(soup, 'Gross Domestic Product:').string[1:5])

        factory_text = self._cell(soup, 'Industry:').string
        uni_text = self._cell(soup, 'Universities:').string
        # The numeric argument is the length of the unit suffix that follows
        # the number in the cell text.
        self.universities = self._count(uni_text, 13, '1 university')
        # NOTE: the original author left a reminder about literacy here;
        # literacy is not parsed.
        self.factories = self._count(factory_text, 10, '1 factory')
        self.wells = self._count(
            self._cell(soup, 'Oil Production:').string, 15)
        self.mines = self._count(
            self._cell(soup, 'Raw Material Production:').string, 23)

        self.region = self._cell(soup, 'Region:').string
        # The last 18 characters of the cell are a unit suffix.
        self.troops = int(self._cell(soup, 'Army Size:').string[:-18])
        self.tech = self._cell(soup, 'Equipment:').string
        self.airforce = self._last_string(soup, 'Airforce:')
        self.navy = self._last_string(soup, 'Navy:')

    @staticmethod
    def _cell(soup, label):
        """Return the first child of the value cell that follows ``label``."""
        label_node = soup.find(text=label)
        # The value cell is two siblings after the label's parent; the
        # sibling in between is skipped (presumably whitespace).
        return label_node.parent.next_sibling.next_sibling.contents[0]

    @staticmethod
    def _count(text, suffix_len, singular=None):
        """Turn a cell text such as ``'None'`` or ``'3 <unit>'`` into an int."""
        if text == 'None':
            return 0
        if singular is not None and text == singular:
            return 1
        return int(text[:-suffix_len])

    @classmethod
    def _last_string(cls, soup, label):
        """Return the last text fragment inside the second child of a cell."""
        fragments = list(cls._cell(soup, label).contents[1].strings)
        if not fragments:
            # Fail here rather than leaving the attribute unset and breaking
            # later with an unrelated AttributeError.
            raise ValueError('no text found for {!r}'.format(label))
        return fragments[-1]
class Alliance:
    """Aggregated economy and military statistics for one alliance.

    Attributes:
        name, nations: the constructor arguments, stored as given.
        members: number of nations.
        gdp, factories, universities, wells, mines: sums over all nations.
        avggdp, avgfactories, avguniversities, avgwells, avgmines:
            per-member means rounded to one decimal (0.0 with no members).
        troopLevels: troops summed per region key, plus 'tot' and 'avg'.
        techLevels, airforceLevels, navyLevels: number of nations per level.
    """

    # Region name as it appears on a nation -> key in ``troopLevels``.
    # Any other region is counted under 'ind'.
    _REGION_KEYS = {
        'Mesoamerica': 'mex',
        'Caribbean': 'car',
        'Gran Colombia': 'gra',
        'Amazonia': 'ama',
        'Southern Cone': 'con',
        'West Africa': 'waf',
        'Guinea': 'gui',
        'East Africa': 'eaf',
        'Congo': 'cng',
        'Southern Africa': 'saf',
        'Atlas': 'atl',
        'Egypt': 'egy',
        'Mesopotamia': 'mes',
        'Arabia': 'ara',
        'Persia': 'per',
        'The Subcontinent': 'sub',
        'China': 'chi',
        'Pacific Rim': 'pac',
        'Indochina': 'sea',
    }
    # Equipment text -> key in ``techLevels``; anything else counts as 'adv'.
    _TECH_KEYS = {
        'Stone Age': 'sta',
        'Finest of the 19th century': 'fin',
        'First World War surplus': 'ww1',
        'Second World War surplus': 'ww2',
        'Korean War surplus': 'kor',
        'Vietnam War surplus': 'vie',
        'Almost Modern': 'alm',
        'Persian Gulf War surplus': 'pgw',
    }
    # Airforce text (with its leading space) -> key in ``airforceLevels``.
    # Unrecognised values are not counted at all.
    _AIRFORCE_KEYS = {
        ' None': 'non',
        ' Meagre': 'mea',
        ' Small': 'sma',
        ' Mediocre': 'med',
        ' Somewhat Large': 'som',
        ' Large': 'lar',
        ' Powerful': 'pow',
        ' Very Powerful': 'vpo',
    }
    # Navy text (with its leading space) -> key in ``navyLevels``;
    # anything else counts as 'pbl'.
    _NAVY_KEYS = {
        ' None': 'non',
        ' Some Dinghies': 'dng',
        ' Coast Guard': 'cst',
        ' Green Water': 'grn',
        ' Blue Water': 'blu',
    }

    def __init__(self, name, nations):
        """Aggregate ``nations`` into alliance-wide totals and averages.

        Args:
            name: alliance name, used for progress messages and output.
            nations: sized iterable of objects exposing ``gdp``,
                ``factories``, ``universities``, ``wells``, ``mines``,
                ``region``, ``troops``, ``tech``, ``airforce`` and ``navy``.
                May be empty, in which case every average is 0.0.
        """
        self.name = name
        self.nations = nations
        self.members = len(nations)
        self.gdp = 0
        self.factories = 0
        self.universities = 0
        self.wells = 0
        self.mines = 0
        self.troopLevels = dict.fromkeys(
            list(self._REGION_KEYS.values()) + ['ind', 'tot', 'avg'], 0)
        self.techLevels = dict.fromkeys(
            list(self._TECH_KEYS.values()) + ['adv'], 0)
        self.airforceLevels = dict.fromkeys(self._AIRFORCE_KEYS.values(), 0)
        self.navyLevels = dict.fromkeys(
            list(self._NAVY_KEYS.values()) + ['pbl'], 0)

        print('Indexing %s...' % self.name)
        for nation in nations:
            self.gdp += nation.gdp
            self.factories += nation.factories
            self.universities += nation.universities
            self.wells += nation.wells
            self.mines += nation.mines

            region_key = self._REGION_KEYS.get(nation.region, 'ind')
            self.troopLevels[region_key] += nation.troops
            self.troopLevels['tot'] += nation.troops

            self.techLevels[self._TECH_KEYS.get(nation.tech, 'adv')] += 1

            air_key = self._AIRFORCE_KEYS.get(nation.airforce)
            if air_key is not None:
                self.airforceLevels[air_key] += 1

            self.navyLevels[self._NAVY_KEYS.get(nation.navy, 'pbl')] += 1

        self.avggdp = self._mean(self.gdp, self.members)
        self.avgfactories = self._mean(self.factories, self.members)
        self.avguniversities = self._mean(self.universities, self.members)
        self.avgwells = self._mean(self.wells, self.members)
        self.avgmines = self._mean(self.mines, self.members)
        self.troopLevels['avg'] = self._mean(self.troopLevels['tot'],
                                             self.members)
        print("Finished Indexing {}\n".format(self.name))

    @staticmethod
    def _mean(total, count):
        """Return ``total / count`` rounded to one decimal, 0.0 if no members."""
        if not count:
            return 0.0
        return round(total / float(count), 1)
async def getSoup(url):
    """Fetch ``url`` and return a trimmed BeautifulSoup tree, or None.

    The page is reduced to the parts the crawler reads:
      * URLs without "alliance" in them keep the ``nationtitle`` element
        and the bordered stats tables;
      * "alliancestats" URLs keep the first ``h1`` that has text and the
        striped tables;
      * any other URL is returned unfiltered.

    Args:
        url: absolute URL to request through the shared ``client`` session.

    Returns:
        The parsed (and possibly trimmed) tree, or None if the request or
        the parsing raised; the error is printed, never propagated.
    """
    try:
        async with client.get(url) as response:
            html = await response.text()
            status = response.status

        if status == 404:
            # Retry with the leading "<BLOC>/" stripped.
            # NOTE(review): presumably for hrefs that were already absolute
            # and got the base URL prepended twice -- confirm.
            # The nested call reports its own errors and yields None.
            return await getSoup(url[len(BLOC) + 1:])

        soup = BeautifulSoup(html, 'html.parser')
        wrapper = "<html>{}</html>"
        lowered = url.lower()
        if "alliance" not in lowered:
            tables = soup.find_all("table", attrs={
                "class":
                "table table-striped table-condensed table-hover table-bordered"})
            title = soup.find(id='nationtitle') or ""
            trimmed = wrapper.format(
                str(title) + "\n".join(str(t) for t in tables))
            soup = BeautifulSoup(trimmed, 'html.parser')
        elif "alliancestats" in lowered:
            tables = soup.find_all('table',
                                   attrs={'class': 'table table-striped '})
            heading = soup.find('h1', text=True)
            trimmed = wrapper.format(
                str(heading) + "\n".join(str(t) for t in tables))
            soup = BeautifulSoup(trimmed, 'html.parser')
    except Exception as e:
        # Only Exception is caught so Ctrl-C and task cancellation still
        # propagate.
        print("getSoup| {0}:{1} on {2}".format(type(e).__name__, str(e), url))
        return None
    return soup
async def scrape(pages):
    """Crawl the alliance rankings and build an Alliance for each entry.

    Args:
        pages: number of ranking pages to read, starting at page 1.

    Returns:
        A list of Alliance objects in the order their links were found.
        Pages that could not be fetched and alliances whose nations could
        not all be parsed are reported on stdout and left out.
    """
    allianceLinks = []
    for page in range(1, 1 + pages):
        soup = await getSoup(
            BLOC + '/alliancerankings.php?page={}'.format(page))
        if soup is None:
            # getSoup already printed the cause; skip this page.
            print("scrape| no rankings for page {}".format(page))
            continue
        for link in soup.find_all('a'):
            href = link.get('href')
            if href in allianceLinks:
                continue
            if 'alliancestats.php?allianceid=' in str(href):
                allianceLinks.append(href)

    alliances = []
    for href in allianceLinks:
        soup = await getSoup(BLOC + "/" + href)
        if soup is None:
            print("scrape| could not load {}".format(href))
            continue
        heading = soup.find('h1', text=True)
        if heading is None:
            print("scrape| no alliance name on {}".format(href))
            continue
        name = heading.text

        nationLinks = []
        for link in soup.find_all('a'):
            nation_href = link.get('href')
            if ('stats.php?id=' in str(nation_href)
                    and nation_href not in nationLinks):
                nationLinks.append(nation_href)

        try:
            nations = []
            for n in nationLinks:
                nations.append(Nation(await getSoup(BLOC + '/{}'.format(n))))
            alliances.append(Alliance(name, nations))
        except Exception as e:
            # A single unparsable nation drops the whole alliance, so no
            # partial totals are ever reported.
            print("scrape| {0}:{1} on {2}".format(type(e).__name__, str(e),
                                                   href))
    return alliances
# (row label, Alliance attribute) for the leading summary rows.
_CSV_SUMMARY_ROWS = (
    ('Alliance', 'name'),
    ('Members', 'members'),
    ('GDP', 'gdp'),
    ('Average GDP', 'avggdp'),
    ('Factories', 'factories'),
    ('Average Factories', 'avgfactories'),
    ('Universities', 'universities'),
    ('Average Universities', 'avguniversities'),
    ('Oil Wells', 'wells'),
    ('Average Oil Wells', 'avgwells'),
    ('Mines', 'mines'),
    ('Average Mines', 'avgmines'),
)

# (section heading, Alliance dict attribute, ((row label, dict key), ...)).
_CSV_SECTIONS = (
    ('Troops', 'troopLevels', (
        ('Mesoamerica', 'mex'),
        ('Caribbean', 'car'),
        ('Gran Colombia', 'gra'),
        ('Amazonia', 'ama'),
        ('Southern Cone', 'con'),
        ('West Africa', 'waf'),
        ('Guinea', 'gui'),
        ('East Africa', 'eaf'),
        ('Congo', 'cng'),
        ('Southern Africa', 'saf'),
        ('Atlas', 'atl'),
        ('Egypt', 'egy'),
        ('Mesopotamia', 'mes'),
        ('Arabia', 'ara'),
        ('Persia', 'per'),
        ('The Subcontinent', 'sub'),
        ('China', 'chi'),
        ('Pacific Rim', 'pac'),
        ('Indochina', 'sea'),
        ('East Indies', 'ind'),
        ('Total', 'tot'),
        ('Average', 'avg'),
    )),
    ('Tech', 'techLevels', (
        ('Stone Age', 'sta'),
        ('Finest of the 19th century', 'fin'),
        ('First World War surplus', 'ww1'),
        ('Second World War surplus', 'ww2'),
        ('Korean War surplus', 'kor'),
        ('Vietnam War surplus', 'vie'),
        ('Almost Modern', 'alm'),
        ('Persian Gulf War surplus', 'pgw'),
        ('Advanced', 'adv'),
    )),
    ('Airforce', 'airforceLevels', (
        ('None', 'non'),
        ('Meagre', 'mea'),
        ('Small', 'sma'),
        ('Mediocre', 'med'),
        ('Somewhat Large', 'som'),
        ('Large', 'lar'),
        ('Powerful', 'pow'),
        ('Very Powerful', 'vpo'),
    )),
    ('Navy', 'navyLevels', (
        ('None', 'non'),
        ('Some Dinghies', 'dng'),
        ('Coast Guard', 'cst'),
        ('Green Water', 'grn'),
        ('Blue Water', 'blu'),
        ('Powerful Blue Water', 'pbl'),
    )),
)


def writeCSV(name, alliances):
    """Write one column per alliance and one row per statistic to ``name``.

    The file starts with the summary rows (name, members, economy totals
    and averages), followed by the Troops, Tech, Airforce and Navy
    sections, each introduced by a row holding only the section heading.

    Args:
        name: path of the CSV file to create or overwrite.
        alliances: sequence of Alliance-like objects, one per column.
    """
    # newline='' lets the csv module control line endings itself; without
    # it every row is followed by a blank line on Windows.
    with open(name, 'w', newline='') as index:
        writer = csv.writer(index, quoting=csv.QUOTE_MINIMAL)
        for label, attr in _CSV_SUMMARY_ROWS:
            writer.writerow([label] + [getattr(a, attr) for a in alliances])
        for heading, attr, rows in _CSV_SECTIONS:
            writer.writerow([heading])
            for label, key in rows:
                writer.writerow(
                    [label] + [getattr(a, attr)[key] for a in alliances])
async def main():
    """Log in, crawl two ranking pages and save the result as a CSV file.

    Posts the login form, then checks the main page for the
    ``nationtitle`` element to decide whether the login worked. On
    failure it prints a message and returns without crawling.
    """
    # The login response body is read but its content is not inspected.
    async with client.post(BLOC + '/login.php', headers=HEADERS,
                           data=DATA) as login_response:
        await login_response.text()
    async with client.get(BLOC + '/main.php',
                          headers=HEADERS) as main_response:
        main_html = await main_response.text()

    title = BeautifulSoup(main_html, 'html.parser').find(id='nationtitle')
    if not title:
        print("Not logged in")
        return
    print("Logged in as {}\n".format(title.text))

    alliances = await scrape(2)
    # Timestamp the file name so repeated runs do not overwrite each other.
    stamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    writeCSV("bloc-alliance-data" + stamp + ".csv", alliances)
    print('Done!1')
if __name__ == '__main__':
    # Run the crawl, then always release the HTTP session and the loop,
    # even when main() raises.
    try:
        loop.run_until_complete(main())
    finally:
        try:
            # close() is a coroutine in current aiohttp releases, so it has
            # to be driven by the loop rather than just called.
            loop.run_until_complete(client.close())
        finally:
            loop.close()
Alliance H.B.E Union Of Defense Northern Realms Army of Conquest Condor Union Che Guevara League S.B.T. United Nations NEMESIS MSR Soul Republic The Templars WOJAK Fourth Reich The United Kingdom Ebott Confederacy Pan-African Union ICSA Eastern Bloc Jeb Bush 2016
Members 56 52 51 38 38 35 34 34 28 28 21 20 19 15 14 12 10 9 5 5
GDP 26949 22147 26463 19109 18481 17316 17622 19340 12894 13470 10046 7182 9571 6580 7111 5559 4426 3934 1797 1958
Average GDP 481.2 425.9 518.9 502.9 486.3 494.7 518.3 568.8 460.5 481.1 478.4 359.1 503.7 438.7 507.9 463.2 442.6 437.1 359.4 391.6
Factories 43 39 25 23 26 14 26 21 20 16 15 5 16 7 9 8 6 5 1 1
Average Factories 0.8 0.8 0.5 0.6 0.7 0.4 0.8 0.6 0.7 0.6 0.7 0.2 0.8 0.5 0.6 0.7 0.6 0.6 0.2 0.2
Universities 3 0 1 0 2 2 0 28 0 0 0 0 0 0 0 0 0 0 0 0
Average Universities 0.1 0 0 0 0.1 0.1 0 0.8 0 0 0 0 0 0 0 0 0 0 0 0
Oil Wells 99 114 117 93 77 51 92 152 50 59 35 16 51 28 30 13 17 19 5 19
Average Oil Wells 1.8 2.2 2.3 2.4 2 1.5 2.7 4.5 1.8 2.1 1.7 0.8 2.7 1.9 2.1 1.1 1.7 2.1 1 3.8
Mines 253 226 240 178 213 189 159 257 133 154 81 53 90 67 69 51 47 34 23 23
Average Mines 4.5 4.3 4.7 4.7 5.6 5.4 4.7 7.6 4.8 5.5 3.9 2.6 4.7 4.5 4.9 4.2 4.7 3.8 4.6 4.6
Troops
Mesoamerica 54 0 41 0 179 129 0 0 38 109 37 45 0 36 0 0 0 27 0 0
Caribbean 109 0 26 0 266 533 8 39 101 193 68 57 0 0 71 9 0 0 0 0
Gran Colombia 10 50 50 0 136 120 0 0 50 85 27 0 50 50 76 0 0 0 0 0
Amazonia 57 102 23 0 146 195 0 0 0 0 0 9 0 9 0 0 0 1 0 0
Southern Cone 0 0 0 0 70 44 37 0 0 0 0 80 0 0 0 0 0 0 0 0
West Africa 31 20 25 0 0 0 0 0 0 76 57 20 0 34 0 0 9 0 0 0
Guinea 18 33 101 0 41 41 92 41 0 0 0 40 38 0 0 0 0 0 0 0
East Africa 60 76 0 27 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Congo 58 97 0 0 0 0 3 34 38 0 0 20 0 0 0 0 195 33 0 0
Southern Africa 61 252 78 0 63 0 0 0 0 0 0 40 0 0 0 0 59 20 0 0
Atlas 65 31 216 61 40 0 126 39 79 0 5 5 76 0 0 0 0 0 10 0
Egypt 34 86 149 121 73 0 115 555 0 86 31 0 236 76 75 0 51 39 0 0
Mesopotamia 103 36 111 353 32 0 80 62 30 113 0 0 20 43 0 24 0 0 0 0
Arabia 148 60 77 236 87 0 134 105 0 111 19 25 217 80 10 31 0 0 0 0
Persia 26 228 576 229 0 0 83 0 0 74 48 0 0 0 0 0 0 84 19 156
The Subcontinent 350 72 0 73 0 0 0 523 240 30 26 0 0 0 48 0 0 39 0 0
China 253 167 19 0 31 0 166 40 23 77 98 96 7 43 57 26 0 0 67 0
Pacific Rim 178 35 47 0 0 0 308 0 357 0 38 54 0 125 0 209 0 0 0 0
Indochina 131 96 0 35 33 0 0 0 41 28 0 0 0 0 41 49 0 0 0 0
East Indies 147 153 52 92 0 0 94 0 65 0 70 0 0 0 0 0 0 0 0 0
Total 1893 1594 1591 1227 1197 1062 1246 1438 1062 982 524 491 644 496 378 348 314 243 96 156
Average 33.8 30.7 31.2 32.3 31.5 30.3 36.6 42.3 37.9 35.1 25 24.6 33.9 33.1 27 29 31.4 27 19.2 31.2
Tech
Stone Age 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Finest of the 19th century 15 11 9 8 7 6 11 3 8 6 4 15 1 3 1 1 4 2 0 1
First World War surplus 41 41 42 30 31 29 23 31 19 22 17 5 17 12 13 11 6 7 5 4
Second World War surplus 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0
Korean War surplus 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Vietnam War surplus 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Almost Modern 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Persian Gulf War surplus 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Advanced 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Airforce
None 17 19 15 14 9 14 11 2 9 7 10 16 7 5 1 5 4 2 3 2
Meagre 38 25 33 22 26 17 18 22 18 21 11 4 10 10 12 6 6 7 2 1
Small 1 8 3 1 2 4 5 9 1 0 0 0 2 0 1 1 0 0 0 2
Mediocre 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
Somewhat Large 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Large 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Powerful 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Very Powerful 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Navy
None 55 51 51 38 37 35 32 33 26 28 21 20 18 15 14 12 10 9 5 5
Some Dinghies 1 1 0 0 1 0 2 1 2 0 0 0 1 0 0 0 0 0 0 0
Coast Guard 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Green Water 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Blue Water 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Powerful Blue Water 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment