Skip to content

Instantly share code, notes, and snippets.

@sentientmachine
Forked from lobstrio/coinmarketcap_extract.py
Last active December 28, 2021 20:44
Show Gist options
  • Save sentientmachine/820e0c47b07fe90580e4c758d9c5df97 to your computer and use it in GitHub Desktop.
Extract all cryptocurrencies data from coinmarketcap.com using Python 3 and Request
#!/usr/bin/python3
# coding: utf-8
import csv
import datetime
import os

import requests
from bs4 import BeautifulSoup
from scrapy import Selector
#pip3 install --user bs4
#pip3 install --user scrapy
def extract(url):
    """Scrape a coinmarketcap "all cryptocurrencies" listing page and dump it to CSV.

    Args:
        url (str): URL of the coinmarketcap page to scrape.

    Returns:
        None on success or on a 404; [] when all 10 retries fail.

    NOTE(review): screen-scraping is against coinmarketcap.com's terms of
    service and the HTML is re-obfuscated every few months; a sustainable
    approach is an official API (e.g. coinbase's developer API) instead of
    requests/scrapy/BeautifulSoup.
    """
    print("Export all cryptodata from coinmarketcap.com")
    session = requests.session()
    start = datetime.datetime.now()
    # Retry up to 10 times if the site is temporarily inaccessible.
    # (Original bug: the else-branch returned [] on the FIRST failure, so
    # the loop never actually retried. Now we keep looping and only return
    # [] after all attempts are exhausted.)
    for retry in range(10):
        response = session.get(url=url)
        print("response is: ")
        print(response.headers)
        print("-- STATUS CODE --")
        print(response.status_code)
        print("now do parsing")
        if response.status_code == 200:
            print("response code is good")
            # Ensure the output directory exists before opening the file.
            os.makedirs("/tmp/coinmarketcap", exist_ok=True)
            out_path = "/tmp/coinmarketcap/cryptocurrencies_{}.csv".format(str(datetime.date.today()))
            with open(out_path, "w") as f:
                fieldnames = ['Nom', 'Symbole', 'Cap. marche', 'Prix', 'Offre en circulation', 'Volume (24h)', '% 1h', '% 24h', '7 j']
                writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter='\t')
                writer.writeheader()
                soup = BeautifulSoup(response.text, features='html.parser')
                sel = Selector(text=soup.prettify())
                # One <tr class="cmc-table-row"> per listed currency.
                cryptos = sel.xpath("//tr[contains(@class, 'cmc-table-row')]").extract()
                print("cryptos list: '" + str(cryptos) + "'")
                for crypto in cryptos:
                    row_soup = BeautifulSoup(crypto, features='html.parser')
                    row_sel = Selector(text=row_soup.prettify())
                    print("found a crypto: " + str(crypto))
                    nom = row_sel.xpath("//a[contains(@class, 'cmc-table__column-name--name')]/text()").extract_first()
                    symbole = row_sel.xpath("//td[contains(@class, 'col-symbol')]/text()").extract_first()
                    cap_marche = row_sel.xpath("//td[contains(@class, 'market-cap')]/text()").extract_first()
                    prix = row_sel.xpath("//a[@class='price']/@data-usd").extract_first()
                    offre_circulation = row_sel.xpath("//a[@class='volume']/@data-usd").extract_first()
                    volume = row_sel.xpath("//td[contains(@class, 'circulating-supply')]/@data-sort").extract_first()
                    percent_1h = row_sel.xpath("//td[@data-timespan='1h']/@data-sort").extract_first()
                    percent_24h = row_sel.xpath("//td[@data-timespan='24h']/@data-sort").extract_first()
                    percent_7j = row_sel.xpath("//td[@data-timespan='7d']/@data-sort").extract_first()
                    values = [nom, symbole, cap_marche, prix, offre_circulation, volume, percent_1h, percent_24h, percent_7j]
                    # Original bug: missing (None) values were dropped, which
                    # shifted every later value under the wrong CSV header.
                    # Keep one slot per field, emitting '' for missing cells.
                    clean_values = [(v.strip().replace('\n', '') if v else '') for v in values]
                    writer.writerow(dict(zip(fieldnames, clean_values)))
            # Report how long the scrape took.
            end = datetime.datetime.now()
            print('\n')
            print('-- TIME ELAPSED --')
            print(str(end - start))
            break
        elif response.status_code == 404:
            print("Page indisponible")
            break
        else:
            print("Can't load page.")
    else:
        # All retries failed without a usable response.
        return []
def main():
    """Entry point: scrape the coinmarketcap "all cryptocurrencies" page."""
    target = "https://coinmarketcap.com/all/views/all/"
    print("url: '" + str(target) + "'")
    extract(target)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment