Skip to content

Instantly share code, notes, and snippets.

@lobstrio
Last active October 24, 2023 13:26
Show Gist options
  • Save lobstrio/972a60304517e12d85682beee4d61703 to your computer and use it in GitHub Desktop.
Save lobstrio/972a60304517e12d85682beee4d61703 to your computer and use it in GitHub Desktop.
Extract all cryptocurrencies data from coinmarketcap.com using Python 3 and Request
#!/usr/bin/python3
# coding: utf-8
import requests
from bs4 import BeautifulSoup
from scrapy import Selector
import csv
import datetime
def extract(url):
"""
Export all cryptodata from cryptomarketcap.com
website
Arguments:
url (str):
url of the aimed Coinmarketcap page
Return:
.csv file
"""
# INITIALISATION
r = requests.session()
start = datetime.datetime.now()
# COLLECTE DU CODE SOURCE
for retry in range(10): # ajout d'un retrier en cas de site inaccessible
response = r.get(url=url)
print(response.headers)
print("-- STATUS CODE --")
print(response.status_code)
# PARSING ET CREATION DU CSV
if response.status_code == 200:
with open("/path/to/coinmarketcap/cryptocurrencies_{}.csv".format(str(datetime.date.today())), "w") as f:
fieldnames = ['Nom', 'Symbole', 'Cap. marché', 'Prix', 'Offre en circulation', 'Volume (24h)', '% 1h', '% 24h', '7 j']
writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter='\t')
writer.writeheader()
soup = BeautifulSoup(response.text, features='html.parser')
sel = Selector(text=soup.prettify())
cryptos = sel.xpath("//tr[contains(@id, 'id-')]").extract()
print(cryptos)
for crypto in cryptos:
soup = BeautifulSoup(crypto, features='html.parser')
sel = Selector(text=soup.prettify())
nom = sel.xpath("//td[contains(@class, 'currency-name')]/@data-sort").extract_first()
symbole = sel.xpath("//td[contains(@class, 'col-symbol')]/text()").extract_first()
cap_marche = sel.xpath("//td[contains(@class, 'market-cap')]/text()").extract_first()
prix = sel.xpath("//a[@class='price']/@data-usd").extract_first()
offre_circulation = sel.xpath("//a[@class='volume']/@data-usd").extract_first()
volume = sel.xpath("//td[contains(@class, 'circulating-supply')]/@data-sort").extract_first()
percent_1h = sel.xpath("//td[@data-timespan='1h']/@data-sort").extract_first()
percent_24h = sel.xpath("//td[@data-timespan='24h']/@data-sort").extract_first()
percent_7j = sel.xpath("//td[@data-timespan='7d']/@data-sort").extract_first()
clean_values = []
values = [nom, symbole, cap_marche, prix, offre_circulation, volume, percent_1h, percent_24h, percent_7j]
for value in values:
if value:
value = value.strip().replace('\n', '')
clean_values.append(value)
print(', '.join(clean_values))
dict_row = dict(zip(fieldnames, clean_values))
writer.writerow(dict_row)
# TEMPS PASSE
end = datetime.datetime.now()
time_elapsed = str(end - start)
print('\n')
print('-- TIME ELAPSED --')
print(time_elapsed)
break
elif response.status_code == 404:
print("Page indisponible")
break
else:
print("Impossible d'accéder à la page")
return []
def main():
url = "https://coinmarketcap.com/fr/all/views/all/"
extract(url)
if __name__ == '__main__':
main()
@gijdoul
Copy link

gijdoul commented Dec 20, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment