tatianass/get_all_kaggle_datasets_with_tags.py

## get_all_kaggle_datasets_with_tags.py
#!/usr/bin/python

from kaggle.api.kaggle_api_extended import KaggleApi
import csv, sys, os

# Authentication
# Make sure to set your username and key in your environment variables.
api = KaggleApi()
api.authenticate()

# Header of the CSV; every data row written below follows this column order.
fields = ['ref', 'title', 'tags', 'size', 'lastUpdated', 'downloadCount']

page = 1
# Explicit UTF-8 so that non-ASCII titles/tags do not depend on the
# platform's default encoding.
with open('kaggle_datasets.csv', 'w', newline='', encoding='utf-8') as csv_file:

    writer = csv.writer(csv_file, delimiter=';')
    writer.writerow(fields)  # writes header

    # Writes information while there are still pages to search
    while True:
        # Only the API call is guarded: a failure while writing rows must not
        # be mistaken for "end of listing".
        # NOTE(review): some kaggle client releases may reject the `size`
        # argument -- confirm against the installed version. Any such error is
        # reported below instead of being silently treated as the last page.
        try:
            datasets = api.dataset_list(
                sort_by='hottest',
                size='all',
                file_type='all',
                license_name='all',
                page=page,
            )
        except Exception as e:
            # Best-effort export: keep what was written so far, but say why
            # the listing stopped.
            print('Stopped at page {}: {!r}'.format(page, e), file=sys.stderr)
            break

        # An empty page means the listing is exhausted; without this check the
        # loop would only end once the API call itself failed.
        if not datasets:
            print('No more pages to load.')
            break

        for dataset in datasets:
            # One row per (dataset, tag) pair; a dataset without tags
            # produces no rows.
            for tag in dataset.tags:
                writer.writerow([
                    dataset.ref,
                    dataset.title,
                    tag,
                    dataset.size,
                    dataset.lastUpdated,
                    dataset.downloadCount,
                ])

        page += 1
	#!/usr/bin/python

# NOTE(review): this section repeats the export script that precedes it in
# this file. If both copies are executed, 'kaggle_datasets.csv' is opened in
# 'w' mode twice, so the second run overwrites the output of the first.

from kaggle.api.kaggle_api_extended import KaggleApi
import csv
import os
import sys

# Authentication
# Make sure to set your username and key in your environment variables.
api = KaggleApi()
api.authenticate()

# Header of the CSV; every data row written below follows this column order.
fields = ['ref', 'title', 'tags', 'size', 'lastUpdated', 'downloadCount']

page = 1
# Explicit UTF-8 so that non-ASCII titles/tags do not depend on the
# platform's default encoding.
with open('kaggle_datasets.csv', 'w', newline='', encoding='utf-8') as csv_file:

    writer = csv.writer(csv_file, delimiter=';')
    writer.writerow(fields)  # writes header

    # Writes information while there are still pages to search
    while True:
        # Only the API call is guarded: a failure while writing rows must not
        # be mistaken for "end of listing".
        # NOTE(review): some kaggle client releases may reject the `size`
        # argument -- confirm against the installed version. Any such error is
        # reported below instead of being silently treated as the last page.
        try:
            datasets = api.dataset_list(
                sort_by='hottest',
                size='all',
                file_type='all',
                license_name='all',
                page=page,
            )
        except Exception as e:
            # Best-effort export: keep what was written so far, but say why
            # the listing stopped.
            print('Stopped at page {}: {!r}'.format(page, e), file=sys.stderr)
            break

        # An empty page means the listing is exhausted; without this check the
        # loop would only end once the API call itself failed.
        if not datasets:
            print('No more pages to load.')
            break

        for dataset in datasets:
            # One row per (dataset, tag) pair; a dataset without tags
            # produces no rows.
            for tag in dataset.tags:
                writer.writerow([
                    dataset.ref,
                    dataset.title,
                    tag,
                    dataset.size,
                    dataset.lastUpdated,
                    dataset.downloadCount,
                ])

        page += 1