Skip to content

Instantly share code, notes, and snippets.

@jrmsklar
Created April 2, 2019 22:35
Show Gist options
  • Save jrmsklar/0be1ae6bc32cd907654bb971322b09b3 to your computer and use it in GitHub Desktop.
Save jrmsklar/0be1ae6bc32cd907654bb971322b09b3 to your computer and use it in GitHub Desktop.
Script to input a list of npm dependency names and pull relevant information for each one and generate a csv list.
# Script that reads a list of npm dependency names, pulls relevant information
# for each one, and generates a CSV list.
import sys
import requests
from bs4 import BeautifulSoup
import pandas
def _read_dependency_names(path):
    """Return the non-blank dependency names listed in the file at *path*.

    Each line is stripped of surrounding whitespace (including the trailing
    newline) so the same clean name is used both for the npm URL and for the
    CSV row. Blank lines are skipped so they never trigger a request for the
    bare ``/package/`` URL.
    """
    # The context manager closes the file even if reading fails.
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [name for name in stripped if name]


def _scrape_dependency(dependency_name, user_name):
    """Fetch the details of one npm package and return them as a CSV row.

    Returns a tuple of
    ``(dependency_name, version, license, description, user_name, npmjs_url)``.

    Raises ``IndexError`` if the npm page does not contain the elements
    looked up below (fewer than two matching ``<p>`` tags, or no matching
    link); errors raised by ``requests`` propagate unchanged.
    """
    npmjs_url = 'https://www.npmjs.com/package/' + dependency_name
    print('On ' + npmjs_url)
    npmjs_soup = BeautifulSoup(requests.get(npmjs_url).text, 'html.parser')

    # NOTE(review): these class names look generated and are tied to the
    # npmjs.com markup this script was written against -- confirm they still
    # match before relying on the output.
    attributes = npmjs_soup.find_all('p', attrs={'class': 'n8Z-E'})
    version = attributes[0].text
    license_text = attributes[1].text  # avoids shadowing the `license` builtin

    # The last link with this class is followed to look up the description;
    # presumably it is the package's GitHub repository -- verify.
    links = npmjs_soup.find_all(
        'a',
        attrs={'class': 'zE7yA n8Z-E fw6 mb3 mt2 truncate black-80 f4 link'},
    )
    github_url = links[-1]['href']
    github_soup = BeautifulSoup(requests.get(github_url).text, 'html.parser')

    # The description is optional: it stays empty when the linked page has no
    # <span itemprop="about"> element.
    description_soup = github_soup.find('span', attrs={'itemprop': 'about'})
    description = ''
    if description_soup is not None:
        description = description_soup.text.strip()

    return (dependency_name, version, license_text, description, user_name,
            npmjs_url)


def main(argv=None):
    """Build ``dependencies.csv`` from a file listing npm dependency names.

    *argv* defaults to ``sys.argv`` and must hold exactly two arguments after
    the program name: the path of the dependency list and the user's last
    name (copied into every row).

    Returns the process exit status: ``1`` after printing usage help when the
    argument count is wrong, ``0`` otherwise.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Please specify two command line arguments:')
        print('\trelative path to filename that lists all npm dependencies')
        print('\tyour last name (e.g. Sklar)')
        return 1

    dependency_names = _read_dependency_names(argv[1])
    user_name = argv[2]
    total = len(dependency_names)

    records = []
    for index, dependency_name in enumerate(dependency_names, start=1):
        row = _scrape_dependency(dependency_name, user_name)
        print(row)
        records.append(row)
        # The CSV is rewritten after every package so that the rows gathered
        # so far survive if a later request or page lookup fails.
        df = pandas.DataFrame(records)
        df.to_csv('dependencies.csv', index=False, encoding='utf-8')
        print('Wrote to dependencies.csv\n' + str(index) + '/' + str(total) + '\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment