Created
January 4, 2017 21:40
-
-
Save cjdinger/c9fa484ccf775cc5e611a935e9699d27 to your computer and use it in GitHub Desktop.
Using SAS to report on GitHub repos for an organization or user account
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Fetch the repositories and stats from an */ | |
/* organization's account on GitHub */ | |
/* Requires SAS 9.4 Maint 4 */ | |
/* by Chris Hemedinger */ | |
/* Copyright 2017 SAS Institute Inc. */ | |
/* Change this value to the account you want to report on */ | |
/* Some fun accounts: netflix, spotify, apache */ | |
%let account=sassoftware; | |
%let acct_type=orgs; | |
/* Or try reporting on an individual account */ | |
*%let account=cjdinger; | |
*%let acct_type=users; | |
/* Establish temp file for HTTP response */ | |
filename resp temp; | |
/* Use this for debugging the HTTP response if needed */ | |
/* %echoResp(fn=resp) will echo the response to the SAS log */ | |
%macro echoResp(fn=); | |
data _null_; | |
infile &fn; | |
input; | |
put _infile_; | |
run; | |
%mend; | |
/* NOTE: GitHub API is rate-limited for unauthenticated calls */ | |
/* Repeated runs from the same IP address */ | |
/* will result in suspension of results */ | |
/* for a short time. */ | |
/* Note that SAS University Edition won't support this, */ | |
/* requires SSL (https) support. */ | |
/* Get Org metadata, including repo count */ | |
proc http | |
url="https://api.github.com/&acct_type./&account." | |
method="GET" | |
out=resp | |
; | |
run; | |
/* Read response as JSON data, extract count field */ | |
/* It's in the ROOT data set, found via experiment */ | |
libname ss json fileref=resp; | |
data meta; | |
set ss.root; | |
call symputx('repocount',public_repos); | |
call symputx('acctname',name); | |
call symputx('accturl',html_url); | |
run; | |
/* GitHub API paginates at 30 repositories, so calc the */ | |
/* number of pages we need to call to get all repos */ | |
/* Number of repos / 30, rounded up to next integer */ | |
%let pages=%sysfunc(ceil(%sysevalf(&repocount / 30))); | |
/* This trimmed JSON map defines just the fields we want */ | |
/* Created by using AUTOMAP=CREATE on JSON libname */ | |
/* then editing the generated map file to reduce to */ | |
/* minimum number of fields of interest */ | |
filename repomap temp; | |
data _null_; | |
infile datalines; | |
file repomap; | |
input; | |
put _infile_; | |
datalines; | |
{ | |
"DATASETS": [ | |
{ | |
"DSNAME": "root", | |
"TABLEPATH": "/root", | |
"VARIABLES": [ | |
{ | |
"NAME": "id", | |
"TYPE": "NUMERIC", | |
"PATH": "/root/id" | |
}, | |
{ | |
"NAME": "name", | |
"TYPE": "CHARACTER", | |
"PATH": "/root/name", | |
"CURRENT_LENGTH": 50, | |
"LENGTH": 50 | |
}, | |
{ | |
"NAME": "html_url", | |
"TYPE": "CHARACTER", | |
"PATH": "/root/html_url", | |
"CURRENT_LENGTH": 100, | |
"LENGTH": 100 | |
}, | |
{ | |
"NAME": "language", | |
"TYPE": "CHARACTER", | |
"PATH": "/root/language", | |
"CURRENT_LENGTH": 20, | |
"LENGTH": 20 | |
}, | |
{ | |
"NAME": "description", | |
"TYPE": "CHARACTER", | |
"PATH": "/root/description", | |
"CURRENT_LENGTH": 300, | |
"LENGTH": 500 | |
}, | |
{ | |
"NAME": "created_at", | |
"TYPE": "NUMERIC", | |
"INFORMAT": [ "IS8601DT", 19, 0 ], | |
"FORMAT": ["DATETIME", 20], | |
"PATH": "/root/created_at", | |
"CURRENT_LENGTH": 20 | |
}, | |
{ | |
"NAME": "updated_at", | |
"TYPE": "NUMERIC", | |
"INFORMAT": [ "IS8601DT", 19, 0 ], | |
"FORMAT": ["DATETIME", 20], | |
"PATH": "/root/updated_at", | |
"CURRENT_LENGTH": 20 | |
}, | |
{ | |
"NAME": "pushed_at", | |
"TYPE": "NUMERIC", | |
"INFORMAT": [ "IS8601DT", 19, 0 ], | |
"FORMAT": ["DATETIME", 20], | |
"PATH": "/root/pushed_at", | |
"CURRENT_LENGTH": 20 | |
}, | |
{ | |
"NAME": "size", | |
"TYPE": "NUMERIC", | |
"PATH": "/root/size" | |
}, | |
{ | |
"NAME": "stars", | |
"TYPE": "NUMERIC", | |
"PATH": "/root/stargazers_count" | |
}, | |
{ | |
"NAME": "forks", | |
"TYPE": "NUMERIC", | |
"PATH": "/root/forks" | |
}, | |
{ | |
"NAME": "open_issues", | |
"TYPE": "NUMERIC", | |
"PATH": "/root/open_issues" | |
} | |
] | |
} | |
] | |
} | |
; | |
run; | |
/* GETREPOS: iterate through each "page" of repositories */ | |
/* and collect the GitHub data */ | |
/* Output: <account>_REPOS, a data set with all basic data */ | |
/* about an account's public repositories */ | |
%macro getrepos; | |
%do i = 1 %to &pages; | |
proc http | |
url="https://api.github.com/&acct_type./&account./repos?page=&i." | |
method="GET" | |
out=resp | |
; | |
run; | |
/* Use JSON engine with defined map to capture data */ | |
libname repos json map=repomap fileref=resp; | |
data _repos&i.; | |
set repos.root; | |
run; | |
%end; | |
/* Concatenate all pages of data */ | |
data &account._allrepos; | |
set _repos:; | |
run; | |
/* delete intermediate repository data */ | |
proc datasets nolist nodetails; | |
delete _repos:; | |
quit; | |
%mend; | |
/* Run the macro */ | |
%getrepos; | |
/* Best with ODS HTML output */ | |
title "github.com/&account (&acctname.): Repositories and stats"; | |
title2 "ALL &repocount. repos, Data pulled with GitHub API as of &SYSDATE."; | |
title3 height=1 link="&accturl." "See &acctname. on GitHub"; | |
proc sql; | |
select | |
catt('<a href="',t1.html_url,'">',t1.name,"</a>") as Repository, | |
case | |
when length(t1.description)>50 then cat(substr(t1.description,1,49),'...') | |
else t1.description | |
end as Description, | |
t1.language as Language, | |
t1.created_at format=dtdate9. as Created, | |
t1.pushed_at format=dtdate9. as Last_Update, | |
t1.stars as Stars, | |
t1.forks as Forks, | |
t1.open_issues as Open_Issues | |
from &account._allrepos t1 | |
order by t1.pushed_at desc; | |
quit; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sample report output: