Skip to content

Instantly share code, notes, and snippets.

@cjdinger
Created January 4, 2017 21:40
Show Gist options
  • Save cjdinger/c9fa484ccf775cc5e611a935e9699d27 to your computer and use it in GitHub Desktop.
Save cjdinger/c9fa484ccf775cc5e611a935e9699d27 to your computer and use it in GitHub Desktop.
Using SAS to report on GitHub repos for an organization or user account
/* Fetch the repositories and stats from an */
/* organization's account on GitHub */
/* Requires SAS 9.4 Maint 4 */
/* by Chris Hemedinger */
/* Copyright 2017 SAS Institute Inc. */
/* Change this value to the account you want to report on */
/* Some fun accounts: netflix, spotify, apache */
%let account=sassoftware;
%let acct_type=orgs;
/* Or try reporting on an individual account */
*%let account=cjdinger;
*%let acct_type=users;
/* Establish temp file for HTTP response */
filename resp temp;
/* Use this for debugging the HTTP response if needed */
/* %echoResp(fn=resp) will echo the response to the SAS log */
%macro echoResp(fn=);
data _null_;
infile &fn;
input;
put _infile_;
run;
%mend;
/* NOTE: GitHub API is rate-limited for unauthenticated calls */
/* Repeated runs from the same IP address */
/* will result in suspension of results */
/* for a short time. */
/* Note that SAS University Edition won't support this, */
/* requires SSL (https) support. */
/* Get Org metadata, including repo count */
proc http
url="https://api.github.com/&acct_type./&account."
method="GET"
out=resp
;
run;
/* Read response as JSON data, extract count field */
/* It's in the ROOT data set, found via experiment */
libname ss json fileref=resp;
data meta;
set ss.root;
call symputx('repocount',public_repos);
call symputx('acctname',name);
call symputx('accturl',html_url);
run;
/* GitHub API paginates at 30 repositories, so calc the */
/* number of pages we need to call to get all repos */
/* Number of repos / 30, rounded up to next integer */
%let pages=%sysfunc(ceil(%sysevalf(&repocount / 30)));
/* This trimmed JSON map defines just the fields we want */
/* Created by using AUTOMAP=CREATE on JSON libname */
/* then editing the generated map file to reduce to */
/* minimum number of fields of interest */
filename repomap temp;
data _null_;
infile datalines;
file repomap;
input;
put _infile_;
datalines;
{
"DATASETS": [
{
"DSNAME": "root",
"TABLEPATH": "/root",
"VARIABLES": [
{
"NAME": "id",
"TYPE": "NUMERIC",
"PATH": "/root/id"
},
{
"NAME": "name",
"TYPE": "CHARACTER",
"PATH": "/root/name",
"CURRENT_LENGTH": 50,
"LENGTH": 50
},
{
"NAME": "html_url",
"TYPE": "CHARACTER",
"PATH": "/root/html_url",
"CURRENT_LENGTH": 100,
"LENGTH": 100
},
{
"NAME": "language",
"TYPE": "CHARACTER",
"PATH": "/root/language",
"CURRENT_LENGTH": 20,
"LENGTH": 20
},
{
"NAME": "description",
"TYPE": "CHARACTER",
"PATH": "/root/description",
"CURRENT_LENGTH": 300,
"LENGTH": 500
},
{
"NAME": "created_at",
"TYPE": "NUMERIC",
"INFORMAT": [ "IS8601DT", 19, 0 ],
"FORMAT": ["DATETIME", 20],
"PATH": "/root/created_at",
"CURRENT_LENGTH": 20
},
{
"NAME": "updated_at",
"TYPE": "NUMERIC",
"INFORMAT": [ "IS8601DT", 19, 0 ],
"FORMAT": ["DATETIME", 20],
"PATH": "/root/updated_at",
"CURRENT_LENGTH": 20
},
{
"NAME": "pushed_at",
"TYPE": "NUMERIC",
"INFORMAT": [ "IS8601DT", 19, 0 ],
"FORMAT": ["DATETIME", 20],
"PATH": "/root/pushed_at",
"CURRENT_LENGTH": 20
},
{
"NAME": "size",
"TYPE": "NUMERIC",
"PATH": "/root/size"
},
{
"NAME": "stars",
"TYPE": "NUMERIC",
"PATH": "/root/stargazers_count"
},
{
"NAME": "forks",
"TYPE": "NUMERIC",
"PATH": "/root/forks"
},
{
"NAME": "open_issues",
"TYPE": "NUMERIC",
"PATH": "/root/open_issues"
}
]
}
]
}
;
run;
/* GETREPOS: iterate through each "page" of repositories */
/* and collect the GitHub data */
/* Output: <account>_REPOS, a data set with all basic data */
/* about an account's public repositories */
%macro getrepos;
%do i = 1 %to &pages;
proc http
url="https://api.github.com/&acct_type./&account./repos?page=&i."
method="GET"
out=resp
;
run;
/* Use JSON engine with defined map to capture data */
libname repos json map=repomap fileref=resp;
data _repos&i.;
set repos.root;
run;
%end;
/* Concatenate all pages of data */
data &account._allrepos;
set _repos:;
run;
/* delete intermediate repository data */
proc datasets nolist nodetails;
delete _repos:;
quit;
%mend;
/* Run the macro */
%getrepos;
/* Best with ODS HTML output */
title "github.com/&account (&acctname.): Repositories and stats";
title2 "ALL &repocount. repos, Data pulled with GitHub API as of &SYSDATE.";
title3 height=1 link="&accturl." "See &acctname. on GitHub";
proc sql;
select
catt('<a href="',t1.html_url,'">',t1.name,"</a>") as Repository,
case
when length(t1.description)>50 then cat(substr(t1.description,1,49),'...')
else t1.description
end as Description,
t1.language as Language,
t1.created_at format=dtdate9. as Created,
t1.pushed_at format=dtdate9. as Last_Update,
t1.stars as Stars,
t1.forks as Forks,
t1.open_issues as Open_Issues
from &account._allrepos t1
order by t1.pushed_at desc;
quit;
@cjdinger
Copy link
Author

cjdinger commented Jan 4, 2017

Sample report output:
samplereport

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment