Last active
January 6, 2018 19:02
-
-
Save Andoryuuta/80e1cc5d5437985b0c41f61da84de8ab to your computer and use it in GitHub Desktop.
Scrape GitHub repo stars
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/json" | |
"flag" | |
"fmt" | |
"io/ioutil" | |
"log" | |
"net/http" | |
"os" | |
"strconv" | |
"time" | |
) | |
// StarGazer is one element of the JSON array that main decodes from the
// stargazers endpoint. Every field is populated from (and written back to)
// the JSON key named in its struct tag; field order is the order in which
// keys appear in the marshalled output file.
type StarGazer struct {
	// Account identity.
	Login      string `json:"login"`
	ID         int    `json:"id"`
	AvatarURL  string `json:"avatar_url"`
	GravatarID string `json:"gravatar_id"`

	// URL-valued fields, stored verbatim as strings.
	URL               string `json:"url"`
	HTMLURL           string `json:"html_url"`
	FollowersURL      string `json:"followers_url"`
	FollowingURL      string `json:"following_url"`
	GistsURL          string `json:"gists_url"`
	StarredURL        string `json:"starred_url"`
	SubscriptionsURL  string `json:"subscriptions_url"`
	OrganizationsURL  string `json:"organizations_url"`
	ReposURL          string `json:"repos_url"`
	EventsURL         string `json:"events_url"`
	ReceivedEventsURL string `json:"received_events_url"`

	// Account classification.
	Type      string `json:"type"`
	SiteAdmin bool   `json:"site_admin"`
}
const ( | |
API_GET_REPO_STARGAZERS = "https://api.github.com/repos/%s/stargazers?page=%d&per_page=%d&access_token=%s" | |
) | |
func main() { | |
repo := flag.String("repo", "", "GitHub `repo` path (e.g. 'octocat/Spoon-Knife'") | |
userAgent := flag.String("ua", "", "Your GitHub username for `User-Agent` (e.g. 'octocat')") | |
accessToken := flag.String("access_token", "", "A GitHub `access_token` (e.g. '098f6bcd4621d373cade4e832627b4f66ab21f7c')") | |
outputFileName := flag.String("out", "stargazers.json", "Output `file name`") | |
flag.Parse() | |
if *repo == "" || *userAgent == "" || *accessToken == "" { | |
fmt.Println("repo, ua, and access_token are required.") | |
flag.PrintDefaults() | |
os.Exit(1) | |
} | |
client := &http.Client{} | |
var allStarGazers []StarGazer | |
// Page 0 and 1 are the same for some reason, so start at page 1 to ignore dups. | |
for page := 1; ; page++ { | |
// Form the request | |
req, err := http.NewRequest("GET", fmt.Sprintf(API_GET_REPO_STARGAZERS, *repo, page, 100, *accessToken), nil) | |
if err != nil { | |
panic(err) | |
} | |
// GitHub recommends using a username for user-agent if they need to contact you. | |
req.Header.Set("User-Agent", *userAgent) | |
// Send the request and read the response. | |
resp, err := client.Do(req) | |
if err != nil { | |
panic(err) | |
} | |
defer resp.Body.Close() | |
body, err := ioutil.ReadAll(resp.Body) | |
if err != nil { | |
panic(err) | |
} | |
// Honor the Retry-After header | |
if retryAfterString, ok := resp.Header["Retry-After"]; ok { | |
fmt.Println("Got Header Retry-After:", retryAfterString[0]) | |
rtTime, err := strconv.Atoi(retryAfterString[0]) | |
if err != nil { | |
log.Println("Failed to parse Retry-After header as int") | |
panic(err) | |
} | |
// Sleep for given time. | |
time.Sleep(time.Duration(rtTime) * time.Second) | |
} | |
// Unmarshal result | |
var curStarGazers []StarGazer | |
err = json.Unmarshal(body, &curStarGazers) | |
if err != nil { | |
panic(err) | |
} | |
// Check if this was the last page. | |
if len(curStarGazers) == 0 { | |
break | |
} | |
// Append stargazer info from this current page to the total collection. | |
allStarGazers = append(allStarGazers, curStarGazers...) | |
} | |
// Pretty print / marshal all of the results and output. | |
b, err := json.MarshalIndent(allStarGazers, "", "\t") | |
if err != nil { | |
panic(err) | |
} | |
ioutil.WriteFile(*outputFileName, b, 0777) | |
fmt.Println("Done!") | |
fmt.Printf("Wrote %d stargazers info to %s\n", len(allStarGazers), *outputFileName) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
That's a fake access_token BTW, no need to worry about it.