Skip to content

Instantly share code, notes, and snippets.

@Andoryuuta
Last active January 6, 2018 19:02
Show Gist options
  • Save Andoryuuta/80e1cc5d5437985b0c41f61da84de8ab to your computer and use it in GitHub Desktop.
Save Andoryuuta/80e1cc5d5437985b0c41f61da84de8ab to your computer and use it in GitHub Desktop.
Scrape GitHub repo stars
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"strconv"
"time"
)
// StarGazer is one element of the JSON array returned by the stargazers
// endpoint this program queries. Each field is filled from the JSON key named
// in its struct tag, and the same keys (in this field order) are used when the
// collected results are written back out.
type StarGazer struct {
	// Account identity.
	Login      string `json:"login"`
	ID         int    `json:"id"`
	AvatarURL  string `json:"avatar_url"`
	GravatarID string `json:"gravatar_id"`

	// Links for the account itself.
	URL     string `json:"url"`
	HTMLURL string `json:"html_url"`

	// Links to related API resources, stored exactly as received.
	FollowersURL      string `json:"followers_url"`
	FollowingURL      string `json:"following_url"`
	GistsURL          string `json:"gists_url"`
	StarredURL        string `json:"starred_url"`
	SubscriptionsURL  string `json:"subscriptions_url"`
	OrganizationsURL  string `json:"organizations_url"`
	ReposURL          string `json:"repos_url"`
	EventsURL         string `json:"events_url"`
	ReceivedEventsURL string `json:"received_events_url"`

	// Account classification.
	Type      string `json:"type"`
	SiteAdmin bool   `json:"site_admin"`
}
const (
	// API_GET_REPO_STARGAZERS is the URL format for one page of a repository's
	// stargazers. Verbs, in order: repo path ("owner/name"), page number,
	// page size. The access token is deliberately not part of the URL; it is
	// sent in the Authorization header instead, because GitHub no longer
	// accepts tokens passed as query parameters and URLs tend to end up in
	// logs and error messages.
	API_GET_REPO_STARGAZERS = "https://api.github.com/repos/%s/stargazers?page=%d&per_page=%d"

	// stargazersPerPage is the page size requested from the API.
	stargazersPerPage = 100

	// maxAttemptsPerPage bounds how many times a single page is requested
	// when the server keeps answering with Retry-After.
	maxAttemptsPerPage = 5

	// requestTimeout caps the duration of a single HTTP request so a stalled
	// connection cannot hang the program forever.
	requestTimeout = 30 * time.Second
)

// stargazersURL builds the request URL for the given page of repo's
// stargazers.
func stargazersURL(repo string, page, perPage int) string {
	return fmt.Sprintf(API_GET_REPO_STARGAZERS, repo, page, perPage)
}

// retryAfterDelay converts a Retry-After header value into the duration to
// wait. Both forms allowed by HTTP are accepted: a delay in whole seconds or
// an HTTP-date. Negative delays and dates in the past yield zero. An error is
// returned when the value matches neither form.
func retryAfterDelay(value string) (time.Duration, error) {
	if secs, err := strconv.Atoi(value); err == nil {
		if secs < 0 {
			return 0, nil
		}
		return time.Duration(secs) * time.Second, nil
	}

	when, err := http.ParseTime(value)
	if err != nil {
		return 0, fmt.Errorf("parsing Retry-After value %q: %v", value, err)
	}
	if d := time.Until(when); d > 0 {
		return d, nil
	}
	return 0, nil
}

// getBody performs one authenticated GET request and returns the response
// together with its fully read body. The body is already closed when getBody
// returns; only the status and headers of the returned response may be used.
func getBody(client *http.Client, url, userAgent, accessToken string) (*http.Response, []byte, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, nil, err
	}
	// GitHub recommends using a username for user-agent if they need to contact you.
	req.Header.Set("User-Agent", userAgent)
	req.Header.Set("Authorization", "token "+accessToken)

	resp, err := client.Do(req)
	if err != nil {
		return nil, nil, err
	}
	// Deferred here rather than in the paging loop, so each body is closed
	// as soon as its page has been read instead of when the program ends.
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, nil, err
	}
	return resp, body, nil
}

// fetchStargazersPage downloads and decodes the page of stargazers at url.
// When the server sends a Retry-After header the requested delay is honored;
// if the response was also not a success, the same page is requested again,
// up to maxAttemptsPerPage attempts. Any other non-200 response is returned
// as an error that includes the status and the response body.
func fetchStargazersPage(client *http.Client, url, userAgent, accessToken string) ([]StarGazer, error) {
	for attempt := 1; ; attempt++ {
		resp, body, err := getBody(client, url, userAgent, accessToken)
		if err != nil {
			return nil, err
		}

		if value := resp.Header.Get("Retry-After"); value != "" {
			fmt.Println("Got Header Retry-After:", value)
			delay, err := retryAfterDelay(value)
			if err != nil {
				return nil, err
			}
			time.Sleep(delay)
			// A rejected request carries no stargazers; ask again instead
			// of trying to decode the error payload.
			if resp.StatusCode != http.StatusOK && attempt < maxAttemptsPerPage {
				continue
			}
		}

		if resp.StatusCode != http.StatusOK {
			return nil, fmt.Errorf("GET %s: unexpected status %s: %s", url, resp.Status, body)
		}

		var gazers []StarGazer
		if err := json.Unmarshal(body, &gazers); err != nil {
			return nil, fmt.Errorf("decoding response from %s: %v", url, err)
		}
		return gazers, nil
	}
}

// main collects every stargazer of the repository named by -repo, page by
// page, and writes them as indented JSON to the file named by -out.
// The -repo, -ua and -access_token flags are required; the program prints the
// flag defaults and exits with status 1 when one is missing, and exits with a
// logged error when a request, the encoding, or the file write fails.
func main() {
	repo := flag.String("repo", "", "GitHub `repo` path (e.g. 'octocat/Spoon-Knife'")
	userAgent := flag.String("ua", "", "Your GitHub username for `User-Agent` (e.g. 'octocat')")
	accessToken := flag.String("access_token", "", "A GitHub `access_token` (e.g. '098f6bcd4621d373cade4e832627b4f66ab21f7c')")
	outputFileName := flag.String("out", "stargazers.json", "Output `file name`")
	flag.Parse()

	if *repo == "" || *userAgent == "" || *accessToken == "" {
		fmt.Println("repo, ua, and access_token are required.")
		flag.PrintDefaults()
		os.Exit(1)
	}

	client := &http.Client{Timeout: requestTimeout}

	// Start from an empty, non-nil slice so that a repository without any
	// stars is written as "[]" rather than "null".
	allStarGazers := []StarGazer{}

	// Page 0 and 1 are the same for some reason, so start at page 1 to ignore dups.
	for page := 1; ; page++ {
		pageURL := stargazersURL(*repo, page, stargazersPerPage)
		gazers, err := fetchStargazersPage(client, pageURL, *userAgent, *accessToken)
		if err != nil {
			log.Fatalf("fetching page %d: %v", page, err)
		}

		// An empty page means the previous one was the last.
		if len(gazers) == 0 {
			break
		}
		allStarGazers = append(allStarGazers, gazers...)
	}

	// Pretty print / marshal all of the results and output.
	b, err := json.MarshalIndent(allStarGazers, "", "\t")
	if err != nil {
		log.Fatalf("encoding stargazers: %v", err)
	}
	// 0644: the output is a plain data file, not an executable.
	if err := ioutil.WriteFile(*outputFileName, b, 0644); err != nil {
		log.Fatalf("writing %s: %v", *outputFileName, err)
	}

	fmt.Println("Done!")
	fmt.Printf("Wrote %d stargazers info to %s\n", len(allStarGazers), *outputFileName)
}
@Andoryuuta
Copy link
Author

The access token shown in the `-access_token` flag's help text is a made-up example, not a real credential, so there is no need to worry about it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment