carlmjohnson/get-repos.go (secret gist, last active Feb 1, 2018)
Fix race in wg.Wait()
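The description refers to the usual sync.WaitGroup pitfall: if wg.Add runs inside the spawned goroutine, the parent's wg.Wait can observe a zero counter and return before any work has been registered. In get-repos.go every wg.Add(1) happens before the corresponding go statement, which is what closes the race. A minimal, self-contained sketch of the fixed pattern (the worker body here is a stand-in, not code from the gist):

package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup

	// Racy variant (don't do this): calling wg.Add(1) inside the new
	// goroutine means wg.Wait below may see a zero counter and return
	// before the goroutine has even started.
	//
	// go func() {
	// 	wg.Add(1)
	// 	defer wg.Done()
	// 	fmt.Println("work")
	// }()

	// Fixed variant, the pattern get-repos.go uses: Add before go.
	wg.Add(1)
	go func() {
		defer wg.Done()
		fmt.Println("work")
	}()

	wg.Wait()
	fmt.Println("all workers done")
}

The gist's get-repos.go follows.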
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"os"
	"strconv"
	"sync"

	"github.com/peterhellberg/link"
)

// Endpoint URLs
const (
	GithubAPIURL        = "https://api.github.com"
	GithubOrgID         = "pbs"
	GithubOrgMembersURL = GithubAPIURL + "/orgs/" + GithubOrgID + "/members"
)
func die(err error) {
	if err != nil {
		fmt.Fprintln(os.Stderr, "Fatal error:", err)
		os.Exit(1)
	}
}

// Github token global
var token string

func initConfig() error {
	f, err := os.Open("token.json")
	if err != nil {
		return err
	}
	defer f.Close()
	var tokenJSON struct{ AuthToken string }
	dec := json.NewDecoder(f)
	err = dec.Decode(&tokenJSON)
	if err != nil {
		return err
	}
	token = tokenJSON.AuthToken
	return nil
}
func githubRequest(url string, data interface{}) (nextUrl string, err error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Add("Authorization", fmt.Sprintf("token %s", token))
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("bad status (%d) for %s", resp.StatusCode, resp.Request.URL)
	}
	// Look at the Link header to see if this is a paginated response
	if l, ok := link.ParseResponse(resp)["next"]; ok {
		nextUrl = l.URI
	}
	dec := json.NewDecoder(resp.Body)
	return nextUrl, dec.Decode(data)
}
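// Note on githubRequest above: GitHub paginates list endpoints via the
// Link response header, which looks roughly like
//
//	Link: <https://api.github.com/...?page=2>; rel="next", <...?page=5>; rel="last"
//
// link.ParseResponse returns the parsed links keyed by their rel value, so
// looking up "next" yields the URI of the following page when there is one.
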
func prettyPrint(data interface{}) error {
	b, err := json.MarshalIndent(data, "", "\t")
	if err != nil {
		return err
	}
	_, err = fmt.Println(string(b))
	return err
}
func main() {
	die(initConfig())

	// Get number of workers from WORKERS env variable, with default 10
	numWorkers, _ := strconv.Atoi(os.Getenv("WORKERS"))
	if numWorkers < 1 {
		numWorkers = 10
	}

	var wg sync.WaitGroup
	semaphore := make(chan bool, numWorkers)

	type result struct {
		data interface{}
		err  error
	}
	results := make(chan result)

	// Pre-declaring closures so they can refer to themselves
	var repoFetcher, orgFetcher func(url string)

	// Define repoFetcher as a closure to pass in the concurrency variables
	repoFetcher = func(url string) {
		var resultData []struct {
			Full_name   string
			Description string
		}
		// Grab semaphore slot
		semaphore <- true
		nextUrl, err := githubRequest(url, &resultData)
		// Release semaphore slot
		<-semaphore
		if nextUrl != "" {
			wg.Add(1)
			go repoFetcher(nextUrl)
		}
		results <- result{&resultData, err}
		wg.Done()
	}

	orgFetcher = func(url string) {
		var data []struct {
			Repos_url string
		}
		// Grab semaphore slot
		semaphore <- true
		nextUrl, err := githubRequest(url, &data)
		die(err)
		// Release semaphore slot
		<-semaphore
		if nextUrl != "" {
			wg.Add(1)
			go orgFetcher(nextUrl)
		}
		for i := range data {
			wg.Add(1)
			go repoFetcher(data[i].Repos_url)
		}
		wg.Done()
	}

	// Kick off the process
	wg.Add(1)
	go orgFetcher(GithubOrgMembersURL)

	// Once all workers are done, close results channel
	go func() {
		wg.Wait()
		close(results)
	}()

	// Gather results
	for r := range results {
		die(r.err)
		die(prettyPrint(&r.data))
	}
}
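The Go program expects a token.json file in the working directory holding the OAuth token, e.g. {"authToken": "<your GitHub OAuth token>"} (encoding/json matches the key to the AuthToken field case-insensitively), and reads an optional WORKERS environment variable to size the semaphore that caps concurrent API requests, defaulting to 10. A version of the same fetcher written with Python's asyncio and aiohttp follows.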
import asyncio
import json
import os
import pprint

import aiohttp

GITHUB_API_URL = 'https://api.github.com'
GITHUB_ORG_ID = 'pbs'

with open('{0}/token.json'.format(os.path.abspath('.')), encoding='utf-8') as data_file:
    OAUTH_TOKEN = json.loads(data_file.read())['authToken']

loop = asyncio.get_event_loop()


def pretty_print(thing):
    pprint.PrettyPrinter(indent=4).pprint(thing)


async def fetch_github_page(session, url):
    headers = {
        'Authorization': 'token {0}'.format(OAUTH_TOKEN),
    }
    with aiohttp.Timeout(10):
        async with session.get(url, headers=headers) as response:
            assert response.status == 200
            content = (await response.read()).decode()
            return json.loads(content)


async def multi_fetch_github_pages(session, urls):
    requests = [fetch_github_page(session, url) for url in urls]
    responses = await asyncio.gather(*requests)
    return responses


def main():
    org_url = '{0}/orgs/{1}/members'.format(GITHUB_API_URL, GITHUB_ORG_ID)
    with aiohttp.ClientSession(loop=loop) as session:
        data = loop.run_until_complete(
            fetch_github_page(session, org_url))
        urls = [member_data['repos_url'] for member_data in data]
        responses = loop.run_until_complete(multi_fetch_github_pages(session, urls))
        for response in responses:
            for repo in response:
                pretty_print((repo['full_name'], repo['description']))


if __name__ == '__main__':
    main()
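The Python version reads the same token.json (key authToken), fetches the member list first, then fans out over each member's repos_url with asyncio.gather. Unlike the Go version it does not follow Link-header pagination and does not cap the number of in-flight requests. It also relies on the older aiohttp API of its day (plain with on ClientSession and the aiohttp.Timeout helper); newer aiohttp releases expect async with and a different timeout mechanism.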