Created
June 13, 2014 15:47
-
-
Save jehiah/80ec17768b72fd0f3e43 to your computer and use it in GitHub Desktop.
Encode a bulk csv of links against the Bitly API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This script bulk-shortens the URLs found in a CSV file using the Bitly API.
package main | |
import ( | |
"bufio" | |
"flag" | |
"fmt" | |
"io" | |
"log" | |
"os" | |
"sync" | |
"time" | |
"encoding/csv" | |
"encoding/json" | |
"io/ioutil" | |
"net/http" | |
"net/url" | |
) | |
// Package-level state shared by main, the workers and the output writers.
var (
	// httpClient is the client shortenUrl sends its requests with; main
	// assigns it before any worker is started.
	httpClient *http.Client

	// bitlyApiUrl and bitlyAccessToken are copied by main from the
	// --bitly-api and --access-token flags.
	bitlyApiUrl      string
	bitlyAccessToken string

	// wg counts running worker goroutines; writeWG counts running
	// writeOutput goroutines.
	wg      sync.WaitGroup
	writeWG sync.WaitGroup
)
// Message is one unit of work passed between main and the workers: a long URL
// together with the number of shorten attempts already made for it.
type Message struct {
	url     string // long URL to shorten
	attempt int    // incremented by a worker each time it picks the message up
}
// BitlyApiMsg is the JSON envelope shortenUrl decodes API responses into.
type BitlyApiMsg struct {
	// Data is left untyped; shortenUrl expects it to decode to a JSON object
	// and reads the "url" key from it.
	Data interface{} `json:"data"`
	// StatusCode is the status reported inside the JSON body; shortenUrl
	// treats anything other than 200 as a failure.
	StatusCode int `json:"status_code"`
	// StatusTxt is the status text that shortenUrl includes in its error.
	StatusTxt string `json:"status_txt"`
}
func shortenUrl(longurl string) (string, error) { | |
v := url.Values{} | |
v.Add("longUrl", longurl) | |
v.Add("access_token", bitlyAccessToken) | |
apiUrl := fmt.Sprintf("%s?%s", bitlyApiUrl, v.Encode()) | |
log.Printf("GET %s", apiUrl) | |
req, err := http.NewRequest("GET", apiUrl, nil) | |
if err != nil { | |
log.Fatal(err.Error()) | |
return "", nil | |
} | |
req.Close = true // Always open new conn | |
resp, err := httpClient.Do(req) | |
if err != nil { | |
log.Printf("http: bitly api error %s", err.Error()) | |
return "", err | |
} | |
body, err := ioutil.ReadAll(resp.Body) | |
if err != nil { | |
return "", err | |
} | |
msg := &BitlyApiMsg{} | |
err = json.Unmarshal(body, &msg) | |
if err != nil { | |
return "", fmt.Errorf("ERROR: INVALID RESPONSE %s %s", body, err) | |
} | |
if msg.StatusCode == 403 { | |
log.Printf("RATELIMIT EXCEEDED. pausing 60 seconds") | |
time.Sleep(30 * time.Second) | |
} | |
if msg.StatusCode != 200 { | |
return "", fmt.Errorf("ERROR: BITLY ERROR TEXT %s", msg.StatusTxt) | |
} | |
data, ok := msg.Data.(map[string]interface{}) | |
if !ok { | |
return "", fmt.Errorf("ERROR: INVALID RESPONSE %s %s", body) | |
} | |
url, _ := data["url"] | |
return url.(string), nil | |
} | |
func writeOutput(writer io.Writer, output <-chan []string) { | |
writeWG.Add(1) | |
csvWriter := csv.NewWriter(writer) | |
for outputRecord := range output { | |
csvWriter.Write(outputRecord) | |
} | |
writeWG.Done() | |
} | |
func worker(urls chan Message, retryChan chan Message, exit <-chan bool, output chan<- []string, errorChan chan<- []string) { | |
wg.Add(1) | |
for { | |
select { | |
case message := <-urls: | |
longurl := message.url | |
message.attempt++ | |
shorturl, err := shortenUrl(longurl) | |
if err != nil { | |
log.Printf("error %s", err) | |
time.Sleep(30 * time.Second) | |
retryChan <- message | |
continue | |
} | |
output <- []string{longurl, shorturl} | |
case message := <-retryChan: | |
longurl := message.url | |
message.attempt++ | |
shorturl, err := shortenUrl(longurl) | |
if err != nil { | |
log.Printf("error on retry %s", err) | |
if message.attempt > 5 { | |
errorChan <- []string{longurl, err.Error()} | |
} else { | |
time.Sleep(30 * time.Second) | |
retryChan <- message | |
} | |
continue | |
} | |
output <- []string{longurl, shorturl} | |
case _ = <-exit: | |
wg.Done() | |
return | |
} | |
} | |
} | |
func main() { | |
log.Printf("Starting...") | |
var ( | |
longUrlFileName = flag.String("long-url-file", "", "file with long urls in csv format") | |
outputFileName = flag.String("output-file", "output.csv", "filename to use as output") | |
errorFileName = flag.String("error-file", "errors.csv", "filename to use as error output") | |
accessToken = flag.String("access-token", "", "access token to shorten links with") | |
workers = flag.Int("workers", 10, "number of requests to make simultaneously") | |
field = flag.Int("field", 1, "long url field in long-url-file csv") | |
apiEndpoint = flag.String("bitly-api", "https://api-ssl.bitly.com:443/v3/shorten", "the Bitly API endpoint to request") | |
) | |
flag.Parse() | |
bitlyApiUrl = *apiEndpoint | |
bitlyAccessToken = *accessToken | |
httpClient = &http.Client{} | |
log.Printf("reading input from %q", *longUrlFileName) | |
inputFile, err := os.Open(*longUrlFileName) // For read access. | |
if err != nil { | |
log.Fatal(err) | |
} | |
reader := bufio.NewReader(inputFile) | |
log.Printf("output going to %q", *outputFileName) | |
// open output file | |
outputFile, err := os.Create(*outputFileName) | |
if err != nil { | |
log.Fatal(err) | |
} | |
writer := bufio.NewWriter(outputFile) | |
log.Printf("errors going to %q", *errorFileName) | |
errorFile, err := os.Create(*errorFileName) | |
if err != nil { | |
log.Fatal(err) | |
} | |
errorWriter := bufio.NewWriter(errorFile) | |
urls := make(chan Message, *workers) | |
retryChan := make(chan Message, 1000) | |
output := make(chan []string, *workers) | |
errorChan := make(chan []string, *workers) | |
exit := make(chan bool) | |
for i := 0; i <= *workers; i++ { | |
go worker(urls, retryChan, exit, output, errorChan) | |
} | |
go writeOutput(writer, output) | |
// error writer | |
go writeOutput(errorWriter, errorChan) | |
csvReader := csv.NewReader(reader) | |
count := 0 | |
for { | |
records, err := csvReader.Read() | |
if err != nil { | |
if err == io.EOF { | |
break | |
} else { | |
log.Printf("ERROR : %s", err.Error()) | |
continue | |
} | |
} | |
if len(records) < *field+1 { | |
log.Printf("row %#v", records) | |
log.Printf("set --field appropriately") | |
log.Fatalf("record has %d columns but expected fields %d", len(records), *field+1) | |
} | |
longUrl := records[*field] | |
urlToShorten := &Message{ | |
url: longUrl, | |
attempt: 0, | |
} | |
urls <- *urlToShorten | |
count++ | |
if count%10000 == 0 { | |
log.Printf("Processed %d", count) | |
} | |
} | |
log.Printf("finished input file") | |
close(exit) | |
inputFile.Close() | |
wg.Wait() | |
close(retryChan) | |
for message := range retryChan { | |
errorChan <- []string{message.url, "gave_up"} | |
} | |
close(output) | |
close(errorChan) | |
writeWG.Wait() | |
writer.Flush() | |
errorWriter.Flush() | |
outputFile.Close() | |
errorFile.Close() | |
log.Printf("Done!") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment