Skip to content

Instantly share code, notes, and snippets.

Last active January 11, 2021 15:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save junaidk/0732f61336b823887776deca6388fe5c to your computer and use it in GitHub Desktop.
Save junaidk/0732f61336b823887776deca6388fe5c to your computer and use it in GitHub Desktop.
Download images and videos from reddit saved posts
package main
import (
// basePath is the root directory under which main builds every download
// folder. The value is a placeholder and must be replaced before running.
const basePath = "<download-base-path>"

// paralledDownload is the number of worker goroutines main starts.
const paralledDownload = 2
func main() {
// file obtained with
dat, err := ioutil.ReadFile("<path to reddit_export.html")
if err != nil {
doc := soup.HTMLParse(string(dat))
links := doc.FindAll("li")
jobs := make(chan Job, len(links))
var wg sync.WaitGroup
for w := 1; w <= paralledDownload; w++ {
go worker(w, jobs, &wg)
for index, link := range links {
anchors := link.FindAll("a")
if len(anchors) < 2 {
split := strings.Split(anchors[1].Attrs()["href"], "/")
name := split[len(split)-2]
folder := split[4]
folderPath := path.Join(basePath, folder)
filePath := path.Join(folderPath, name)
url := anchors[0].Attrs()["href"]
job := Job{
Index: index,
Url: url,
FilePath: filePath,
FolderPath: folderPath,
jobs <- job
// to stop the worker, first close the job channel
// then wait using the WaitGroup
// Job describes one download task handed from main to a worker.
type Job struct {
	Index      int    // position of the entry in the parsed list
	Url        string // link taken from the entry's first anchor
	FilePath   string // destination path; putFile appends the extension
	FolderPath string // directory that contains FilePath
}
func worker(id int, jobs <-chan Job, wg *sync.WaitGroup) {
defer wg.Done()
for job := range jobs {
newUrl := urlGenerator(job.Url)
"worker %d started\norignal_url: %s \nnew_url: %s \nfilename: %s\n", id, job.Url, newUrl, job.FilePath,
if len(newUrl) == 0 {
putFile(job.FilePath, newUrl)
//fmt.Println("worker", id, "finished job",job.Url)
fmt.Println("worker", id, "finished")
func urlGenerator(urlInput string) string {
var urlOut string
if strings.Contains(urlInput, "") ||
strings.Contains(urlInput, "") ||
strings.Contains(urlInput, "imgur") ||
strings.Contains(urlInput, "redgifs") {
if strings.Contains(urlInput, "gfycat") {
urlOut = gfyCatMP4Url(urlInput)
if strings.Contains(urlOut, "redgifs") {
urlOut = strings.Replace(urlOut, "-mobile", "", 1)
} else {
urlOut = strings.Replace(urlOut, "thumbs", "giant", 1)
urlOut = strings.Replace(urlOut, "-mobile", "", 1)
} else if strings.Contains(urlInput, "i.imgur") {
urlOut = strings.Replace(urlInput, "gifv", "mp4", 1)
} else if strings.Contains(urlInput, "redgifs") {
urlOut = redgifMP4Url(urlInput)
urlOut = strings.Replace(urlOut, "-mobile", "", 1)
} else {
urlOut = urlInput
} else {
urlOut = ""
return urlOut
func redgifMP4Url(urlInput string) string {
resp, err := soup.Get(urlInput)
if err != nil {
return ""
doc := soup.HTMLParse(string(resp))
link := doc.Find("source", "type", "video/mp4")
if link.Error != nil {
return ""
urlOut, ok := link.Attrs()["src"]
if !ok {
return ""
return urlOut
func gfyCatMP4Url(urlInput string) string {
resp, err := soup.Get(urlInput)
if err != nil {
return ""
doc := soup.HTMLParse(string(resp))
link := doc.Find("meta", "property", "og:video")
if link.Error != nil {
return ""
urlOut, ok := link.Attrs()["content"]
if !ok {
return ""
return urlOut
func putFile(fileName, url string) {
client := httpClient()
resp, err := client.Get(url)
if err != nil {
fmt.Printf("error in downloading %s, %s\n", url, err.Error())
defer resp.Body.Close()
ext := getExtention(resp.Header["Content-Type"][0])
filePath := fileName + "." + ext
if ensureFile(filePath) {
fmt.Printf("file %s exists, not downloading\n", filePath)
file := createImage(filePath)
//counter := &WriteCounter{}
//size, err := io.Copy(file, io.TeeReader(resp.Body, counter))
_, err = io.Copy(file, resp.Body)
defer file.Close()
//fmt.Printf("Just Downloaded a file %s with size %s\n", fileName, humanize.Bytes(uint64(size)))
// getExtention returns the part of a media type after its last "/", e.g.
// "jpeg" for "image/jpeg". Despite the parameter name, putFile passes a
// Content-Type header value. Parameters following ";" (such as
// "; charset=utf-8") are dropped so they cannot end up in a file name.
// A value without "/" is returned as is, and "" yields "".
func getExtention(url string) string {
	mediaType := url
	if i := strings.IndexByte(mediaType, ';'); i >= 0 {
		mediaType = mediaType[:i]
	}
	mediaType = strings.TrimSpace(mediaType)

	// LastIndexByte returns -1 when there is no "/", which makes the slice
	// start at 0 and keeps the whole value.
	return mediaType[strings.LastIndexByte(mediaType, '/')+1:]
}
// httpClient returns a client whose redirect hook copies the redirect
// target's Path into URL.Opaque before the request is followed.
//
// Installing a CheckRedirect hook replaces net/http's default policy of
// giving up after 10 redirects, so the same bound is enforced here to avoid
// following a redirect loop forever.
func httpClient() *http.Client {
	const maxRedirects = 10

	return &http.Client{
		CheckRedirect: func(r *http.Request, via []*http.Request) error {
			if len(via) >= maxRedirects {
				return fmt.Errorf("stopped after %d redirects", maxRedirects)
			}
			// NOTE(review): presumably forces the path to be sent verbatim
			// instead of being re-encoded — confirm the intent.
			r.URL.Opaque = r.URL.Path
			return nil
		},
	}
}
func createImage(fileName string) *os.File {
file, err := os.Create(fileName)
return file
// ensureFile reports whether os.Stat succeeds for filePath. Every Stat error,
// not-exist or otherwise, counts as "not present".
func ensureFile(filePath string) bool {
	_, err := os.Stat(filePath)
	return err == nil
}
// ensureDir makes sure dirName exists as a directory, creating it and any
// missing parents. It returns nil when the directory is already there and an
// error when it cannot be created, including when the path is occupied by a
// regular file.
//
// The directory is created with mode 0755. Passing os.ModeDir as the
// permission argument sets no permission bits at all, which leaves a
// directory nothing can be written into.
func ensureDir(dirName string) error {
	return os.MkdirAll(dirName, 0755)
}
// checkError aborts by panicking when err is non-nil and does nothing
// otherwise.
func checkError(err error) {
	if err != nil {
		panic(err)
	}
}
// WriteCounter counts the bytes written to it.
type WriteCounter struct {
	Total uint64 // running total of bytes passed to Write
}

// Write adds len(p) to Total and reports the whole slice as written. It never
// returns an error and does not retain p.
func (wc *WriteCounter) Write(p []byte) (int, error) {
	n := len(p)
	wc.Total += uint64(n)
	return n, nil
}
// PrintProgress prints the progress of a file write
func (wc WriteCounter) PrintProgress() {
// Clear the line by using a character return to go back to the start and remove
// the remaining characters by filling it with spaces
fmt.Printf("\r%s", strings.Repeat(" ", 50))
// Return again and print current status of download
// We use the humanize package to print the bytes in a meaningful way (e.g. 10 MB)
fmt.Printf("\rDownloading... %s complete", humanize.Bytes(wc.Total))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment