Skip to content

Instantly share code, notes, and snippets.

@skanga
Last active July 2, 2021 09:50
Show Gist options
  • Save skanga/676015f59b180c8206af2396b58436e0 to your computer and use it in GitHub Desktop.
Save skanga/676015f59b180c8206af2396b58436e0 to your computer and use it in GitHub Desktop.
// It requires jsoup.jar (from https://jsoup.org/download) in the classpath
// It needs two parameters on the command line:
// 1. The github url to be downloaded
// 2. The local directory where the downloaded files should be stored
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Command-line tool that mirrors the files of a GitHub directory listing page into a local
 * folder, recursing into sub-directories.
 *
 * <p>The listing page is fetched and parsed with jsoup; each file row is downloaded through its
 * {@code /raw/} URL, and each directory row is created locally and then processed the same way.
 * Files that already exist locally are not downloaded again.
 */
public class GithubGet {
    /** When true each step is logged on its own line; when false a single "." is printed per step. */
    private static final boolean DEBUG = true;

    /** Selects the icon that marks a file row in the listing grid. */
    private static final String FILE_ICON_SELECTOR =
            "div[role=row] div[role=gridcell] svg[aria-label=File]";

    /** Selects the icon that marks a directory row in the listing grid. */
    private static final String DIR_ICON_SELECTOR =
            "div[role=row] div[role=gridcell] svg[aria-label=Directory]";

    /** Selects the anchor inside a row that carries the entry's name (text) and target (href). */
    private static final String ROW_LINK_SELECTOR = "a.js-navigation-open.Link--primary[href]";

    /**
     * Entry point.
     *
     * @param args exactly two values: the GitHub URL to fetch and the destination folder
     * @throws IOException if a page or file cannot be fetched, or a local path cannot be written
     */
    public static void main(String[] args) throws IOException {
        Validate.isTrue(args.length == 2, "USAGE: supply url to fetch and dest folder");
        debug("Fetching %s...", args[0]);
        parseDoc(args[0], args[1]);
    }

    /**
     * Fetches one directory listing page, downloads every file row into {@code dirName}, then
     * recurses into every directory row using a matching local sub-directory.
     *
     * @param url     URL of the listing page to parse
     * @param dirName local directory that receives the entries found on that page
     * @throws IOException if fetching or writing fails, or if an entry name taken from the page
     *                     would resolve outside {@code dirName}
     */
    public static void parseDoc(String url, String dirName) throws IOException {
        Document doc = Jsoup.connect(url).get();

        for (Element fileIcon : doc.select(FILE_ICON_SELECTOR)) {
            Element rowLink = findRowLink(fileIcon);
            if (rowLink == null) {
                // The row does not have the expected anchor; skip it rather than fail with an NPE.
                debug("Skipping file row without a recognizable link in %s", url);
                continue;
            }
            String fileUrl = rowLink.attr("abs:href");
            String fileName = rowLink.text();
            debug("%s -> %s", fileUrl, dirName + "/" + fileName);
            downloadUrl(fileUrl, dirName, fileName);
        }

        for (Element dirIcon : doc.select(DIR_ICON_SELECTOR)) {
            Element rowLink = findRowLink(dirIcon);
            if (rowLink == null) {
                debug("Skipping directory row without a recognizable link in %s", url);
                continue;
            }
            String dirUrl = rowLink.attr("abs:href");
            String newDir = dirName + "/" + rowLink.text();
            // The name comes from remote HTML, so make sure it cannot climb out of dirName.
            requireInside(dirName, Paths.get(newDir));
            debug("%s -> mkdir %s", dirUrl, newDir);
            Files.createDirectories(Paths.get(newDir));
            parseDoc(dirUrl, newDir);
        }
    }

    /**
     * Locates the name/target anchor of the row that contains the given icon.
     *
     * @param icon an icon element matched by one of the icon selectors
     * @return the anchor found under the icon's grandparent element, or {@code null} if the icon
     *         has no grandparent or no anchor matches
     */
    private static Element findRowLink(Element icon) {
        Element cell = icon.parent();
        Element row = (cell == null) ? null : cell.parent();
        return (row == null) ? null : row.selectFirst(ROW_LINK_SELECTOR);
    }

    /**
     * Downloads a single file into {@code dirName/fileName} unless that file already exists.
     *
     * @param githubUrl URL of the file's page; {@code /blob/} is replaced with {@code /raw/}
     *                  to obtain the raw content URL
     * @param dirName   local directory to store the file in (created if missing)
     * @param fileName  name of the file inside {@code dirName}
     * @throws IOException if the download or the write fails, or if {@code fileName} would
     *                     resolve outside {@code dirName}
     */
    private static void downloadUrl(String githubUrl, String dirName, String fileName)
            throws IOException {
        Path destFile = Paths.get(dirName, fileName);
        requireInside(dirName, destFile);
        if (Files.exists(destFile)) {
            return;
        }
        String rawUrl = githubUrl.replace("/blob/", "/raw/");
        Connection.Response rawResponse = Jsoup.connect(rawUrl)
                .ignoreContentType(true)
                // 0 lifts jsoup's default body-size cap, which would otherwise truncate big files.
                .maxBodySize(0)
                .execute();
        Files.createDirectories(Paths.get(dirName));
        Files.write(destFile, rawResponse.bodyAsBytes());
    }

    /**
     * Verifies that {@code candidate} is located strictly below {@code baseDir} once both are
     * made absolute and normalized, so names such as {@code ..} cannot escape the target folder.
     *
     * @param baseDir   directory that must contain the candidate
     * @param candidate path built from a name taken from the remote page
     * @throws IOException if the candidate equals the base directory or lies outside of it
     */
    private static void requireInside(String baseDir, Path candidate) throws IOException {
        // Absolute paths are compared because normalizing a relative "." yields an empty path,
        // for which startsWith() does not behave like a prefix test.
        Path base = Paths.get(baseDir).toAbsolutePath().normalize();
        Path target = candidate.toAbsolutePath().normalize();
        if (target.equals(base) || !target.startsWith(base)) {
            throw new IOException(
                    "Refusing to use path outside of " + baseDir + ": " + candidate);
        }
    }

    /**
     * Prints a formatted progress message when debugging is enabled, otherwise a single dot.
     *
     * @param msg  {@link String#format(String, Object...)} pattern
     * @param args values substituted into the pattern
     */
    private static void debug(String msg, Object... args) {
        if (DEBUG) {
            System.out.println(String.format(msg, args));
        } else {
            System.out.print(".");
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment