Skip to content

Instantly share code, notes, and snippets.

@subhojit777
Last active October 7, 2015 16:03
Show Gist options
  • Save subhojit777/5df702f24d4154c6da64 to your computer and use it in GitHub Desktop.
Save subhojit777/5df702f24d4154c6da64 to your computer and use it in GitHub Desktop.
<?php
// Scrapes the GitHub listing of DrupalConsole's English translation files,
// downloads each listed file into the current directory, and appends the
// output of ./parse_yaml.sh for that file to export.csv.
//
// BEWARE this script is going to create lots of junk files.
// Thanks to http://htmlparsing.com/php.html
$url = "https://github.com/hechoendrupal/DrupalConsole/tree/master/config/translations/en";
$raw_base_url = "https://raw.githubusercontent.com/hechoendrupal/DrupalConsole/master/config/translations/en/";

$ch = curl_init();
$timeout = 5;
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$html = curl_exec($ch);
// Read the error text before the handle is closed.
$curl_error = curl_error($ch);
curl_close($ch);

// Without a listing page there is nothing to parse, so stop with a clear
// message instead of silently producing no output.
if ($html === false || $html === '') {
    error_log("Could not fetch $url: $curl_error");
    exit(1);
}

$dom = new DOMDocument();
// Real-world HTML is rarely valid; collect libxml's parse errors internally
// rather than hiding every possible error with the @ operator.
$previous_libxml_setting = libxml_use_internal_errors(true);
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

$xpath = new DOMXpath($dom);
// Fetch all YAML file rows from the table.
// Note that this XPath query may not work in future.
// XPath query strictly depends on Github's markup.
foreach ($xpath->query('//table[contains(@class, "files")]/tbody/tr[@class="js-navigation-item"]') as $node) {
    // Fetch the contents of YAML files from every row.
    // Parse them and push them in CSV file.
    // Note - CSV file type is not mandatory, you can even use txt.
    // The query is relative to the current row (context node).
    foreach ($xpath->query('td[@class="content"]/span/a', $node) as $link) {
        $file_name = trim($link->nodeValue);

        // The name comes from remote markup: accept only a plain file name so
        // it can neither escape the current directory nor be empty.
        if ($file_name === '' || $file_name === '.' || $file_name === '..' || basename($file_name) !== $file_name) {
            error_log("Skipping unexpected file name: $file_name");
            continue;
        }

        $yml = file_get_contents($raw_base_url . rawurlencode($file_name));
        if ($yml === false) {
            error_log("Could not download $file_name");
            continue;
        }

        if (file_put_contents($file_name, $yml) === false) {
            error_log("Could not write $file_name");
            continue;
        }

        // Make sure you got this https://gist.github.com/subhojit777/2df7dae5df9bce9ace95
        // escapeshellarg() keeps the scraped name from being interpreted by the shell.
        shell_exec('./parse_yaml.sh ' . escapeshellarg($file_name) . ' >> export.csv');
    }
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment