Skip to content

Instantly share code, notes, and snippets.

/parser Secret

Created December 6, 2017 10:04
Show Gist options
  • Star 19 You must be signed in to star a gist
  • Fork 21 You must be signed in to fork a gist
  • Save anonymous/dd84a3e0ccde6c1e856b33bce5d8b9ea to your computer and use it in GitHub Desktop.
Save anonymous/dd84a3e0ccde6c1e856b33bce5d8b9ea to your computer and use it in GitHub Desktop.
<?php
namespace AppBundle\Command;
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Console command `app:parse-knp`.
 *
 * Fetches the `/courses/all` listing from knpuniversity.com, visits every
 * course page found there and, for each course, creates a directory under the
 * configured base directory and downloads into it:
 *  - the "Course Code" and "Course Script" links (taken once, from the first
 *    chapter page of the course);
 *  - the "This Video" link of every chapter page.
 *
 * A course whose directory already exists is skipped entirely.
 */
class ParseKnpCommand extends ContainerAwareCommand
{
    /**
     * Registers the command name and its description.
     */
    protected function configure()
    {
        $this
            ->setName('app:parse-knp')
            ->setDescription('parse knp')
        ;
    }

    /**
     * Crawls the course list and downloads the files of every new course.
     *
     * @param InputInterface  $input  Console input (no arguments or options are read).
     * @param OutputInterface $output Console output used for progress messages.
     *
     * @throws \RuntimeException When a course directory cannot be created.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $base_uri = 'https://knpuniversity.com';
        // Placeholder cookie values; presumably these must be replaced with the
        // cookies of an authenticated session before running — TODO confirm.
        $cookieJar = CookieJar::fromArray([
            'PHPSESSID' => 'xxx',
            'REMEMBERME' => 'xxx',
        ], 'knpuniversity.com');
        // Placeholder: directory under which one sub-directory per course is created.
        $baseDir = 'xxx';
        $client = new Client(['base_uri' => $base_uri, 'cookies' => $cookieJar]);

        $response = $client->get('/courses/all');
        $crawler = new Crawler($response->getBody()->getContents());
        $coursesDOM = $crawler->filter('div.js-isotope-col > a');
        $output->writeln('parsed list');

        foreach ($coursesDOM as $courseDOM) {
            /** @var \DOMElement $courseDOM */
            $courseHref = $courseDOM->getAttribute('href');
            $response = $client->get($courseHref);
            $courseCrawler = new Crawler($response->getBody()->getContents());

            // Crawler::text() throws on an empty node list; skip pages without
            // the expected heading instead of aborting the whole run.
            $heading = $courseCrawler->filter('h1.tuts-header-font-tutorial-overview');
            if ($heading->count() === 0) {
                $output->writeln('skipped ' . $courseHref . ' page: course title not found');
                continue;
            }
            $courseName = $heading->text();
            $output->writeln($courseName);

            // Spaces become underscores, every other non-word character is dropped.
            $dirname = preg_replace("/[^\w]+/", "", str_replace(' ', '_', $courseName));
            $output->writeln($dirname);
            $dir = $baseDir . '/' . $dirname;

            // An existing directory marks the course as already processed.
            if (is_dir($dir)) {
                continue;
            }
            if (!mkdir($dir, 0777, true) && !is_dir($dir)) {
                throw new \RuntimeException(sprintf('Unable to create directory "%s".', $dir));
            }
            $output->writeln('parsed ' . $courseHref . ' page');

            $coursePartsDOM = $courseCrawler->filter('div.chapter-list > ul > li > div > div.col-xs-12 > a');
            $chapterIndex = 0;
            foreach ($coursePartsDOM as $coursePartDOM) {
                /** @var \DOMElement $coursePartDOM */
                $coursePartHref = $coursePartDOM->getAttribute('href');
                $response = $client->get($coursePartHref);
                $chapterCrawler = new Crawler($response->getBody()->getContents());
                $links = $chapterCrawler->filter('div.download-buy-buttons > ul > li > a');

                foreach ($links as $linkDOM) {
                    /** @var \DOMElement $linkDOM */
                    $linkText = trim($linkDOM->textContent);
                    $link = $linkDOM->getAttribute('href');

                    // Code and script are fetched only from the first chapter page;
                    // the video is fetched for every chapter.
                    $isCourseAsset = $chapterIndex === 0
                        && ($linkText === 'Course Code' || $linkText === 'Course Script');

                    if ($isCourseAsset || $linkText === 'This Video') {
                        $output->writeln($link);
                        $this->downloadLink($client, $link, $dir, $linkText);
                    }
                }
                $chapterIndex++;
            }
        }
    }

    /**
     * Downloads one link into the course directory.
     *
     * The body is first written to a file named after the link text (spaces
     * replaced by underscores). When the response carries a Content-Disposition
     * header with a quoted file name, the file is renamed to that name.
     *
     * @param Client $client   HTTP client used for the request.
     * @param string $link     URL (absolute or relative to the client's base URI).
     * @param string $dir      Existing directory that receives the file.
     * @param string $linkText Visible text of the link, used as the fallback file name.
     */
    private function downloadLink(Client $client, $link, $dir, $linkText)
    {
        $file = $dir . '/' . str_replace(' ', '_', $linkText);
        $response = $client->request('GET', $link, ['sink' => $file]);

        if (!$response->hasHeader('Content-Disposition')) {
            return;
        }

        $header = $response->getHeader('Content-Disposition')[0];
        if (!preg_match('/"([^"]+)"/', $header, $m)) {
            return;
        }

        // The header is remote-controlled: keep only the last path component so
        // the file cannot be moved outside the course directory.
        $name = basename($m[1]);
        if ($name !== '' && $name !== '.' && $name !== '..') {
            rename($file, $dir . '/' . $name);
        }
    }
}
@evlgromov
Copy link

Есть ли возможность добавить описание и краткую инструкцию? Буду благодарен.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment