-
-
Save anonymous/dd84a3e0ccde6c1e856b33bce5d8b9ea to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace AppBundle\Command; | |
use GuzzleHttp\Client; | |
use GuzzleHttp\Cookie\CookieJar; | |
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; | |
use Symfony\Component\Console\Input\InputArgument; | |
use Symfony\Component\Console\Input\InputInterface; | |
use Symfony\Component\Console\Input\InputOption; | |
use Symfony\Component\Console\Output\OutputInterface; | |
use Symfony\Component\DomCrawler\Crawler; | |
/**
 * Console command `app:parse-knp`.
 *
 * Fetches the course list from https://knpuniversity.com/courses/all, visits
 * every course page and every chapter page linked from it, and saves the files
 * offered under the chapter's download buttons into one directory per course.
 *
 * The session cookies and the target base directory are hard-coded in
 * execute(); the 'xxx' values look like placeholders that must be replaced
 * before the command is run (NOTE(review): confirm).
 */
class ParseKnpCommand extends ContainerAwareCommand
{
    /**
     * Registers the command name and its description.
     */
    protected function configure()
    {
        $this
            ->setName('app:parse-knp')
            ->setDescription('parse knp')
        ;
    }

    /**
     * Crawls the course catalogue and downloads the material of each course.
     *
     * For every course a directory named after the course title is created
     * below the base directory. A course whose directory already exists is
     * skipped. "Course Code" and "Course Script" are fetched once per course
     * (from the first chapter page); "This Video" is fetched for every chapter.
     *
     * @param InputInterface  $input  Console input (not read by this command).
     * @param OutputInterface $output Console output used for progress messages.
     *
     * @return int Exit code, 0 on completion.
     *
     * @throws \RuntimeException When a course directory cannot be created.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $baseUri = 'https://knpuniversity.com';
        $cookieJar = CookieJar::fromArray([
            'PHPSESSID' => 'xxx',
            'REMEMBERME' => 'xxx',
        ], 'knpuniversity.com');
        $baseDir = 'xxx';

        $client = new Client(['base_uri' => $baseUri, 'cookies' => $cookieJar]);

        $response = $client->get('/courses/all');
        $catalogue = new Crawler($response->getBody()->getContents());
        $courseLinks = $catalogue->filter('div.js-isotope-col > a');
        $output->writeln('parsed list');

        foreach ($courseLinks as $courseLink) {
            /** @var \DOMElement $courseLink */
            $courseHref = $courseLink->getAttribute('href');
            $response = $client->get($courseHref);
            $coursePage = new Crawler($response->getBody()->getContents());

            // text() throws on an empty node list; skip pages without the
            // expected heading instead of aborting the whole run.
            $titleNode = $coursePage->filter('h1.tuts-header-font-tutorial-overview');
            if (count($titleNode) === 0) {
                $output->writeln('no course title found on ' . $courseHref . ', skipped');
                continue;
            }

            $courseName = $titleNode->text();
            $output->writeln($courseName);

            // Spaces become underscores, every other non-word character is dropped.
            $dirname = preg_replace("/[^\w]+/", "", str_replace(' ', '_', $courseName));
            $output->writeln($dirname);

            $dir = $baseDir . '/' . $dirname;
            if (is_dir($dir)) {
                // An existing directory is treated as "course already processed".
                continue;
            }
            // Recursive so that a missing base directory is created as well.
            if (!mkdir($dir, 0777, true) && !is_dir($dir)) {
                throw new \RuntimeException(sprintf('Unable to create directory "%s"', $dir));
            }
            $output->writeln('parsed ' . $courseHref . ' page');

            $chapterLinks = $coursePage->filter('div.chapter-list > ul > li > div > div.col-xs-12 > a');
            $isFirstChapter = true;

            foreach ($chapterLinks as $chapterLink) {
                /** @var \DOMElement $chapterLink */
                $response = $client->get($chapterLink->getAttribute('href'));
                $chapterPage = new Crawler($response->getBody()->getContents());
                $downloadLinks = $chapterPage->filter('div.download-buy-buttons > ul > li > a');

                foreach ($downloadLinks as $downloadLink) {
                    /** @var \DOMElement $downloadLink */
                    $linkText = trim($downloadLink->textContent);
                    $link = $downloadLink->getAttribute('href');

                    // Course-wide files are only taken from the first chapter
                    // page so they are not downloaded once per chapter.
                    $isCourseAsset = in_array($linkText, ['Course Code', 'Course Script'], true);
                    $wanted = ($isFirstChapter && $isCourseAsset) || $linkText === 'This Video';
                    if (!$wanted) {
                        continue;
                    }

                    $output->writeln($link);
                    $this->download($client, $link, $dir, $linkText);
                }

                $isFirstChapter = false;
            }
        }

        return 0;
    }

    /**
     * Downloads one file into $dir and renames it to the server-supplied name.
     *
     * The response body is streamed to a temporary name derived from the link
     * text. If the response carries a Content-Disposition header with a quoted
     * file name, the file is renamed to that name.
     *
     * @param Client $client   HTTP client carrying the session cookies.
     * @param string $url      Download URL (taken from the link's href).
     * @param string $dir      Existing target directory.
     * @param string $linkText Visible link text, used for the temporary file name.
     */
    private function download(Client $client, $url, $dir, $linkText)
    {
        $file = $dir . '/' . str_replace(' ', '_', $linkText);
        $response = $client->request('GET', $url, ['sink' => $file]);

        if (!$response->hasHeader('Content-Disposition')) {
            return;
        }

        $header = $response->getHeader('Content-Disposition')[0];
        if (!preg_match('/"([^"]+)"/', $header, $m)) {
            return;
        }

        // The header is remote input: keep only the last path component so a
        // name such as "../../x" cannot move the file outside of $dir.
        $filename = basename(str_replace('\\', '/', $m[1]));
        if ($filename === '' || $filename === '.' || $filename === '..') {
            return;
        }

        rename($file, $dir . '/' . $filename);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Есть ли возможность добавить описание и краткую инструкцию? Буду благодарен.