Skip to content

Instantly share code, notes, and snippets.

@hallboav
Created January 27, 2019 18:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hallboav/b22e42e2771e820d73243f5c69ff2ec6 to your computer and use it in GitHub Desktop.
Save hallboav/b22e42e2771e820d73243f5c69ff2ec6 to your computer and use it in GitHub Desktop.
Sandman
{
"require": {
"ext-openssl": "*",
"fabpot/goutte": "^3.2",
"symfony/console": "^4.2"
}
}
#!/usr/bin/env php
<?php
use Goutte\Client;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Console\Input\ArgvInput;
use Symfony\Component\Console\Question\Question;
use Symfony\Component\Console\Output\ConsoleOutput;
use Symfony\Component\Console\Helper\QuestionHelper;
use Symfony\Component\Console\Output\OutputInterface;
require_once 'vendor/autoload.php';
/**
 * Pulls the first phone number out of a free-text string and returns it as
 * an 11-digit string: 2-digit area code followed by a 9-digit number.
 *
 * Accepted shapes include an optional parenthesised area code with an
 * optional leading zero, an optional space, and an 8- or 9-digit number with
 * an optional hyphen before the last four digits.
 *
 * @param string $text Text that contains a phone number somewhere in it.
 *
 * @return string Digits only, always area code + 9-digit number.
 *
 * @throws \UnexpectedValueException When no phone number can be found in $text.
 */
function parsePhoneNumber(string $text): string
{
    $pattern = '#\(?0?(?P<area_code>\d{2})?\)?\ ?(?P<phone_number>9?\d{4}\-?\d{4})#';
    $groups = [];

    if (!preg_match($pattern, $text, $groups)) {
        throw new \UnexpectedValueException(sprintf('Falha ao extrair número de telefone do texto: "%s"', $text));
    }

    // Drop the hyphen, then left-pad 8-digit numbers with a '9' so every
    // number ends up with 9 digits.
    $subscriber = str_pad(
        str_replace('-', '', $groups['phone_number']),
        9,
        '9',
        STR_PAD_LEFT
    );

    // The area-code group is an empty string when it did not participate in
    // the match; in that case the left-padding below supplies "61".
    $withAreaCode = $groups['area_code'] . $subscriber;

    return str_pad($withAreaCode, 11, '61', STR_PAD_LEFT);
}
/**
 * Decrypts a message packed as base64(IV . base64(ciphertext)).
 *
 * The outer base64 layer is removed here; the inner one is decoded by
 * openssl_decrypt() itself because it is called with options = 0.
 *
 * @param string $message  Base64 payload: the raw IV immediately followed by
 *                         the base64-encoded ciphertext.
 * @param string $password Key handed to openssl_decrypt().
 * @param string $method   OpenSSL cipher method name.
 *
 * @return string The decrypted plaintext.
 *
 * @throws \RuntimeException When the payload is malformed or decryption fails
 *                           (for example because of a wrong password).
 */
function decrypt(string $message, string $password, string $method = 'aes-256-cbc'): string
{
    $bytes = base64_decode($message);
    $ivlen = openssl_cipher_iv_length($method);

    // The payload must hold a full IV plus at least one byte of ciphertext.
    if (false === $bytes || false === $ivlen || strlen($bytes) <= $ivlen) {
        throw new \RuntimeException('Mensagem cifrada inválida');
    }

    $iv = substr($bytes, 0, $ivlen);

    // The ciphertext starts right after the IV. Starting one byte earlier
    // would prepend the IV's last byte to the base64 text, which only goes
    // unnoticed when that byte is not part of the base64 alphabet.
    $data = substr($bytes, $ivlen);

    $plaintext = openssl_decrypt($data, $method, $password, 0, $iv);
    if (false === $plaintext) {
        // Without this check the `string` return type would silently turn
        // the failure into an empty string.
        throw new \RuntimeException('Falha ao decifrar a mensagem (senha incorreta?)');
    }

    return $plaintext;
}
// Entry point: asks for a password, decrypts the target URL and CSS selectors,
// crawls every matching ad and writes one contact row per profile into
// ".output.csv".

// Console I/O objects are created before the try block so the catch block can
// always report through $output.
$input = new ArgvInput();
$output = new ConsoleOutput();

$fp = null;
$exitCode = 0;

try {
    $filename = '.output.csv';

    // Refuse to overwrite an existing export.
    // NOTE: the file is created before the password prompt, so a failed run
    // leaves a file behind that has to be removed before retrying.
    if (file_exists($filename)) {
        throw new \RuntimeException(sprintf('Arquivo "%s" já existe', $filename));
    }

    $fp = fopen($filename, 'w');
    if ($fp === false) {
        throw new \RuntimeException(sprintf('Não foi possível criar o arquivo "%s"', $filename));
    }

    // Hidden prompt; with the fallback disabled the helper raises an error
    // instead of echoing the password when the input cannot be hidden.
    $question = new Question('in-pom-pom in-pom-pom: ');
    $question->setHidden(true);
    $question->setHiddenFallback(false);

    $questionHelper = new QuestionHelper();
    $password = $questionHelper->ask($input, $output, $question);

    // The URL to crawl is stored encrypted; the password unlocks it.
    $message = 'YZWGu5DByd7jjGlDRit2qXJ1djVEYUIxMkNFbnJ5TnJxUzI1THpiRWMvL2xCU1lMUUt0Q29EY3RCQ1ZiSlFpUUN6K1Njcy95aEllWEEyZkZjMlliQ0lENU9oMXgzcHdXTG5UWnl3PT0=';
    $url = decrypt($message, $password);

    $client = new Client();
    $crawler = $client->request('GET', $url);

    // Encrypted selector that picks the ad anchors on the listing page.
    $message = '+AkTQka7WtX6CqGr3HYVjFpWSG9MMkk3TkZ4OUdTZEpoSEhNMWZmVThyaGVrYkw3RC9ONTRuc01hYUx5blpsbi9mamE0ajBxbVdmMTNycVg=';
    $ads = $crawler->filter(decrypt($message, $password));

    $output->writeln(sprintf('%d resultados encontrados', $ads->count()));

    // Header row; the $contact array below must keep the same 39-column order.
    $headers = 'Name,Given Name,Additional Name,Family Name,Yomi Name,Given Name Yomi,Additional Name Yomi,Family Name Yomi,Name Prefix,Name Suffix,Initials,Nickname,Short Name,Maiden Name,Birthday,Gender,Location,Billing Information,Directory Server,Mileage,Occupation,Hobby,Sensitivity,Priority,Subject,Notes,Language,Photo,Group Membership,E-mail 1 - Type,E-mail 1 - Value,E-mail 2 - Type,E-mail 2 - Value,Phone 1 - Type,Phone 1 - Value,Phone 2 - Type,Phone 2 - Value,Website 1 - Type,Website 1 - Value';
    fwrite($fp, sprintf('%s%s', $headers, PHP_EOL));

    // Encrypted selector for the info container on each profile page.
    $infoDivId = decrypt('f0Ya36+UWgxMf3oNSuPipnI4ZjUvVG8wQitKdDc3SGF4czBTU1E9PQ==', $password);

    $ads->each(function (Crawler $anchor) use ($client, $infoDivId, $fp) {
        foreach ($anchor->links() as $link) {
            $profile = $client->click($link);

            $profile->filter($infoDivId)->each(function (Crawler $info) use ($link, $fp) {
                $headerText = $info->children('h1')->text();

                // Only the first line of the header is used as the name.
                // Whitespace (including U+00A0) is stripped per code point:
                // a byte-wise trim() on "\xc2\xa0" could cut a multibyte
                // UTF-8 character in half at either end of the name.
                $firstLine = current(explode("\n", $headerText));
                $blank = '[\x20\x09\x0a\x0d\x00\x0b\x{a0}]+';
                $name = preg_replace('/^' . $blank . '|' . $blank . '$/u', '', $firstLine) ?? $firstLine;

                // Prefer the first anchor's text as the phone source and fall
                // back to the header text when the block has no anchor.
                $infoAnchor = $info->filter('a');
                $phoneNumberText = 0 < $infoAnchor->count() ? $infoAnchor->eq(0)->text() : $headerText;
                $mainPhoneNumber = parsePhoneNumber($phoneNumberText);

                // A subheader containing "ou" is read as a second number.
                $additionalPhoneNumber = null;
                $infoSubheader = $info->filter('h2');
                if (0 < $infoSubheader->count()) {
                    $infoSubheaderText = $infoSubheader->eq(0)->text();
                    if (false !== strpos($infoSubheaderText, 'ou')) {
                        $additionalPhoneNumber = parsePhoneNumber($infoSubheaderText);
                    }
                }

                $contact = [
                    md5($name), // name (stored as an MD5 hash of the ad name)
                    '', // given name
                    '', // additional name
                    '', // family name
                    '', // yomi name
                    '', // given name yomi
                    '', // additional name yomi
                    '', // family name yomi
                    '', // name prefix
                    '', // name suffix
                    '', // initials
                    '', // nickname
                    '', // short name
                    '', // maiden name
                    '', // birthday
                    '', // gender
                    '', // location
                    '', // billing information
                    '', // directory server
                    '', // mileage
                    '', // occupation
                    '', // hobby
                    '', // sensitivity
                    '', // priority
                    '', // subject
                    '', // notes
                    '', // language
                    '', // photo
                    'foo', // group membership
                    '', // email 1 type
                    '', // email 1 value
                    '', // email 2 type
                    '', // email 2 value
                    'Mobile', // phone 1 type
                    $mainPhoneNumber, // phone 1 value
                    null !== $additionalPhoneNumber ? 'Mobile' : '', // phone 2 type
                    $additionalPhoneNumber ?? '', // phone 2 value
                    'Work', // website 1 type
                    base64_encode($link->getUri()), // website 1 value (base64 of the profile URI)
                ];

                fputcsv($fp, $contact);
            });
        }
    });

    $output->writeln('done.');
} catch (\Exception $exception) {
    $output->getErrorOutput()->writeln($exception->getMessage());
    // Exiting is deferred: calling exit() here would skip the finally block.
    $exitCode = -1;
} finally {
    // Close the CSV handle on both the success and the failure path.
    if (is_resource($fp)) {
        fclose($fp);
    }
}

if (0 !== $exitCode) {
    exit($exitCode);
}
# Make the "crawl" file executable (presumably the PHP script above, which
# starts with a "#!/usr/bin/env php" shebang).
chmod +x crawl
# Run it and, only if it exits successfully, list the directory; -a includes
# dot-files, so the generated ".output.csv" shows up in the listing.
./crawl && ls -la
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment