Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@divinity76
Last active December 20, 2023 07:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save divinity76/79efd7b8c0d7849b956cd194659c98e5 to your computer and use it in GitHub Desktop.
Save divinity76/79efd7b8c0d7849b956cd194659c98e5 to your computer and use it in GitHub Desktop.
misc PHP copypasta
<?php
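// Handy one-liner: syntax-check every *.php file below the current directory in parallel
// (one `php --syntax-check` process per file, $(nproc) at a time) and print only the files that fail:
// find . -iname "*.php" -print0 | xargs -0 --max-args=1 --max-procs=$(nproc) '-I{}' sh -c 'php --syntax-check {} || true' | grep --invert-match "^No syntax errors detected in"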
/**
 * json_encode() with the most human-readable flag set switched on by default.
 *
 * @param mixed $data
 *            value to encode
 * @param int $extra_flags
 *            JSON_* flags to add on top of the defaults
 * @param int $exclude_flags
 *            JSON_* flags to remove again (applied last, so it wins over both the defaults and $extra_flags)
 * @return string
 */
function json_encode_pretty($data, int $extra_flags = 0, int $exclude_flags = 0): string
{
    // prettiest flags for: 7.3.9
    $bits = [
        JSON_PRETTY_PRINT,
        JSON_UNESCAPED_SLASHES,
        JSON_UNESCAPED_UNICODE,
        JSON_PRESERVE_ZERO_FRACTION,
    ];
    // these constants do not exist on every PHP version, so they are looked up by name
    foreach (["JSON_UNESCAPED_LINE_TERMINATORS", "JSON_THROW_ON_ERROR"] as $optional_constant) {
        if (defined($optional_constant)) {
            $bits[] = constant($optional_constant);
        }
    }
    $mask = $extra_flags;
    foreach ($bits as $bit) {
        $mask |= $bit;
    }
    $mask &= ~$exclude_flags;
    return json_encode($data, $mask);
}
/**
 * Build an `echo -ne '...'` shell command that prints the given (possibly binary) data to stdout.
 *
 * Backslash, single quote, LF and CR get dedicated escapes; alphanumerics are copied verbatim;
 * printable ASCII punctuation is copied verbatim except when it is the very first byte;
 * every other byte is emitted as a \xHH escape.
 *
 * @param string $binary
 *            the (optionally binary) data to echo
 * @param int $max_ish_line_length
 *            approximate maximum line length; once a line reaches roughly this length the quoted
 *            string is closed, continued with a backslash-newline and re-opened on the next line
 *            (not exact: a multi-character escape may overshoot)
 * @return string
 */
function generateBinaryEcho(string $binary, int $max_ish_line_length = 50): string
{
    $command_prefix = "echo -ne '";
    // leave room for the closing quote + line-continuation backslash
    $wrap_at = $max_ish_line_length - strlen("'\\");
    $escapes = [
        "\\" => "\\\\",
        '\'' => '\'\\\'\'',
        "\n" => "\\n",
        "\r" => "\\r",
    ];
    // http://www.asciitable.com/
    $verbatim_punctuation = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";

    $payload = "";
    $column = strlen($command_prefix);
    $byte_count = strlen($binary);
    $index = 0;
    while ($index < $byte_count) {
        if ($column >= $wrap_at) {
            // close the quote, continue the shell line, re-open the quote
            $payload .= "'\\\n'";
            $column = strlen("'");
        }
        $byte = $binary[$index];
        if (isset($escapes[$byte])) {
            $piece = $escapes[$byte];
        } elseif (ctype_alnum($byte)) {
            $piece = $byte;
        } elseif ($index !== 0 && strpos($verbatim_punctuation, $byte) !== false) {
            $piece = $byte;
        } else {
            // binary-ish / non-ASCII byte (or punctuation in first position): hex-escape it
            $piece = '\\x' . bin2hex($byte);
        }
        $payload .= $piece;
        $column += strlen($piece);
        ++$index;
    }
    return $command_prefix . $payload . "'";
}
/**
 * Better version of shell_exec(),
 * supporting stdin, stdout, stderr and the OS-level return code.
 *
 * stdout and stderr of the child are redirected into two temporary files (not pipes),
 * and read back from those files.
 *
 * @param string $cmd
 *            command to execute
 * @param string $stdin
 *            (optional) data to send to stdin, binary data is supported.
 *            Writing is best-effort: if the child stops accepting input, the remainder is dropped.
 * @param string|null $stdout
 *            (optional, output) stdout data generated by cmd
 * @param string|null $stderr
 *            (optional, output) stderr data generated by cmd
 * @param bool $print_std
 *            (optional, default false) if you want stdout+stderr to be printed while it's running,
 *            set this to true. (useful for long-running commands)
 * @throws \RuntimeException if the temporary files cannot be created or the process cannot be started
 * @return int exit code of the command
 */
function hhb_exec(string $cmd, string $stdin = "", ?string &$stdout = null, ?string &$stderr = null, bool $print_std = false): int
{
    $stdouth = tmpfile();
    $stderrh = tmpfile();
    if ($stdouth === false || $stderrh === false) {
        foreach ([$stdouth, $stderrh] as $maybe_handle) {
            if (is_resource($maybe_handle)) {
                fclose($maybe_handle);
            }
        }
        throw new \RuntimeException("hhb_exec: failed to create temporary files for capturing stdout/stderr");
    }
    try {
        $descriptorspec = [
            // stdin
            0 => ["pipe", "rb"],
            // stdout / stderr: the child appends to the temp files by path
            1 => ["file", stream_get_meta_data($stdouth)['uri'], 'ab'],
            2 => ["file", stream_get_meta_data($stderrh)['uri'], 'ab'],
        ];
        $pipes = [];
        $proc = proc_open($cmd, $descriptorspec, $pipes);
        if ($proc === false) {
            throw new \RuntimeException("hhb_exec: proc_open() failed for command: {$cmd}");
        }
        while (strlen($stdin) > 0) {
            $written_now = fwrite($pipes[0], $stdin);
            if ($written_now === false || $written_now < 1) {
                // the child is not accepting (more) input; deliberately best-effort, give up on the rest
                break;
            }
            $stdin = (string) substr($stdin, $written_now);
        }
        fclose($pipes[0]);
        unset($stdin, $pipes[0]);

        if (!$print_std) {
            $proc_ret = proc_close($proc); // this line will stall until the process has exited.
            $stdout = stream_get_contents($stdouth);
            $stderr = stream_get_contents($stderrh);
            return $proc_ret;
        }

        $stdout = "";
        $stderr = "";
        stream_set_blocking($stdouth, false);
        stream_set_blocking($stderrh, false);
        // copies whatever the child has written since the last call into $stdout/$stderr and echoes it;
        // returns true if anything new was found
        $fetchstd = function () use (&$stdout, &$stderr, $stdouth, $stderrh): bool {
            $got_something = false;
            $chunk = stream_get_contents($stdouth);
            if (is_string($chunk) && strlen($chunk) > 0) {
                $got_something = true;
                $stdout .= $chunk;
                fwrite(STDOUT, $chunk);
            }
            $chunk = stream_get_contents($stderrh);
            if (is_string($chunk) && strlen($chunk) > 0) {
                $got_something = true;
                $stderr .= $chunk;
                fwrite(STDERR, $chunk);
            }
            return $got_something;
        };
        while (($status = proc_get_status($proc))["running"]) {
            if (!$fetchstd()) {
                // nothing new: wait 100 ms before polling again
                usleep(100 * 1000);
            }
        }
        // the exit code must be taken from the first status that reports "not running"
        $proc_ret = $status["exitcode"];
        proc_close($proc);
        // pick up anything written between the last poll and process exit
        $fetchstd();
        return $proc_ret;
    } finally {
        fclose($stdouth);
        fclose($stderrh);
    }
}
/**
 * Run several shell commands in parallel and collect their output.
 *
 * Each command gets a closed stdin (so it cannot inherit ours) and has its stdout/stderr
 * redirected into temporary files, which are read back once the command has exited.
 *
 * @param array $cmds
 *            commands to run (each one is passed to proc_open())
 * @param int $max_concurrent_workers
 *            maximum number of commands running at the same time, must be >= 1
 * @param float $sleep_interval_seconds
 *            how long to sleep between polls while every running command is still busy
 * @throws \InvalidArgumentException if there is work to do but $max_concurrent_workers < 1
 * @throws \RuntimeException if a command cannot be started
 * @return array list of ["cmd" => ..., "stdout" => string, "stderr" => string, "exitcode" => int],
 *         in the order the commands were observed to finish (not the order of $cmds)
 */
function hhb_exec_parallel(array $cmds, int $max_concurrent_workers = 128, float $sleep_interval_seconds = 0.1): array
{
    $ret = [];
    if (count($cmds) === 0) {
        return $ret;
    }
    if ($max_concurrent_workers < 1) {
        // with 0 workers the scheduling loop below could never make progress
        throw new \InvalidArgumentException("max_concurrent_workers must be >= 1, got {$max_concurrent_workers}");
    }
    $sleep_microseconds = max(0, (int) ($sleep_interval_seconds * 1000000));
    $workers = [];
    // blocks until at least one running worker has exited, harvests every exited worker into $ret,
    // and returns how many were harvested
    $work = function () use (&$workers, &$ret, $sleep_microseconds): int {
        $closed_workers = 0;
        while ($closed_workers === 0 && count($workers) > 0) {
            foreach ($workers as $worker_key => $worker) {
                $status = proc_get_status($worker["handle"]);
                if ($status["running"]) {
                    continue;
                }
                proc_close($worker["handle"]);
                $finished = [
                    "cmd" => $worker["cmd"],
                    "stdout" => stream_get_contents($worker["stdout_handle"]),
                    "stderr" => stream_get_contents($worker["stderr_handle"]),
                    // taken from the first status reporting "not running"
                    "exitcode" => $status["exitcode"],
                ];
                fclose($worker["stdout_handle"]);
                fclose($worker["stderr_handle"]);
                unset($workers[$worker_key]);
                $ret[] = $finished;
                ++$closed_workers;
            }
            if ($closed_workers === 0) {
                // all workers are still busy.
                usleep($sleep_microseconds);
            }
        }
        return $closed_workers;
    };
    foreach ($cmds as $cmd) {
        while (count($workers) >= $max_concurrent_workers) {
            $work();
        }
        $curr = [
            "cmd" => $cmd,
            "stdout_handle" => tmpfile(),
            "stderr_handle" => tmpfile(),
        ];
        $descriptorspec = [
            // if we don't create a stdin, the child will *inherit* ours, we don't want that to happen,
            // so we create a stdin just to close it asap.
            0 => ["pipe", "rb"],
            // stdout and stderr are both files the child writes to
            1 => ["file", stream_get_meta_data($curr["stdout_handle"])["uri"], "wb"],
            2 => ["file", stream_get_meta_data($curr["stderr_handle"])["uri"], "wb"],
        ];
        $pipes = [];
        $handle = proc_open($cmd, $descriptorspec, $pipes);
        if ($handle === false) {
            fclose($curr["stdout_handle"]);
            fclose($curr["stderr_handle"]);
            throw new \RuntimeException("proc_open() failed for command: " . var_export($cmd, true));
        }
        fclose($pipes[0]);
        $curr["handle"] = $handle;
        $workers[] = $curr;
    }
    while (count($workers) > 0) {
        $work();
    }
    return $ret;
}
/**
 * Check whether a string is acceptable as a new username.
 *
 * Rules, checked in this order: no leading/trailing whitespace, valid UTF-8, no run of two or more
 * spaces, only alphanumerics / space / '-' / '_', and a character count within [$min_len, $max_len].
 *
 * @param string $username
 *            candidate username
 * @param string|null $error
 *            (output) human-readable reason for rejection, or '' when the username is valid
 * @param int $min_len
 *            minimum length in characters (not bytes)
 * @param int $max_len
 *            maximum length in characters (not bytes)
 * @return bool true if valid
 */
function validate_new_username(string $username, ?string &$error = null, int $min_len = 3, int $max_len = 20): bool
{
    if ($username !== ltrim($username)) {
        $error = "starts with space(s)!";
        return false;
    }
    if ($username !== rtrim($username)) {
        $error = "ends with space(s)!";
        return false;
    }
    // must come before the /u regexes below, which cannot match invalid UTF-8
    if (!mb_check_encoding($username, 'UTF-8')) {
        $error = 'not valid UTF8!';
        return false;
    }
    if (preg_match('/[\ ]{2,}/u', $username)) {
        $error = 'contains repeating spaces!';
        return false;
    }
    if (!preg_match('/^[[:alnum:]\ \-\_]+$/u', $username)) {
        $error = 'contains invalid characters!';
        return false;
    }
    $mblen = mb_strlen($username, 'UTF-8');
    if ($mblen < $min_len) {
        // double quotes so the limit is actually interpolated into the message
        $error = "username too short, must be at least {$min_len} character(s).";
        return false;
    }
    if ($mblen > $max_len) {
        $error = "username too long. can be no longer than {$max_len} character(s).";
        return false;
    }
    //todo: check for duplicate username
    $error = '';
    return true;
}
/**
 * Generate a random password using random_int().
 *
 * The built-in alphabets leave out the characters 1 I l o O 0 because they are easily confused
 * in some fonts.
 *
 * @param int $length
 *            number of characters to generate, must be >= 0
 * @param bool $lowercase
 *            include a-z (without l and o)
 * @param bool $uppercase
 *            include A-Z (without I and O)
 * @param bool $numbers
 *            include 2-9
 * @param string $additionalCharacters
 *            extra characters to draw from; valid UTF-8 is split per character (so multibyte
 *            characters stay intact), anything else is split per byte
 * @throws \InvalidArgumentException if $length is negative or the resulting alphabet is empty
 * @return string
 */
function generatePassword(int $length = 14, bool $lowercase = true, bool $uppercase = true, bool $numbers = true, string $additionalCharacters = ""): string
{
    if ($length < 0) {
        throw new \InvalidArgumentException("length must be >=0");
    }
    // the following are omitted from dict, because
    // they can be easily confused in some fonts: 1IloO0
    $dict = "";
    if ($lowercase) {
        // omitted lo
        $dict .= "abcdefghijkmnpqrstuvwxyz";
    }
    if ($uppercase) {
        // omitted IO
        $dict .= "ABCDEFGHJKLMNPQRSTUVWXYZ";
    }
    if ($numbers) {
        // omitted 01
        $dict .= "23456789";
    }
    $dict .= $additionalCharacters;
    if ($dict === "") {
        throw new \InvalidArgumentException("at least one of lowercase, uppercase, numbers, or additionalCharacters must be supplied");
    }
    // split per UTF-8 character so a multibyte additional character is never cut in half;
    // preg_split() returns false on invalid UTF-8, in which case we fall back to raw bytes
    $symbols = preg_split('//u', $dict, -1, PREG_SPLIT_NO_EMPTY);
    if ($symbols === false) {
        $symbols = str_split($dict);
    }
    $randmax = count($symbols) - 1;
    $ret = '';
    for ($i = 0; $i < $length; ++$i) {
        $ret .= $symbols[random_int(0, $randmax)];
    }
    return $ret;
}
/**
 * Return the MIME string for this paste, computing it on first use and caching it in $this->mime_string.
 *
 * NOTE: real detection is currently switched off by the `if (true)` block below, so this always
 * yields "text/plain; charset=UTF-8"; the `file`-based detection code after it is unreachable.
 *
 * The (unreachable) detection code would run `file --brief --mime-type --mime-encoding` either on
 * $this->upload_uri (PASTE_TYPE_FILE) or on $this->content piped through stdin (PASTE_TYPE_STRING).
 *
 * @throws \LogicException (unreachable code only) if `file` writes to stderr, or the paste type is unknown
 * @return string
 */
public function guess_mime(): string
{
// cached result from an earlier call
if ($this->mime_string !== null) {
return $this->mime_string;
}
// detection disabled: always taken, everything after this block is dead code
if (true) {
// fsck it..
$this->mime_string = "text/plain; charset=UTF-8";
return $this->mime_string;
}
if ($this->paste_type === $this::PASTE_TYPE_FILE) {
// the paste is a file on disk: let `file` inspect it by (shell-escaped) path
$cmd = implode(" ", array(
'file',
'--brief',
'--mime-type',
'--mime-encoding',
escapeshellarg($this->upload_uri)
));
$this->mime_string = shell_exec($cmd);
return $this->mime_string;
} elseif ($this->paste_type === $this::PASTE_TYPE_STRING) {
// o god, unidirectional communication is so much
// more difficult, shell_exec() is way easier than proc_open
// the paste is an in-memory string: feed it to `file` via stdin ("-")
$cmd = implode(" ", array(
'file',
'--brief',
'--mime-type',
'--mime-encoding',
'-'
));
$descriptorspec = array(
0 => array(
"pipe",
"rb"
),
1 => array(
"pipe",
"wb"
),
2 => array(
"pipe",
"wb"
)
);
$pipes = [];
$proc = proc_open($cmd, $descriptorspec, $pipes);
try {
// fwrite_all() is defined elsewhere in this file
fwrite_all($pipes[0], $this->content);
} catch (\Throwable $ex) {
// we couldn't write all, but we don't care.
unset($ex);
}
// closing stdin signals end-of-input to `file`
fclose($pipes[0]);
unset($pipes[0]);
stream_set_blocking($pipes[1], true);
$stdout = "";
// keep reading stdout for as long as the process reports itself as running
while (($status = proc_get_status($proc))["running"]) {
$tmp = stream_get_contents($pipes[1]);
if (is_string($tmp)) {
$stdout .= $tmp;
}
}
unset($status);
// one final read for output produced between the last poll and process exit
$tmp = stream_get_contents($pipes[1]);
if (is_string($tmp)) {
$stdout .= $tmp;
}
fclose($pipes[1]);
$stderr = stream_get_contents($pipes[2]);
fclose($pipes[2]);
unset($pipes[1], $pipes[2], $pipes);
// any stderr output at all is treated as a failure
if (! empty($stderr)) {
throw new \LogicException("file wrote to stderr, wtf? cmd: {$cmd} stderr: {$stderr} ");
}
proc_close($proc);
$this->mime_string = $stdout;
return $this->mime_string;
}
throw new \LogicException("unknown paste type!");
}
}
/**
 * Parse HTML as UTF-8 into a DOMDocument and strip every whitespace-only text node.
 *
 * DOMDocument normally keeps a text node for the blanks between tags
 * (e.g. in <head> <title></title></head> the space becomes head->firstChild), which makes
 * navigation differ from what browsers expose. After this function, head->firstChild is <title>.
 *
 * @param string $html
 * @param int $extra_flags extra libxml flags for DOMDocument::loadHTML
 * @param int $exclude_flags default flags to switch off (applied before $extra_flags is added)
 * @return \DOMDocument
 */
function loadHTML_noemptywhitespace(string $html, int $extra_flags = 0, int $exclude_flags = 0): \DOMDocument
{
    $default_flags = LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS | LIBXML_NONET;
    $load_flags = $extra_flags | ($default_flags & ~$exclude_flags);

    $document = new \DOMDocument();
    $document->preserveWhiteSpace = false;
    // the <?xml encoding> prefix makes libxml treat the input as UTF-8; parser warnings are silenced
    @$document->loadHTML('<?xml encoding="UTF-8">' . $html, $load_flags);

    // Pass 1: walk the whole tree with an explicit stack and remember the blank text nodes.
    // Nothing is removed while walking, so no live DOMNodeList is invalidated mid-iteration.
    $blank_text_nodes = [];
    $stack = [$document];
    while (count($stack) > 0) {
        $current = array_pop($stack);
        if ($current->hasChildNodes()) {
            foreach ($current->childNodes as $child) {
                $stack[] = $child;
            }
        }
        $is_blank_text = $current->nodeType === XML_TEXT_NODE
            && !$current->hasChildNodes()
            && !$current->hasAttributes()
            && trim($current->textContent) === '';
        if ($is_blank_text) {
            $blank_text_nodes[] = $current;
        }
    }
    // Pass 2: detach them.
    foreach ($blank_text_nodes as $blank) {
        $blank->parentNode->removeChild($blank);
    }
    return $document;
}
/**
 * fetch all urls in parallel,
 * warning: all urls must be unique (the result is keyed by url, so duplicates overwrite each other).
 *
 * A fixed pool of curl easy handles is created up front and re-used for successive urls.
 * All handles are released again even when an exception is thrown.
 *
 * @param array $urls_unique
 *            urls to fetch
 * @param int $max_connections
 *            (optional, default 100) max simultaneous connections, must be >= 1
 *            (some websites will auto-ban you for "ddosing" if you send too many requests simultaneously,
 *            and some wifi routers will get unstable on too many connections.. )
 * @param array|null $additional_curlopts
 *            (optional) set additional curl options here, each curl handle will get these options
 * @throws \InvalidArgumentException if there are urls to fetch but $max_connections < 1
 * @throws \RuntimeException on curl_multi errors and on failed transfers
 * @throws \RuntimeException on curl_init() / curl_setopt() errors
 * @return array(url=>response,url2=>response2,...)
 */
function curl_fetch_multi_2(array $urls_unique, int $max_connections = 100, ?array $additional_curlopts = null)
{
    if (count($urls_unique) === 0) {
        return array();
    }
    if ($max_connections < 1) {
        // with an empty pool the dispatch loop below could never obtain a worker
        throw new \InvalidArgumentException("max_connections must be >= 1, got {$max_connections}");
    }
    $ret = array();
    // $workers format: [(int)$ch]=url, one entry per transfer currently attached to $mh
    $workers = array();
    // easy handles that are idle and ready to take the next url
    $unemployed_workers = array();
    // every easy handle ever created, [(int)$ch]=$ch, used for cleanup
    $all_handles = array();
    $mh = curl_multi_init();
    try {
        $pool_size = min($max_connections, count($urls_unique));
        for ($i = 0; $i < $pool_size; ++$i) {
            $handle = curl_init();
            if (!$handle) {
                throw new \RuntimeException("failed creating unemployed worker #" . $i);
            }
            $unemployed_workers[] = $handle;
            $all_handles[(int) $handle] = $handle;
        }
        // drives the multi handle until at least one transfer has finished, stores the finished
        // responses in $ret and moves their handles back to $unemployed_workers
        $work = function () use (&$workers, &$unemployed_workers, $mh, &$ret): void {
            assert(count($workers) > 0, "work() called with 0 workers!!");
            $still_running = null;
            for (;;) {
                do {
                    $err = curl_multi_exec($mh, $still_running);
                } while ($err === CURLM_CALL_MULTI_PERFORM);
                if ($err !== CURLM_OK) {
                    $errinfo = [
                        "multi_exec_return" => $err,
                        "curl_multi_errno" => curl_multi_errno($mh),
                        "curl_multi_strerror" => curl_multi_strerror($err),
                    ];
                    $errstr = "curl_multi_exec error: " . str_replace(["\r", "\n"], "", var_export($errinfo, true));
                    throw new \RuntimeException($errstr);
                }
                if ($still_running < count($workers)) {
                    // some workers has finished downloading, process them
                    break;
                }
                // no workers finished yet, sleep-wait for workers to finish downloading.
                curl_multi_select($mh, 1);
            }
            while (false !== ($info = curl_multi_info_read($mh))) {
                if ($info['msg'] !== CURLMSG_DONE) {
                    // no idea what this is, it's not the message we're looking for though, ignore it.
                    continue;
                }
                // 'result' is a per-transfer CURLE_* code (not a CURLM_* multi code)
                if ($info['result'] !== CURLE_OK) {
                    $errinfo = [
                        "effective_url" => curl_getinfo($info['handle'], CURLINFO_EFFECTIVE_URL),
                        "curl_errno" => curl_errno($info['handle']),
                        "curl_error" => curl_error($info['handle']),
                        "curl_multi_errno" => curl_multi_errno($mh),
                        "curl_multi_strerror" => curl_multi_strerror(curl_multi_errno($mh)),
                    ];
                    $errstr = "curl_multi worker error: " . str_replace(["\r", "\n"], "", var_export($errinfo, true));
                    throw new \RuntimeException($errstr);
                }
                $ch = $info['handle'];
                $ch_index = (int) $ch;
                $url = $workers[$ch_index];
                $ret[$url] = curl_multi_getcontent($ch);
                unset($workers[$ch_index]);
                curl_multi_remove_handle($mh, $ch);
                $unemployed_workers[] = $ch;
            }
        };
        $opts = array(
            CURLOPT_URL => '',
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_ENCODING => '',
        );
        // i would have used array_merge(), but it does scary stuff with integer keys.. foreach() is easier to reason about
        foreach ($additional_curlopts ?? array() as $key => $val) {
            $opts[$key] = $val;
        }
        foreach ($urls_unique as $url) {
            while (empty($unemployed_workers)) {
                $work();
            }
            $new_worker = array_pop($unemployed_workers);
            $opts[CURLOPT_URL] = $url;
            if (!curl_setopt_array($new_worker, $opts)) {
                $errstr = "curl_setopt_array failed: " . curl_errno($new_worker) . ": " . curl_error($new_worker) . " " . var_export($opts, true);
                throw new \RuntimeException($errstr);
            }
            $workers[(int) $new_worker] = $url;
            curl_multi_add_handle($mh, $new_worker);
        }
        while (count($workers) > 0) {
            $work();
        }
        return $ret;
    } finally {
        foreach ($all_handles as $handle_id => $handle) {
            if (isset($workers[$handle_id])) {
                // still attached (we are unwinding because of an exception)
                curl_multi_remove_handle($mh, $handle);
            }
            curl_close($handle);
        }
        curl_multi_close($mh);
    }
}
/**
 * Convert a (binary) string into its bit representation:
 * 8 characters of '0'/'1' per input byte, most significant bit first.
 *
 * @param string $str
 * @return string
 */
function strtobits(string $str): string
{
    $bit_string = "";
    $byte_count = strlen($str);
    for ($pos = 0; $pos < $byte_count; ++$pos) {
        // decbin() drops leading zeros, so pad every byte back to 8 digits
        $bit_string .= str_pad(decbin(ord($str[$pos])), 8, "0", STR_PAD_LEFT);
    }
    return $bit_string;
}
/**
 * Inverse of strtobits(): convert a string of '0'/'1' characters into bytes,
 * 8 bits per byte, most significant bit first.
 *
 * If the number of bits is not a multiple of 8, the input is treated as if it were left-padded
 * with zeros. Any character other than '1' counts as a 0 bit.
 *
 * @param string $bits
 * @return string
 */
function bitstostr(string $bits): string
{
    $bit_count = strlen($bits);
    if ($bit_count === 0) {
        return "";
    }
    // pad explicitly instead of reading past the end of the string
    $missing = (8 - ($bit_count % 8)) % 8;
    if ($missing > 0) {
        $bits = str_repeat("0", $missing) . $bits;
    }
    $ret = "";
    foreach (str_split($bits, 8) as $octet) {
        $chr = 0;
        for ($ii = 0; $ii < 8; ++$ii) {
            $chr = ($chr << 1) | ($octet[$ii] === '1' ? 1 : 0);
        }
        $ret .= chr($chr);
    }
    return $ret;
}
/**
 * Produce a colored `git diff` between a file on disk and a replacement text.
 *
 * Runs `git diff --text --no-index --color=always --exit-code <file> -` with $new_code on stdin.
 *
 * @param string $file_path
 *            path of the existing file (left side of the diff)
 * @param string $new_code
 *            proposed new content (right side of the diff)
 * @throws \RuntimeException if the temporary files or the git process cannot be created
 * @throws \LogicException if git exits with a code other than 0 (equal) or 1 (different);
 *         the message contains git's stderr
 * @return string the diff as printed by git (empty when there is no difference)
 */
function git_diff_pretty_file_code(string $file_path, string $new_code): string
{
    // using a file avoids a theoretical issue that is difficult to explain:
    // git reach end of file_path content but not end of stdin content,
    // and starts writing everything to stdout as diff, but the stdout buffer is full,
    // meanwhile php is waiting for proc_close() before running stream_get_contents($pipes[1]),
    // which could possibly result in a deadlock
    // (git wait for php to read stdout buffer, and php wait for git to exit)
    $stdout_handle = tmpfile();
    $stderr_handle = tmpfile();
    if ($stdout_handle === false || $stderr_handle === false) {
        foreach ([$stdout_handle, $stderr_handle] as $maybe_handle) {
            if (is_resource($maybe_handle)) {
                fclose($maybe_handle);
            }
        }
        throw new \RuntimeException("failed to create temporary files for git output");
    }
    try {
        $descriptorspec = [
            0 => ["pipe", "rb"],
            1 => $stdout_handle,
            2 => $stderr_handle,
        ];
        $cmd = implode(" ", [
            "git diff",
            "--text",
            "--no-index",
            "--color=always",
            "--exit-code",
            escapeshellarg($file_path),
            "-",
        ]);
        $pipes = [];
        $proc = proc_open($cmd, $descriptorspec, $pipes);
        if ($proc === false) {
            throw new \RuntimeException("proc_open() failed for command: {$cmd}");
        }
        try {
            fwrite_all($pipes[0], $new_code);
        } finally {
            // always close stdin and reap the process, even if writing failed
            fclose($pipes[0]);
            $ret = proc_close($proc);
        }
        rewind($stderr_handle);
        $stderr = stream_get_contents($stderr_handle);
        rewind($stdout_handle);
        $stdout = stream_get_contents($stdout_handle);
        // thanks to "--exit-code", 0 means equal, 1 means they differ, anything else means error
        // and without --exit-code, git sometimes unexpectedly returns 1 instead of 0,
        // and i can't find any documentation on what
        // "return 1 without --exit-code, and nothing to stderr" actually means..
        if ($ret !== 0 && $ret !== 1) {
            throw new \LogicException("git diff failed with exit code " . var_export($ret, true) . "; stderr: " . var_export($stderr, true));
        }
        return $stdout;
    } finally {
        fclose($stdout_handle);
        fclose($stderr_handle);
    }
}
/**
 * create an absolute path,
 * supporting both windows-style (for Arild) and linux-style paths,
 * and ".." and "."
 *
 * The arguments are joined with the platform's directory separator, both kinds of slashes are
 * normalised to it, repeated separators are collapsed, and the result is resolved with realpath().
 * If the last argument is exactly "/" or "\", the result ends with exactly one directory separator.
 *
 * @param bool $mustExist
 *            true: throw if the path does not exist;
 *            false: return the normalised (but unresolved) path instead
 * @param string ...$args
 *            the path components
 * @throws \InvalidArgumentException if $mustExist is true and the path does not exist
 * @return string
 */
function pathify(bool $mustExist, string ...$args): string
{
    // end() yields false for an empty list, which simply makes the flag false
    $last_component = end($args);
    $end_with_dir_separator = ($last_component === '/' || $last_component === '\\');

    $ret = str_replace(['\\', '/'], DIRECTORY_SEPARATOR, implode(DIRECTORY_SEPARATOR, $args));
    // after the str_replace() only DIRECTORY_SEPARATOR is left, so collapsing runs of it is enough
    $doubled = DIRECTORY_SEPARATOR . DIRECTORY_SEPARATOR;
    while (strpos($ret, $doubled) !== false) {
        $ret = str_replace($doubled, DIRECTORY_SEPARATOR, $ret);
    }
    $normalised = $ret;
    $ret = realpath($normalised);
    if (false === $ret) {
        if ($mustExist) {
            throw new \InvalidArgumentException("path does not exist: {$normalised}");
        }
        $ret = $normalised;
    }
    // realpath() strips a trailing separator, the unresolved fallback keeps it:
    // only append one when it is not already there
    if ($end_with_dir_separator && strlen($ret) >= 2 && substr($ret, -1) !== DIRECTORY_SEPARATOR) {
        $ret .= DIRECTORY_SEPARATOR;
    }
    return $ret;
}
/**
 * convert seconds to a human-readable format
 * example: 123456789.1 seconds
 * brief=false: 3 years 10 months 28 days 21 hours 33 minutes 9.1 seconds
 * brief=true: 3y10M28d21h33m9.1s
 * default format is brief=false
 *
 * Years/months/days are calendar differences counted from the unix epoch (1970-01-01 UTC).
 * The fraction is rounded to 3 decimals and trailing zeros are dropped; units whose value is 0
 * are omitted, except that a non-zero fraction is always shown (e.g. 60.5 => "1 minute 0.5 seconds").
 *
 * @param float $seconds
 * @param bool $brief
 * @return string
 */
function secondsToHumanReadable(float $seconds, bool $brief = false): string
{
    $whole = (int) $seconds;
    $fraction_digits = ''; // decimals without the leading "0.", '' when there is no fraction
    $fraction = $seconds - $whole;
    if ($fraction !== 0.0) {
        $rounded = number_format(abs($fraction), 3, '.', '');
        if ($rounded[0] === '1') {
            // e.g. 59.9996 rounds to 60.000: carry into the whole seconds instead of losing it
            $whole += ($seconds < 0 ? -1 : 1);
        } else {
            $fraction_digits = rtrim(substr($rounded, 2), '0');
        }
    }
    $has_fraction = ($fraction_digits !== '');

    if ($whole === 0) {
        // special case
        $value = $has_fraction ? "0." . $fraction_digits : "0";
        return $brief ? $value . "s" : $value . " seconds";
    }

    $diff = (new \DateTimeImmutable('@0'))->diff(new \DateTimeImmutable("@{$whole}"));
    // DateInterval property => [brief suffix, long name]; note M = month, m = minute
    $units = [
        'y' => ['y', 'year'],
        'm' => ['M', 'month'],
        'd' => ['d', 'day'],
        'h' => ['h', 'hour'],
        'i' => ['m', 'minute'],
        's' => ['s', 'second'],
    ];
    $parts = [];
    foreach ($units as $property => [$short, $long]) {
        $amount = $diff->$property;
        $with_fraction = ($property === 's' && $has_fraction);
        if ($amount === 0 && !$with_fraction) {
            continue;
        }
        $text = (string) $amount;
        if ($with_fraction) {
            $text .= "." . $fraction_digits;
        }
        if ($brief) {
            $parts[] = $text . $short;
        } else {
            // singular only for exactly 1 (so "1 second" but "1.5 seconds")
            $plural = ($amount === 1 && !$with_fraction) ? '' : 's';
            $parts[] = $text . ' ' . $long . $plural;
        }
    }
    return implode($brief ? '' : ' ', $parts);
}
/**
 * Format a byte count using the largest binary unit (1 KB = 1024 B) that is not bigger than the
 * value, with two decimals: B, KB, MB, GB or TB.
 *
 * @param int $bytes
 * @return string e.g. "1.50 KB"
 */
function bytesToHumanReadable(int $bytes): string
{
    // largest unit first; the value is the power of 1024 the unit stands for
    $unit_powers = [
        'TB' => 4,
        'GB' => 3,
        'MB' => 2,
        'KB' => 1,
    ];
    foreach ($unit_powers as $suffix => $power) {
        $unit_size = 1024 ** $power;
        if ($bytes >= $unit_size) {
            return number_format($bytes / $unit_size, 2) . " " . $suffix;
        }
    }
    return number_format($bytes, 2) . " B";
}
/**
 * Check whether a TCP connection to $hostname:$port can be established.
 *
 * @param string $hostname
 * @param int $port
 * @param int|null $errno
 *            (output) error number reported by fsockopen()
 * @param string|null $errstr
 *            (output) error message reported by fsockopen()
 * @param float|null $connect_time
 *            (output) seconds spent in the connection attempt, whether or not it succeeded
 * @param int $timeout_seconds
 *            connection timeout
 * @return bool true if the connection succeeded (it is closed again immediately)
 */
function is_port_open(string $hostname, int $port, ?int &$errno = null, ?string &$errstr = null, ?float &$connect_time = null, int $timeout_seconds = 2): bool
{
    $errno = null;
    $errstr = null;
    $started_at = microtime(true);
    // failures are reported through the return value and $errno/$errstr, so the warning is suppressed
    $fp = @fsockopen($hostname, $port, $errno, $errstr, (float) $timeout_seconds);
    $connect_time = microtime(true) - $started_at;
    if (!$fp) {
        return false;
    }
    fclose($fp);
    return true;
}
/**
 * Pack an integer as a little-endian byte string using as few bytes as possible
 * (trailing, i.e. most significant, zero bytes are dropped; 0 itself is a single "\x00").
 *
 * For comparison, fixed-width little-endian packing:
 * 1 byte: chr() / pack(C)
 * 2 bytes: pack(v)
 * 4 bytes: pack(V)
 * 8 bytes: pack(P)
 * (pack() has no format for 3, 5, 6 or 7 bytes, which is what this function is for)
 *
 * @param int $i
 * @return string 1 to 8 bytes
 */
function stupidpack_le(int $i): string
{
    if ($i === 0) {
        // rtrim() would strip all 8 zero bytes and leave an empty string
        return "\x00";
    }
    return rtrim(pack('P', $i), "\x00");
}
/**
 * improvised/home-made/inferior replica of Laravel's dd/dump-and-die function for debugging,
 * does pretty much the same as var_dump($args,__FILE__,__LINE__);die();
 * screenshot: https://i.imgur.com/5k0LrDL.png
 *
 * Prints where it was called from, the source lines around the call (2 before, the call, 2 after),
 * var_dump()s the arguments and terminates the script.
 * Output is plain text on the command line or when a text/plain content type is in effect;
 * otherwise the code excerpt is HTML-escaped.
 *
 * @param mixed ...$args
 * @return never
 */
function dd(...$args)
{
    $headers_sent = headers_sent();
    if (!$headers_sent) {
        header("Content-Type: text/plain; charset=utf-8");
    } else {
        // too late to change the content type: at least keep the formatting readable in a browser
        echo "<pre>\n";
    }
    // on the command line there is no browser to escape for
    $plainText = PHP_SAPI === 'cli' || str_contains(var_export(headers_list(), true), 'text/plain;');
    $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 2);
    $file = $trace[0]['file'];
    $line = $trace[0]['line'];
    // FILE_IGNORE_NEW_LINES: otherwise each line keeps its own line ending and the implode()
    // below would print a blank line between every two lines of code
    $lines = file($file, FILE_IGNORE_NEW_LINES);
    if ($lines === false) {
        $lines = [];
    }
    $code = implode("\n", array_slice($lines, max($line - 3, 0), 5));
    echo "dd() called from $file:$line\n";
    echo "code:\n";
    if ($plainText) {
        echo $code, "\n";
    } else {
        echo htmlspecialchars($code, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE | ENT_DISALLOWED, 'UTF-8', true), "<br/>\n";
    }
    var_dump(...$args);
    die();
}
/**
 * Upload text to termbin.com (TCP port 9999) and return the URL the service answers with.
 *
 * @param string $str
 *            the text to upload
 * @throws \Exception if the socket cannot be created, switched to blocking mode or connected
 * @throws \RuntimeException if sending fails or socket_select() fails
 * @return string the response from the service (trailing whitespace/NUL removed),
 *         or the literal 'paste is empty....' when $str is empty (nothing is sent in that case)
 */
function pastebinit(string $str): string
{
    $len = strlen($str);
    if ($len < 1) {
        return 'paste is empty....'; // ....
    }
    $socket = socket_create(AF_INET, SOCK_STREAM, SOL_TCP);
    if ($socket === false) {
        throw new \Exception('socket_create() failed: ' . socket_strerror(socket_last_error()));
    }
    try {
        if (!socket_set_block($socket)) {
            throw new \Exception('socket_set_block() failed: ' . socket_strerror(socket_last_error($socket)));
        }
        if (socket_connect($socket, 'termbin.com', 9999) === false) {
            throw new \Exception('socket_connect() failed: ' . socket_strerror(socket_last_error($socket)));
        }
        while ($len > 0) {
            $sent = socket_write($socket, $str, $len);
            if ($sent === false || $sent < 1) {
                throw new \RuntimeException("failed to write the last {$len} byte(s)!: " . socket_strerror(socket_last_error($socket)));
            }
            $str = substr($str, $sent);
            $len -= $sent;
        }
        assert($len === 0);
        socket_shutdown($socket, 1); // shutdown the socket for writing
        $full_url = '';
        for (;;) {
            $read = [$socket];
            $write = null;
            $except = [$socket];
            $seconds = 5;
            $ss = socket_select($read, $write, $except, $seconds);
            if ($ss === false) {
                throw new \RuntimeException("socket_select() failed: " . socket_strerror(socket_last_error($socket)));
            }
            if ($ss === 0) {
                // nothing arrived within the timeout: return what we have
                break;
            }
            $url_chunk = "";
            $received = socket_recv($socket, $url_chunk, 99, MSG_DONTWAIT);
            if ($received === false || $received < 1 || !is_string($url_chunk) || $url_chunk === "") {
                // error or connection closed by the peer
                break;
            }
            $full_url .= $url_chunk;
            if (substr($url_chunk, -1) === "\x00") {
                // a trailing NUL byte ends the response
                break;
            }
        }
        return rtrim($full_url);
    } finally {
        // runs on success and on every exception above, so the socket never leaks
        socket_close($socket);
    }
}
/**
 * Render an integer bitmask as PHP source code made of its individual bits,
 * e.g. 5 => "(1 << 0) | (1 << 2)". A mask without any bit set is rendered as "0".
 *
 * @param int $flags
 * @return string
 */
function int_to_flags_code(int $flags): string
{
    $terms = [];
    $bit_width = PHP_INT_SIZE * 8;
    for ($bit = 0; $bit < $bit_width; ++$bit) {
        if (($flags & (1 << $bit)) !== 0) {
            $terms[] = "(1 << $bit)";
        }
    }
    if (count($terms) === 0) {
        return '0';
    }
    return implode(' | ', $terms);
}
/**
 * Write the whole string to a stream, calling fwrite() again for as long as it only
 * accepts part of the data.
 *
 * @param resource $fp
 *            writable stream
 * @param string $data
 *            data to write
 * @throws RuntimeException if fwrite() returns false, or returns 0 more than 100 times in a row
 * @return void
 */
function fwrite_all($fp, string $data): void
{
    $total_len = strlen($data);
    $written_total = 0;
    $stalled_writes = 0; // consecutive fwrite() calls that wrote nothing
    while (true) {
        $chunk_len = fwrite($fp, $data);
        if ($chunk_len === false) {
            throw new RuntimeException("fwrite failed after {$written_total}/{$total_len} bytes written: " . print_r(error_get_last(), true));
        }
        if ($chunk_len !== 0) {
            $stalled_writes = 0;
        } elseif (++$stalled_writes > 100) {
            throw new RuntimeException("fwrite failed after {$written_total}/{$total_len} bytes written: fwrite() returned 0 {$stalled_writes} times in a row.");
        }
        $written_total += $chunk_len;
        if ($written_total === $total_len) {
            return;
        }
        // drop what has been written and retry with the remainder
        $data = substr($data, $chunk_len);
    }
}
<?php
// Start of a separate script (a crawler that fetches pages through local Tor proxies).
// Strict scalar typing for everything declared below.
declare ( strict_types = 1 )
;
// Report every error level (all bits set).
error_reporting ( ~ 0 );
/**
 * Error handler that turns every PHP error which is covered by the current error_reporting()
 * level into an ErrorException. Errors outside that level are ignored (nothing is thrown).
 *
 * @param int $severity error level (E_* constant)
 * @param string $message error message
 * @param string $file file in which the error was raised
 * @param int $line line on which the error was raised
 * @throws ErrorException
 */
function exception_error_handler($severity, $message, $file, $line)
{
    $is_reported = (error_reporting() & $severity) !== 0;
    if ($is_reported) {
        throw new ErrorException($message, 0, $severity, $file, $line);
    }
    // This error code is not included in error_reporting
    return;
}
// from here on, warnings/notices/etc. surface as exceptions
set_error_handler("exception_error_handler");
/**
 * Singleton that downloads URLs in parallel through a pool of local Tor SOCKS proxies
 * and caches successful responses in an SQLite database stored next to this file.
 *
 * Creating the instance starts 20 `tor` processes (SOCKS ports 9000-9019) and waits 20 seconds
 * for them to come up; destroying it waits for all pending downloads and terminates the proxies.
 */
class curl_async_stuff
{
    /** text that appears in a response when the remote site has rate-limited us */
    private const RATE_LIMIT_MARKER = 'Du har nu överskridit den mängd sökningar';

    /** first SOCKS port; proxy number N listens on this + N */
    private $proxy_list_start_port = 9000;
    /** proc_open() handles of the running tor processes, indexed by proxy number */
    private $proxy_list = array();
    /** the curl multi handle driving every transfer */
    private $mh;
    /** transfers in flight: [(int) curl handle => ["handle" => ..., "url" => ..., "finished_callback" => ...]] */
    private $workers = [];
    /** not read or written anywhere in this class */
    private $url_cache = [];
    /** PDO connection to the SQLite url cache */
    private $url_cache_db_handle;

    /**
     * Start the tor proxies, create the curl multi handle and open (or create) the cache database.
     *
     * @throws \RuntimeException if a tor process cannot be started
     */
    private function __construct()
    {
        for ($proxynum = 0; $proxynum < 20; ++$proxynum) {
            // every tor instance needs its own DataDirectory
            $datadir = "tor_data_dirs" . DIRECTORY_SEPARATOR . $proxynum;
            if (!is_dir($datadir)) {
                mkdir($datadir, 0777, true);
            }
            $datadir = realpath($datadir);
            $cmd = "tor --SOCKSPort " . ($this->proxy_list_start_port + $proxynum) . " --DataDirectory " . escapeshellarg($datadir);
            $descriptorspec = array(
                // stdin will be inherited if we don't create it, we dont want that, so we create stdin just to close it.
                0 => array("pipe", "rb"),
            );
            $pipes = [];
            $process = proc_open($cmd, $descriptorspec, $pipes);
            if ($process === false) {
                throw new \RuntimeException("failed to start tor proxy #{$proxynum}: {$cmd}");
            }
            fclose($pipes[0]);
            $this->proxy_list[$proxynum] = $process;
        }
        // crude fixed wait to give the tor processes time to start
        for ($i = 0; $i < 20; ++$i) {
            echo "waiting for tor setup...\n";
            sleep(1);
        }
        $this->mh = curl_multi_init();
        $cache_file = __FILE__ . ".url_cache.sqlite3";
        $this->url_cache_db_handle = new \PDO('sqlite:' . $cache_file, '', '', array(
            \PDO::ATTR_EMULATE_PREPARES => false,
            \PDO::ATTR_ERRMODE => \PDO::ERRMODE_EXCEPTION,
            \PDO::ATTR_DEFAULT_FETCH_MODE => \PDO::FETCH_ASSOC,
        ));
        $this->url_cache_db_handle->exec('CREATE TABLE IF NOT EXISTS
`urlcache` (
id INTEGER PRIMARY KEY,
timestamp INTEGER,
url STRING UNIQUE,
html STRING
);');
    }

    /**
     * Wait for every pending download, then shut down curl and the tor proxies.
     * The proxies are terminated even if finishing the downloads throws.
     */
    public function __destruct()
    {
        echo "destructing!";
        try {
            // $mh has no default value, so it is null if it was never assigned
            if ($this->mh !== null) {
                $this->block_until_handle_count_max(0);
                curl_multi_close($this->mh);
            }
        } finally {
            foreach ($this->proxy_list as $proxy) {
                proc_terminate($proxy);
                proc_close($proxy);
            }
        }
    }

    /**
     * Return the shared instance, creating it on first use.
     */
    public static function Instance(): self
    {
        static $instance = null;
        if ($instance === null) {
            $instance = new self();
        }
        return $instance;
    }

    /**
     * Queue a download of $url, or serve it from the cache.
     *
     * Blocks first until at most $max_parallel downloads are in flight.
     * On a cache hit the callback is invoked immediately as callback($html, null).
     * Otherwise the transfer is started through a randomly chosen tor proxy, and the callback is
     * invoked as callback($html, $curl_handle) once one of the block_until_* methods sees it finish.
     *
     * @param string $url
     * @param array $additional_curlopts curl options that are added to / override the defaults
     * @param callable|null $finished_callback
     * @param int $max_parallel
     */
    public function fetch_async_callback(string $url, array $additional_curlopts = [], ?callable $finished_callback = null, int $max_parallel = 100): void
    {
        $this->block_until_handle_count_max($max_parallel);
        $lookup = $this->url_cache_db_handle->prepare("SELECT html FROM urlcache WHERE url = :url");
        $lookup->execute(array(":url" => $url));
        $cache = $lookup->fetch();
        if (isset($cache["html"])) {
            // the callback is optional, so only call it when one was given
            if ($finished_callback !== null) {
                $finished_callback($cache["html"], null);
            }
            return;
        }
        echo "fetching {$url} (not from cache)\n";
        $worker_arr = array(
            "handle" => curl_init($url),
            "url" => $url,
            "finished_callback" => $finished_callback,
        );
        $opts = array(
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_ENCODING => "",
            CURLOPT_USERAGENT => "php/" . PHP_VERSION . " libcurl/" . curl_version()["version"],
            // pick one of the running tor proxies at random
            CURLOPT_PRE_PROXY => 'SOCKS5://127.0.0.1:' . (random_int($this->proxy_list_start_port, $this->proxy_list_start_port + (count($this->proxy_list) - 1))),
        );
        foreach ($additional_curlopts as $key => $overwrite) {
            $opts[$key] = $overwrite;
        }
        curl_setopt_array($worker_arr['handle'], $opts);
        curl_multi_add_handle($this->mh, $worker_arr['handle']);
        $this->workers[(int) $worker_arr['handle']] = $worker_arr;
    }

    /**
     * Block until at most $max downloads are still in flight.
     */
    public function block_until_handle_count_max(int $max): void
    {
        echo "handle count: " . count($this->workers) . "\n";
        while (count($this->workers) > $max) {
            $this->block_until_at_least_1_download_completed();
        }
    }

    /**
     * Block until at least one pending download has finished, and process every download that
     * has finished by then. Returns immediately when nothing is pending.
     *
     * @throws \LogicException if a finished response looks rate-limited (empty, 1000 bytes or
     *         shorter, or containing the rate-limit marker text)
     */
    public function block_until_at_least_1_download_completed(): void
    {
        if (count($this->workers) < 1) {
            // ...
            return;
        }
        $closed_at_least_1 = false;
        for (;;) {
            curl_multi_exec($this->mh, $still_running);
            while ($info = curl_multi_info_read($this->mh)) {
                if ($info['msg'] !== CURLMSG_DONE) {
                    continue;
                }
                $closed_at_least_1 = true;
                $this->finish_transfer($info['handle']);
            }
            if ($closed_at_least_1) {
                return;
            }
            curl_multi_select($this->mh);
        }
    }

    /**
     * Process one finished transfer: validate it, cache it, run its callback.
     * The transfer is always unregistered and its handle closed, even when this throws;
     * otherwise a failed transfer would stay in $this->workers forever and the
     * block_until_* loops could never finish.
     *
     * @param resource|\CurlHandle $handle
     * @throws \LogicException if the response looks rate-limited
     */
    private function finish_transfer($handle): void
    {
        $worker_key = (int) $handle;
        $worker = $this->workers[$worker_key];
        try {
            $content = curl_multi_getcontent($handle);
            $rate_limit_reason = $this->rate_limit_reason($content);
            if ($rate_limit_reason !== null) {
                throw new \LogicException("rate-limited on " . $worker['url'] . ": " . $rate_limit_reason);
            }
            // OR REPLACE: the same url may have been fetched twice before the first copy was cached
            $stm = $this->url_cache_db_handle->prepare("INSERT OR REPLACE INTO urlcache (timestamp, url, html) VALUES(:timestamp,:url,:html); ");
            $stm->execute(array(
                ":timestamp" => time(),
                ":url" => $worker['url'],
                ":html" => $content,
            ));
            if (isset($worker["finished_callback"])) {
                ($worker['finished_callback'])($content, $handle);
            }
        } finally {
            unset($this->workers[$worker_key]);
            curl_multi_remove_handle($this->mh, $handle);
            curl_close($handle);
        }
    }

    /**
     * Explain why a response is considered rate-limited, or return null if it looks fine.
     *
     * @param string|null $content response body
     */
    private function rate_limit_reason($content): ?string
    {
        if (empty($content)) {
            return "no response...";
        }
        if (strlen($content) <= 1000) {
            return "response was too small (" . strlen($content) . " bytes)";
        }
        if (false !== strpos($content, self::RATE_LIMIT_MARKER)) {
            return "'" . self::RATE_LIMIT_MARKER . "' ";
        }
        return null;
    }
}
/**
 * Crawl the merinfo.se company search for every single-character query (a-z, 0-9):
 * fetch page 1 of each query, read the total hit count from it, then fetch the remaining
 * result pages. Finally var_dump()s the last response that was stored.
 *
 * Downloads go through curl_async_stuff (Tor proxies + SQLite cache).
 *
 * @throws \LengthException if the hit-count text cannot be found on a first result page
 */
function get_all_companies_urls(): void
{
    $chars = "abcdefghijklmnopqrstuvwxyz0123456789";
    // the site reports "Visar 1 till 20 av N träffar", i.e. 20 hits per page
    $results_per_page = 20;
    $responses = array();
    $crawler = curl_async_stuff::Instance();
    for ($i = 0, $imax = strlen($chars); $i < $imax; ++$i) {
        $url = 'https://www.merinfo.se/search?who=' . urlencode($chars[$i]) . '&d=c&page=1';
        $crawler->fetch_async_callback($url, [], function ($html, $ch) use (&$responses, $url): void {
            echo "loaded {$url}!\n";
            $responses[$url] = $html;
        });
    }
    // wait for every first page before looking at the hit counts
    $crawler->block_until_handle_count_max(0);
    // Visar 1
    // till 20
    // av 7386 träffar.
    $rex = '/Visar\s+(?<current_start_result>\d+)\s+till\s+(?<current_end_result>\d+)\s+av\s+(?<total_results>\d+)\s+träffar\./u';
    // snapshot of the keys: $responses grows while we iterate
    $url_keys = array_keys($responses);
    foreach ($url_keys as $urlkey) {
        $response = $responses[$urlkey];
        $matches = [];
        if (!preg_match($rex, $response, $matches)) {
            // report the page that actually failed, not whatever $url happened to hold last
            throw new \LengthException("extract-match-count-rex failed on {$urlkey}");
        }
        var_dump($matches);
        // round up and include the last page, so a partially filled final page is fetched too
        $total_pages = (int) ceil(((int) ($matches["total_results"])) / $results_per_page);
        for ($pagenum = 2; $pagenum <= $total_pages; ++$pagenum) {
            // https://www.merinfo.se/search?who=a&d=c&page=2
            $url = rtrim($urlkey, '0123456789') . $pagenum;
            $crawler->fetch_async_callback($url, [], function ($response, $ch) use ($url, &$responses): void {
                $responses[$url] = $response;
            });
            $crawler->block_until_handle_count_max(50);
        }
    }
    $crawler->block_until_handle_count_max(0);
    var_dump($responses[array_key_last($responses)]);
}
// Script entry point: the crawl starts as soon as this file is executed.
get_all_companies_urls ();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment