Skip to content

Instantly share code, notes, and snippets.

@romainnorberg
Created May 11, 2018 07:58
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save romainnorberg/b4176e2e90717faded9ffffbbfd0c861 to your computer and use it in GitHub Desktop.
Save romainnorberg/b4176e2e90717faded9ffffbbfd0c861 to your computer and use it in GitHub Desktop.
PHP performance test
<?php
// Single combined pattern: one alternation of crawler/bot name fragments.
// Flags: `u` treats pattern and subject as UTF-8, `i` makes the match case-insensitive.
// NOTE(review): the `.` characters in names such as `grub.org` or `Mail.RU_Bot` are not
// escaped, so they match any character rather than a literal dot.
// NOTE(review): `dotbot` and `Twitterbot` each appear twice in the alternation.
$excludeBotsRegex = '/(googlebot|Googlebot-Mobile|Googlebot-Image|Google favicon|Mediapartners-Google|bingbot|slurp|java|wget|curl|Commons-HttpClient|Python-urllib|libwww|httpunit|nutch|phpcrawl|msnbot|jyxobot|FAST-WebCrawler|FAST Enterprise Crawler|biglotron|teoma|convera|seekbot|gigablast|exabot|ngbot|ia_archiver|GingerCrawler|webmon |httrack|webcrawler|grub.org|UsineNouvelleCrawler|antibot|netresearchserver|speedy|fluffy|bibnum.bnf|findlink|msrbot|panscient|yacybot|AISearchBot|IOI|ips-agent|tagoobot|MJ12bot|dotbot|woriobot|yanga|buzzbot|mlbot|yandexbot|purebot|Linguee Bot|Voyager|CyberPatrol|voilabot|baiduspider|citeseerxbot|spbot|twengabot|postrank|turnitinbot|scribdbot|page2rss|sitebot|linkdex|Adidxbot|blekkobot|ezooms|dotbot|Mail.RU_Bot|discobot|heritrix|findthatfile|europarchive.org|NerdByNature.Bot|sistrix crawler|ahrefsbot|Aboundex|domaincrawler|wbsearchbot|summify|ccbot|edisterbot|seznambot|ec2linkfinder|gslfbot|aihitbot|intelium_bot|facebookexternalhit|yeti|RetrevoPageAnalyzer|lb-spider|sogou|lssbot|careerbot|wotbox|wocbot|ichiro|DuckDuckBot|lssrocketcrawler|drupact|webcompanycrawler|acoonbot|openindexspider|gnam gnam spider|web-archive-net.com.bot|backlinkcrawler|coccoc|integromedb|content crawler spider|toplistbot|seokicks-robot|it2media-domain-crawler|ip-web-crawler.com|siteexplorer.info|elisabot|proximic|changedetection|blexbot|arabot|WeSEE:Search|niki-bot|CrystalSemanticsBot|rogerbot|360Spider|psbot|InterfaxScanBot|Lipperhey SEO Service|CC Metadata Scaper|g00g1e.net|GrapeshotCrawler|urlappendbot|brainobot|fr-crawler|binlar|SimpleCrawler|Livelapbot|Twitterbot|cXensebot|smtbot|bnf.fr_bot|A6-Indexer|ADmantX|Facebot|Twitterbot|OrangeBot|memorybot|AdvBot|MegaIndex|SemanticScholarBot|ltx71|nerdybot|xovibot|BUbiNG|Qwantify|archive.org_bot|Applebot|TweetmemeBot|crawler4j|findxbot|SemrushBot|yoozBot|lipperhey|y!j-asr|Domain Re-Animator Bot|AddThis)/ui';
// List of crawler/bot name fragments, one per element, for the per-entry matching benchmark.
// Entries are raw strings: they are not regex-escaped here, and 'webmon ' deliberately
// carries a trailing space inside the literal.
$excludeBotsArray = [
'googlebot',
'Googlebot-Mobile',
'Googlebot-Image',
'Google favicon',
'Mediapartners-Google',
'bingbot',
'slurp',
'java',
'wget',
'curl',
'Commons-HttpClient',
'Python-urllib',
'libwww',
'httpunit',
'nutch',
'phpcrawl',
'msnbot',
'jyxobot',
'FAST-WebCrawler',
'FAST Enterprise Crawler',
'biglotron',
'teoma',
'convera',
'seekbot',
'gigablast',
'exabot',
'ngbot',
'ia_archiver',
'GingerCrawler',
'webmon ',
'httrack',
'webcrawler',
'grub.org',
'UsineNouvelleCrawler',
'antibot',
'netresearchserver',
'speedy',
'fluffy',
'bibnum.bnf',
'findlink',
'msrbot',
'panscient',
'yacybot',
'AISearchBot',
'IOI',
'ips-agent',
'tagoobot',
'MJ12bot',
'dotbot',
'woriobot',
'yanga',
'buzzbot',
'mlbot',
'yandexbot',
'purebot',
'Linguee Bot',
'Voyager',
'CyberPatrol',
'voilabot',
'baiduspider',
'citeseerxbot',
'spbot',
'twengabot',
'postrank',
'turnitinbot',
'scribdbot',
'page2rss',
'sitebot',
'linkdex',
'Adidxbot',
'blekkobot',
'ezooms',
'dotbot', // NOTE(review): duplicate of the earlier 'dotbot' entry
'Mail.RU_Bot',
'discobot',
'heritrix',
'findthatfile',
'europarchive.org',
'NerdByNature.Bot',
'sistrix crawler',
'ahrefsbot',
'Aboundex',
'domaincrawler',
'wbsearchbot',
'summify',
'ccbot',
'edisterbot',
'seznambot',
'ec2linkfinder',
'gslfbot',
'aihitbot',
'intelium_bot',
'facebookexternalhit',
'yeti',
'RetrevoPageAnalyzer',
'lb-spider',
'sogou',
'lssbot',
'careerbot',
'wotbox',
'wocbot',
'ichiro',
'DuckDuckBot',
'lssrocketcrawler',
'drupact',
'webcompanycrawler',
'acoonbot',
'openindexspider',
'gnam gnam spider',
'web-archive-net.com.bot',
'backlinkcrawler',
'coccoc',
'integromedb',
'content crawler spider',
'toplistbot',
'seokicks-robot',
'it2media-domain-crawler',
'ip-web-crawler.com',
'siteexplorer.info',
'elisabot',
'proximic',
'changedetection',
'blexbot',
'arabot',
'WeSEE:Search',
'niki-bot',
'CrystalSemanticsBot',
'rogerbot',
'360Spider',
'psbot',
'InterfaxScanBot',
'Lipperhey SEO Service',
'CC Metadata Scaper',
'g00g1e.net',
'GrapeshotCrawler',
'urlappendbot',
'brainobot',
'fr-crawler',
'binlar',
'SimpleCrawler',
'Livelapbot',
'Twitterbot',
'cXensebot',
'smtbot',
'bnf.fr_bot',
'A6-Indexer',
'ADmantX',
'Facebot',
'Twitterbot', // NOTE(review): duplicate of the earlier 'Twitterbot' entry
'OrangeBot',
'memorybot',
'AdvBot',
'MegaIndex',
'SemanticScholarBot',
'ltx71',
'nerdybot',
'xovibot',
'BUbiNG',
'Qwantify',
'archive.org_bot',
'Applebot',
'TweetmemeBot',
'crawler4j',
'findxbot',
'SemrushBot',
'yoozBot',
'lipperhey',
'y!j-asr',
'Domain Re-Animator Bot',
'AddThis',
];
// Sample User-Agent string (Chrome 66 on a Macintosh) used as the subject of the matching benchmark.
$user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36';
/**
 * Elapsed time in whole milliseconds between two resource-usage snapshots.
 *
 * Each snapshot is converted to milliseconds on its own (seconds * 1000 plus the
 * microsecond part truncated to milliseconds) before the two are subtracted.
 *
 * @param array  $ru    Later snapshot; must contain "ru_{$index}.tv_sec" and "ru_{$index}.tv_usec".
 * @param array  $rus   Earlier snapshot with the same two keys.
 * @param string $index Timer name inserted into the key, e.g. "utime" or "stime".
 *
 * @return int Later snapshot minus earlier snapshot, in milliseconds.
 */
function rutime($ru, $rus, $index) {
    $secondsKey = "ru_{$index}.tv_sec";
    $microsKey = "ru_{$index}.tv_usec";

    $endMillis = $ru[$secondsKey] * 1000 + intval($ru[$microsKey] / 1000);
    $startMillis = $rus[$secondsKey] * 1000 + intval($rus[$microsKey] / 1000);

    return $endMillis - $startMillis;
}
// regex: time 100000 matches of the user agent against the single combined pattern
echo "------------------------------\n",
     "REGEX ------------------------\n",
     "------------------------------\n";
$rustart = getrusage();
$i = 0;
while ($i < 100000) {
    // Only the cost of the call is of interest; the match result is not acted upon.
    preg_match($excludeBotsRegex, $user_agent, $matches);
    $i++;
}
$ru = getrusage();
// Derive both timings from the same pair of snapshots, then report them.
$userMillis = rutime($ru, $rustart, "utime");
$systemMillis = rutime($ru, $rustart, "stime");
echo "This process used " . $userMillis . " ms for its computations\n";
echo "It spent " . $systemMillis . " ms in system calls\n";
// array: time 100000 rounds of matching the user agent against each list entry in turn
echo "------------------------------\n";
echo "ARRAY ------------------------\n";
echo "------------------------------\n";
$rustart = getrusage();
for ($i=0 ; $i<100000 ; $i++) {
    // The loop variable must not be named $user_agent: doing so overwrote the real
    // user agent and made every pattern match against itself instead of the subject.
    foreach ($excludeBotsArray as $botName) {
        // Entries are used as raw patterns (no preg_quote) so that they behave the
        // same way as the alternatives inside the combined regex.
        if (preg_match('#' . $botName . '#ui', $user_agent, $matches)) {
            // First hit decides the outcome, as with the alternation in the single
            // regex; there is no reason to test the remaining entries.
            break;
        }
    }
}
$ru = getrusage();
echo "This process used " . rutime($ru, $rustart, "utime") .
" ms for its computations\n";
echo "It spent " . rutime($ru, $rustart, "stime") .
" ms in system calls\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment