Last active
January 19, 2022 09:08
-
-
Save devzom/f55c092aa013ef81b6e99f74eb2e5ce8 to your computer and use it in GitHub Desktop.
JS: Nuxt: Generate sitemap UserAgent based on provided array
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * @created: 18/12/2021
 * @author Jakub [dev.zomerfeld@gmail.com]
 * @summary Utility function to generate an array of
 * UserAgent rule objects, in the structured schema
 * used by the Nuxt Robots module https://github.com/nuxt-community/robots-module
 */
/**
 * User-agent names that get a blanket `Disallow: '/'` rule.
 *
 * Each name appears exactly once so that no duplicate rule is generated
 * for the same agent.
 */
const agentsArray = [
  'Abonti',
  'aggregator',
  'asterias',
  'BDCbot',
  'BLEXBot',
  'BuiltBotTough',
  'Bullseye',
  'BunnySlippers',
  'ca-crawler',
  'CCBot',
  'Cegbfeieh',
  'CheeseBot',
  'CherryPicker',
  'coccoc',
  'CopyRightCheck',
  'cosmos',
  'Crescent',
  'discobot',
  'DittoSpyder',
  'DOC',
  'DotBot',
  'Download Ninja',
  'EasouSpider',
  'EmailCollector',
  'EmailSiphon',
  'EmailWolf',
  'EroCrawler',
  'Exabot',
  'ExtractorPro',
  'Ezooms',
  'Fasterfox',
  'FeedBooster',
  'Fetch',
  'Foobot',
  'Genieo',
  'gigabot',
  'Harvest',
  'hloader',
  'HTTrack',
  'humanlinks',
  'ia_archiver',
  'archive.org_bot',
  'ia_archiver-web.archive.org',
  'ichiro',
  'ieautodiscovery',
  'Incutio',
  'InfoNaviRobot',
  'IstellaBot',
  'Java/1.',
  'JennyBot',
  'k2spider',
  'Kenjin Spider',
  'Keyword Density/0.9',
  'larbin',
  'LexiBot',
  'LexxeBot',
  'libWeb',
  'LinkextractorPro',
  'linko',
  'LinkScan/8.1a Unix',
  'LinkWalker',
  'lmspider',
  'LNSpiderguy',
  'lwp-trivial',
  'magpie',
  'Mata Hari',
  'MaxPointCrawler',
  'MegaIndex',
  'memoryBot',
  'Microsoft URL Control',
  'MIIxpc',
  'Mippin',
  'Missigua Locator',
  'Mister PiX',
  'MJ12bot',
  'moget',
  'MSIECrawler',
  'NetAnts',
  'NICErsPRO',
  'Niki-Bot',
  'NPBot',
  'Nutch',
  'Offline Explorer',
  'Openfind',
  'panscient.com',
  'PHP/5.{',
  'ProPowerBot/2.14',
  'ProWebWalker',
  'Python-urllib',
  'QueryN Metasearch',
  'RepoMonkey',
  'RMA',
  'rogerbot',
  'SemrushBot',
  'serf',
  'SISTRIX',
  'sitebot',
  'sitecheck.Internetseer.com',
  'SiteSnagger',
  'SnapPreviewBot',
  'Sogou',
  'SpankBot',
  'spanner',
  'spbot',
  'Spinn3r',
  'suzuran',
  'Szukacz/1.4',
  'Teleport',
  'TeleportPro',
  'Telesoft',
  'The Intraformant',
  'TheNomad',
  'TightTwatBot',
  'Titan',
  'toCrawl/UrlDispatcher',
  'trovitBot',
  'True_Robot',
  'turingos',
  'TurnitinBot',
  'Twiceler',
  'UbiCrawler',
  'UnisterBot',
  'Unknown',
  'uptime files',
  'URLy Warning',
  'VCI',
  'WBSearchBot',
  'Web Downloader',
  'Web Image Collector',
  'WebAuto',
  'WebBandit',
  'WebCopier',
  'WebEnhancer',
  'WebmasterWorldForumBot',
  'WebReaper',
  'WebSauger',
  'Website Quester',
  'Webster Pro',
  'WebStripper',
  'WebZip',
  'wget',
  'Wotbox',
  'wsr-agent',
  'WWW-Collector-E',
  'Zao',
  'Zealbot',
  'Zeus',
  'ZyBORG'
]
/**
 * Build one "disallow everything" rule object per user agent.
 *
 * @param {string[]} [agents=agentsArray] - User-agent names to block.
 * @returns {{UserAgent: string, Disallow: string}[]} One rule per agent,
 *   in the same order as the input.
 */
function generateAgentObjectForDisallowAll(agents = agentsArray) {
  return agents.map(UserAgent => ({ UserAgent, Disallow: '/' }))
}

// Computed once at module load and spread into the exported rule list.
const generatedUserAgentObjectsArrayWithDisallowedAll = generateAgentObjectForDisallowAll()
/**
 * Paths that every crawler ('*') is asked not to visit.
 * Each path becomes its own `{ UserAgent: '*', Disallow: path }` rule.
 */
const disallowedPathsForAllAgents = [
  '/*?utm_source=*',
  '/?showContact=true',
  '/?cat=*',
  '/announcement-manager/',
  '/create/',
  '/page/',
  '/*?page=*',
  '/profile/',
  '/span['
]

export default [
  /**
   * Sitemap default path
   */
  {
    UserAgent: '*',
    // Read from the environment each time the function is called.
    Sitemap: () => process.env.SITEMAP_URL,
    // Wait a minimum of this many seconds before requesting another URL.
    CrawlDelay: 15
  },

  /**
   * GLOBAL
   */
  ...disallowedPathsForAllAgents.map(path => ({ UserAgent: '*', Disallow: path })),

  /**
   * SPECIFIC UserAgents
   */
  { UserAgent: 'Mediapartners-Google', Allow: '/' },

  /* Disallow everything for each listed agent */
  ...generatedUserAgentObjectsArrayWithDisallowedAll
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment