Skip to content

Instantly share code, notes, and snippets.

@devzom
Last active January 19, 2022 09:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save devzom/f55c092aa013ef81b6e99f74eb2e5ce8 to your computer and use it in GitHub Desktop.
Save devzom/f55c092aa013ef81b6e99f74eb2e5ce8 to your computer and use it in GitHub Desktop.
JS: Nuxt: Generate robots.txt UserAgent rules based on provided array
/*
* @created: 18/12/2021
* @author Jakub [dev.zomerfeld@gmail.com]
* @summary Utility function that generates an array of
* UserAgent entries, in a structured schema,
* to use with Robots Nuxt module https://github.com/nuxt-community/robots-module
*/
/**
 * User-agent names that get a blanket `Disallow: '/'` rule.
 *
 * Each entry is turned into one `{ UserAgent, Disallow }` object further down
 * in this file, so every name should appear exactly once (a repeated name
 * yields a repeated rule). Names are kept verbatim, including entries that
 * carry version fragments such as 'Java/1.' or 'PHP/5.{'.
 */
const agentsArray = [
  'Abonti',
  'aggregator',
  'asterias',
  'BDCbot',
  'BLEXBot',
  'BuiltBotTough',
  'Bullseye',
  'BunnySlippers',
  'ca-crawler',
  'CCBot',
  'Cegbfeieh',
  'CheeseBot',
  'CherryPicker',
  'coccoc',
  'CopyRightCheck',
  'cosmos',
  'Crescent',
  'discobot',
  'DittoSpyder',
  'DOC',
  'DotBot',
  'Download Ninja',
  'EasouSpider',
  'EmailCollector',
  'EmailSiphon',
  'EmailWolf',
  'EroCrawler',
  'Exabot',
  'ExtractorPro',
  'Ezooms',
  'Fasterfox',
  'FeedBooster',
  'Fetch',
  'Foobot',
  'Genieo',
  'gigabot',
  'Harvest',
  'hloader',
  'HTTrack',
  'humanlinks',
  'ia_archiver',
  'archive.org_bot',
  'ia_archiver-web.archive.org',
  'ichiro',
  'ieautodiscovery',
  'Incutio',
  'InfoNaviRobot',
  'IstellaBot',
  'Java/1.',
  'JennyBot',
  'k2spider',
  'Kenjin Spider',
  'Keyword Density/0.9',
  'larbin',
  'LexiBot',
  'LexxeBot',
  'libWeb',
  'LinkextractorPro',
  'linko',
  'LinkScan/8.1a Unix',
  'LinkWalker',
  'lmspider',
  'LNSpiderguy',
  'lwp-trivial',
  'magpie',
  'Mata Hari',
  'MaxPointCrawler',
  'MegaIndex',
  'memoryBot',
  'Microsoft URL Control',
  'MIIxpc',
  'Mippin',
  'Missigua Locator',
  'Mister PiX',
  'MJ12bot',
  'moget',
  'MSIECrawler',
  'NetAnts',
  'NICErsPRO',
  'Niki-Bot',
  'NPBot',
  'Nutch',
  'Offline Explorer',
  'Openfind',
  'panscient.com',
  'PHP/5.{',
  'ProPowerBot/2.14',
  'ProWebWalker',
  'Python-urllib',
  'QueryN Metasearch',
  'RepoMonkey',
  'RMA',
  'rogerbot',
  'SemrushBot',
  'serf',
  'SISTRIX',
  'sitebot',
  'sitecheck.Internetseer.com',
  'SiteSnagger',
  'SnapPreviewBot',
  'Sogou',
  'SpankBot',
  'spanner',
  'spbot',
  'Spinn3r',
  'suzuran',
  'Szukacz/1.4',
  'Teleport',
  'TeleportPro',
  'Telesoft',
  'The Intraformant',
  'TheNomad',
  'TightTwatBot',
  'Titan',
  'toCrawl/UrlDispatcher',
  'trovitBot',
  'True_Robot',
  'turingos',
  'TurnitinBot',
  'Twiceler',
  'UbiCrawler',
  'UnisterBot',
  'Unknown',
  'uptime files',
  'URLy Warning',
  'VCI',
  'WBSearchBot',
  'Web Downloader',
  'Web Image Collector',
  'WebAuto',
  'WebBandit',
  'WebCopier',
  'WebEnhancer',
  'WebmasterWorldForumBot',
  'WebReaper',
  'WebSauger',
  'Website Quester',
  'Webster Pro',
  'WebStripper',
  'WebZip',
  'wget',
  'Wotbox',
  'wsr-agent',
  'WWW-Collector-E',
  'Zao',
  'Zealbot',
  'Zeus',
  'ZyBORG'
]
/**
 * Builds one "block the whole site" rule object per user agent.
 *
 * Pure function: it returns a new array and does not touch module state.
 *
 * @param {string[]} [agents=agentsArray] - User-agent names to block;
 *   defaults to the module-level list.
 * @returns {{UserAgent: string, Disallow: string}[]} One rule per name, in
 *   the same order as `agents`, each with `Disallow: '/'`.
 */
function generateAgentObjectForDisallowAll(agents = agentsArray) {
  return agents.map((UserAgent) => ({
    UserAgent,
    Disallow: '/'
  }))
}

/**
 * Disallow-all rules for every entry of `agentsArray`, computed once when the
 * module is loaded and spread into the default export.
 */
const generatedUserAgentObjectsArrayWithDisallowedAll = generateAgentObjectForDisallowAll()
/**
 * Rule list exported for the Nuxt robots module. Order of entries:
 *   1. catch-all group carrying the sitemap location and crawl delay,
 *   2. path-level Disallow rules for every crawler ('*'),
 *   3. rules for individually named crawlers,
 *   4. the generated disallow-all rules.
 */
export default [
  // Catch-all group: sitemap location plus crawl throttling.
  {
    UserAgent: '*',
    // Function value, so SITEMAP_URL is read from the environment when it is called.
    Sitemap: () => process.env.SITEMAP_URL,
    // Wait a minimum of this many seconds before requesting another URL.
    CrawlDelay: 15
  },

  // GLOBAL: one rule object per path that no crawler should request.
  ...[
    '/*?utm_source=*',
    '/?showContact=true',
    '/?cat=*',
    '/announcement-manager/',
    '/create/',
    '/page/',
    '/*?page=*',
    '/profile/',
    '/span['
  ].map((path) => ({ UserAgent: '*', Disallow: path })),

  // SPECIFIC user agents.
  { UserAgent: 'Mediapartners-Google', Allow: '/' },

  // Crawlers that are denied the whole site.
  ...generatedUserAgentObjectsArrayWithDisallowedAll
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment