Skip to content

Instantly share code, notes, and snippets.

@kdzwinel
Last active March 8, 2018 13:26
Show Gist options
  • Save kdzwinel/b791967eb66d0e2925ea22c8ca14233a to your computer and use it in GitHub Desktop.
Save kdzwinel/b791967eb66d0e2925ea22c8ca14233a to your computer and use it in GitHub Desktop.
robots.txt errors for top 1000 domains ( http://mobile.httparchive.org/urls.php?start=1&end=1000 )
http://www.netflix.com/
[ { index: 21,
line: 'User Agent: SiteArc',
error: 'Unknown directive' } ]
http://www.salesforce.com/
[ { index: 82,
line: 'Disallow: na1.salesforce.com/help/doc/en/salesforc',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.chase.com/
[ { index: 72,
line: 'Disallow: https://www.chase.com/ccpmweb/shared/doc',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 73,
line: 'Disallow: https://www.chase.com/content/dam/chaseo',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 74,
line: 'Disallow: https://www.chase.com/content/dam/chaseo',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 75,
line: 'Disallow: https://www.chase.com/content/dam/chaseo',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 76,
line: 'Disallow: https://www.chase.com/content/dam/chaseo',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 77,
line: 'Disallow: https://www.chase.com/content/dam/chaseo',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 78,
line: 'Disallow: https://www.chase.com/content/dam/chaseo',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 79,
line: 'Disallow: https://www.chase.com/content/dam/chaseo',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 80,
line: 'Disallow: https://www.chase.com/content/dam/chaseo',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 81,
line: 'Disallow: https://www.chase.com/content/dam/chaseo',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.godaddy.com/
[ { index: 80,
line: 'Allow: .js',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 81,
line: 'Allow: .css',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.walmart.com/
[ { index: 21,
line: 'Disallow: api',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 39,
line: 'Disallow: ?country=US',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.mama.cn/
[ { index: 19,
line: 'Disallow: index.php?g=Api&a=RssApi&fromid=3',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.zillow.com/
[ { index: 39,
line: 'Disallow: /search/browse/HDPTree,$Border.$debug$De',
error: '"$" should only be used at the end of the pattern ' },
{ index: 40,
line: 'Disallow: /homedetail/HomeDetail,$Border.$debug$De',
error: '"$" should only be used at the end of the pattern ' },
{ index: 41,
line: 'Disallow: /profiles/ProfileBorderTemplate,$myzillo',
error: '"$" should only be used at the end of the pattern ' } ]
http://www.asos.com/
[ { index: 2,
line: 'Sitemap: /sitemap.ashx',
error: 'Invalid sitemap URL' } ]
http://www.rt.com/
[ { index: 17,
line: 'Allow: .css?',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 38,
line: 'Allow: .css?',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 59,
line: 'Allow: .css?',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.icloud.com/
[ { index: 14,
line: 'Disallow: ajax_sender.html',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.convert2mp3.net/
[ { index: 2,
line: 'Disallow: switchtheme.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 3,
line: 'Disallow: setlang.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 4,
line: 'Disallow: convert.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 5,
line: 'Disallow: convert_en.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 6,
line: 'Disallow: show_video.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 7,
line: 'Disallow: c-mp3.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 8,
line: 'Disallow: c-mp4.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 9,
line: 'Disallow: c-avi.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 10,
line: 'Disallow: c-wmv.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 11,
line: 'Disallow: c-flac.php',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.wittyfeed.com/
[ { index: 44,
line: 'Disallow: push.wittyfeed.com',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 47,
line: 'Disallow: stats.wittyfeed.com',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 50,
line: 'Disallow: www.9viral.biz',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 53,
line: 'Disallow: monitor.wittyfeed.com',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 56,
line: 'Disallow: ping.wittyfeed.com',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 59,
line: 'Disallow: og.wittyfeed.com',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 62,
line: 'Disallow: news.wittyfeed.com',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 65,
line: 'Disallow: lifestyle.wittyfeed.com',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 69,
line: 'Disallow: inspiration.wittyfeed.com',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 73,
line: 'Disallow: celebrities.wittyfeed.com',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.dell.com/
[ { index: 83,
line: 'Disallow: http://www.dell.com/content/products/com',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 84,
line: 'Disallow: http://premier.dell.com/portal/error.asp',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.intuit.com/
[ { index: 11,
line: 'spider/4.0(+http://www.sogou.com/docs/help/webmast',
error: 'Unknown directive' },
{ index: 15,
line: 'Spider/3.0(+http://www.sogou.com/docs/help/webmast',
error: 'Unknown directive' },
{ index: 46,
line: '+http://www.baidu.com/search/spider.html)',
error: 'Unknown directive' },
{ index: 53,
line: '+http://help.soso.com/webspider.htm)',
error: 'Unknown directive' },
{ index: 57,
line: '+http://shoulu.jike.com/spider.html)',
error: 'Unknown directive' } ]
http://www.fedex.com/
[ { index: 87,
line: 'Disallow: http://www.fedex.com/gb/shipping-service',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 88,
line: 'Disallow: http://www.fedex.com/fr/newcustomer/indi',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 89,
line: 'Disallow: http://www.fedex.com/fr/newcustomer/IndS',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 90,
line: 'Disallow: http://www.fedex.com/fr/newcustomer/IndS',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 91,
line: 'Disallow: http://www.fedex.com/fr/newcustomer/INDS',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 92,
line: 'Disallow: http://www.fedex.com/fr/newcustomer/INDS',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 93,
line: 'Disallow: http://www.fedex.com/fr/newcustomer/IndS',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 94,
line: 'Disallow: http://www.fedex.com/fr/newcustomer/IndS',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 95,
line: 'Disallow: http://www.fedex.com/fr/newcustomer/IndS',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 96,
line: 'Disallow: http://www.fedex.com/fr/newcustomer/IndS',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.springer.com/
[ { index: 6,
line: 'Disallow: ?print_view=true',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.17ok.com/
[ { index: 36,
line: 'Disallow: 631',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.hurriyet.com.tr/
[ { index: 20,
line: 'Allow: http://winapp.hurriyet.com.tr/yandexnews.as',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.inquirer.net/
[ { index: 5,
line: 'Disallow: http://bandera.inquirer.net/feed-karera',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 6,
line: 'Disallow: http://bandera.inquirer.net/feed-lotto',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 7,
line: 'Disallow: http://bandera.inquirer.net/feed-full-ho',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 8,
line: 'Disallow: http://bandera.inquirer.net/feed-karera-',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 9,
line: 'Disallow: http://bandera.inquirer.net/bandera-cust',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.corriere.it/
[ { index: 192,
line: 'ACAP-crawler: *',
error: 'Unknown directive' },
{ index: 194,
line: 'ACAP-disallow-crawl: /ZCestino/',
error: 'Unknown directive' },
{ index: 196,
line: 'ACAP-disallow-crawl: /prova/',
error: 'Unknown directive' },
{ index: 198,
line: 'ACAP-disallow-crawl: /b2b/',
error: 'Unknown directive' },
{ index: 200,
line: 'ACAP-disallow-crawl: /_template/',
error: 'Unknown directive' },
{ index: 202,
line: 'ACAP-disallow-crawl: /_docs/',
error: 'Unknown directive' },
{ index: 204,
line: 'ACAP-disallow-crawl: /_temp/',
error: 'Unknown directive' },
{ index: 206,
line: 'ACAP-disallow-crawl: /*_print.html$',
error: 'Unknown directive' } ]
http://www.verizonwireless.com/
[ { index: 40,
line: 'Disallow: email=Responsys',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 254,
line: 'Allow: .js ',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 255,
line: 'Allow: .css ',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.newegg.com/
[ { index: 89,
line: 'Allow: .js ',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 90,
line: 'Allow: .css',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.infusionsoft.com/
[ { index: 2,
line: 'Sitemap: sitemap.xml',
error: 'Invalid sitemap URL' } ]
http://www.grid.id/
[ { index: 2,
line: 'Disallow: ?url=*',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.nordstrom.com/
[ { index: 2,
line: 'Modified: 1-3-2018',
error: 'Unknown directive' } ]
http://www.cdiscount.com/
[ { index: 164,
line: 'Disallow: AdvisorGoToHomePage.html',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.gotporn.com/
[ { index: 52,
line: 'Disallow: gvf/*',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.viva.co.id/
[ { index: 2,
line: 'Disallow:Disallow: /indeks_tahun/all/2008',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.time.com/
[ { index: 19,
line: 'Disallow: ?search',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 21,
line: 'Disallow: ?sa=',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 22,
line: 'Disallow: ?EGOTECpage',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 23,
line: 'Disallow: ?p=',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 24,
line: 'Disallow: ?ref=',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 25,
line: 'Disallow: ?pagespeed',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 26,
line: 'Disallow: ?hc_location',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 32,
line: 'Video Sitemap: http://time.com/video-sitemap.xml',
error: 'Unknown directive' } ]
http://www.bhphotovideo.com/
[ { index: 171,
line: 'Disallow: secure.*',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.visualstudio.com/
[ { index: 5,
line: 'Allow: .js ',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 6,
line: 'Allow: .css',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.bhaskar.com/
[ { index: 3,
line: 'Allow: .js',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 4,
line: 'Allow: .css',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 5,
line: 'Allow: .php',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.sporx.com/
[ { index: 62,
line: 'Disallow: Disallow: /tekvandoda-rusvet-iddialari-b',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.bookmyshow.com/
[ { index: 66,
line: 'Allow: .js',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 67,
line: 'Allow: .css',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 135,
line: 'Sitemap: /sitemap_index.xml',
error: 'Invalid sitemap URL' },
{ index: 136,
line: 'Sitemap: /sitemap_person_index.bms',
error: 'Invalid sitemap URL' } ]
http://www.state.gov/
[ { index: 12,
line: 'Disallow: organization/revisions',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 13,
line: 'Disallow: organization/120733.pdf',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 14,
line: 'Disallow: organization/120738.pdf',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 15,
line: 'Disallow: organization/120439.pdf',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 16,
line: 'Disallow: organization/120754.pdf',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 17,
line: 'Disallow: organization/120765.pdf',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 18,
line: 'Disallow: organization/20047.pdf',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 19,
line: 'Disallow: organization/16192.pdf',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 20,
line: 'Disallow: organization/8792.pdf',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 21,
line: 'Disallow: organization/120786.pdf',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.dianping.com/
[ { index: 8,
line: 'Disallow: http://www.dianping.com/shop/98376730',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.healthline.com/
[ { index: 418,
line: 'Disallow /',
error: 'Syntax not understood' },
{ index: 421,
line: 'Disallow /',
error: 'Syntax not understood' },
{ index: 424,
line: 'Disallow /',
error: 'Syntax not understood' },
{ index: 427,
line: 'Disallow /',
error: 'Syntax not understood' },
{ index: 430,
line: 'Disallow /',
error: 'Syntax not understood' },
{ index: 433,
line: 'Disallow /',
error: 'Syntax not understood' } ]
http://www.chouftv.ma/
[ { index: 7,
line: 'Disallow: chouftv_index_part2.php',
error: 'Pattern should either be empty, start with "/" or "*" ' },
{ index: 9,
line: 'Disallow: chouftv_index_part2.php',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
http://www.zol.com.cn/
[ { index: 6,
line: 'Disallow:https://www.zol.com.cn/*',
error: 'Pattern should either be empty, start with "/" or "*" ' } ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment