Skip to content

Instantly share code, notes, and snippets.

@berlinbrown
Created March 16, 2013 18:20
Show Gist options
  • Save berlinbrown/5177642 to your computer and use it in GitHub Desktop.
Save berlinbrown/5177642 to your computer and use it in GitHub Desktop.
Web crawler fun: more seeds in MySQL format
-- Seed hosts for the web crawler (MySQL).
--
-- Loads 31 rows into bot_crawler_seeds. Every row uses the same created_at
-- timestamp, scheme 'http', path '/', and enabled flag 'Y'; only the host
-- differs from row to row.
--
-- All rows are written with a single multi-row INSERT instead of one
-- statement per host: on a transactional storage engine the statement either
-- inserts every seed or none, so a failure cannot leave the table partially
-- seeded, and the server parses/commits once rather than 31 times.
--
-- NOTE(review): no duplicate handling is done here; if the table has a unique
-- key on host, re-running this script will fail as a whole -- confirm against
-- the table definition.
insert into bot_crawler_seeds (created_at, scheme, host, path, enabled)
values
    ('2013-03-14 03:22:36', 'http', 'supremecourt.c-span.org', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.tocqueville.org', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'whitehouse.c-span.org', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'mises.givingplan.net', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'presidentiallibraries.c-span.org', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'topics.nytimes.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'journalism.nyu.edu', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.baseballhq.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.foxbusiness.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.spiegel.de', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.boston.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.rushlimbaugh.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'smallbusiness.foxbusiness.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.muslims4liberty.org', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'bitcoin.org', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.aol.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.forbes.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.infomart.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.fosspatents.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'newsbusters.org', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'timesmachine.nytimes.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'giving.mit.edu', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'english.kyodonews.jp', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.townhall.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.opencalais.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.jewishworldreview.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.americanpresidents.org', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.americanwriters.org', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.oregonlive.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'www.macworld.com', '/', 'Y'),
    ('2013-03-14 03:22:36', 'http', 'wiredbusinessconference.com', '/', 'Y');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment