-
-
Save scarcenine/c98b28f098cddf83b89a32841861009f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/usr/lib/python2.7/dist-packages/supervisor/options.py:298: UserWarning: Supervisord is running as root and it is searching for its configuration file in default locations (including its current working directory); you probably want to specify a "-c" argument specifying an absolute path to a configuration file for improved security. | |
'Supervisord is running as root and it is searching ' | |
Deleted status index | |
Creating status index with mapping | |
{"acknowledged":true,"shards_acknowledged":true,"index":"status"} | |
Deleted metrics index | |
{"acknowledged":true}Creating metrics index with mapping | |
{"acknowledged":true}Running: java -client -Ddaemon.name= -Dstorm.options= -Dstorm.home=/opt/apache-storm-1.2.3 -Dstorm.log.dir=/opt/apache-storm-1.2.3/logs -Djava.library.path=/usr/local/lib:/opt/local/lib:/usr/lib -Dstorm.conf.file= -cp /opt/apache-storm-1.2.3/*:/opt/apache-storm-1.2.3/lib/*:/opt/apache-storm-1.2.3/extlib/*:/home/ubuntu/news-crawler/lib/crawler.jar:/opt/apache-storm-1.2.3/conf:/opt/apache-storm-1.2.3/bin -Dstorm.jar=/home/ubuntu/news-crawler/lib/crawler.jar -Dstorm.dependency.jars= -Dstorm.dependency.artifacts={} org.commoncrawl.stormcrawler.news.CrawlTopology /home/ubuntu/news-crawler/seeds * -conf /home/ubuntu/news-crawler/conf/es-conf.yaml -conf /home/ubuntu/news-crawler/conf/crawler-conf.yaml | |
467 [main] INFO o.c.s.n.CrawlTopology - Injecting seeds from /home/ubuntu/news-crawler/seeds by pattern * | |
471 [main] INFO c.d.s.s.FileSpout - Reading directory: /home/ubuntu/news-crawler/seeds (filter: *) | |
471 [main] INFO c.d.s.s.FileSpout - Input : /home/ubuntu/news-crawler/seeds/feeds.txt | |
499 [main] INFO o.a.s.u.TupleUtils - Enabling tick tuple with interval [15] | |
546 [main] WARN o.a.s.u.Utils - STORM-VERSION new 1.2.3 old null | |
562 [main] INFO o.a.s.StormSubmitter - Generated ZooKeeper secret payload for MD5-digest: -7051331323110786540:-5260238288145818853 | |
638 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : b66f01ec9f84:6627 | |
673 [main] INFO o.a.s.s.a.AuthUtils - Got AutoCreds [] | |
679 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : b66f01ec9f84:6627 | |
701 [main] INFO o.a.s.StormSubmitter - Uploading dependencies - jars... | |
701 [main] INFO o.a.s.StormSubmitter - Uploading dependencies - artifacts... | |
702 [main] INFO o.a.s.StormSubmitter - Dependency Blob keys - jars : [] / artifacts : [] | |
712 [main] INFO o.a.s.StormSubmitter - Uploading topology jar /home/ubuntu/news-crawler/lib/crawler.jar to assigned location: /opt/apache-storm-1.2.3/storm-local/nimbus/inbox/stormjar-2d576c7c-7dae-46e1-aa3e-63546e7dae47.jar | |
1552 [main] INFO o.a.s.StormSubmitter - Successfully uploaded topology jar to assigned location: /opt/apache-storm-1.2.3/storm-local/nimbus/inbox/stormjar-2d576c7c-7dae-46e1-aa3e-63546e7dae47.jar | |
1552 [main] INFO o.a.s.StormSubmitter - Submitting topology NewsCrawl in distributed mode with conf {"http.content.limit":1048576,"fetchInterval.FETCH_ERROR.isFeed=true":4320,"fetcher.max.crawl.delay.force":false,"sitemap.extensions":null,"sitemap.schedule.delay":15,"http.robots.headers.skip":false,"es.status.addresses":"http:\/\/localhost:9200","fetcher.max.throttle.sleep":-1,"fetcher.server.delay.force":true,"metadata.persist":["_redirTo","fetch.statusCode","error.cause","error.source","isSitemap","isSitemapNews","isSitemapIndex","isSitemapVerified","isFeed","numLinks","last-modified","signature","signatureChangeDate","fetchInterval","protocol.etag"],"urlfilters.config.file":"urlfilters.json","fetchInterval.FETCH_ERROR.isSitemap=true":259200,"http.protocol.versions":null,"topology.name":"NewsCrawl","http.accept":"text\/html,application\/xhtml+xml,application\/xml;q=0.9,*\/*;q=0.8","fetcher.threads.number":80,"scheduler.adaptive.fetchInterval.max":129600,"fetchInterval.ERROR.isSitemapNews=true":86400,"indexer.text.maxlength":-1,"fetchInterval.ERROR.isFeed=true":86400,"selenium.capabilities":{"takesScreenshot":false,"loadImages":false,"javascriptEnabled":true},"scheduler.adaptive.fetchInterval.rate.incr":0.5,"parsefilters.config.file":"parsefilters.json","es.status.max.urls.per.bucket":5,"sitemap.sniffContent":true,"indexer.url.fieldname":"url","es.status.bucket.field":"metadata.hostname","topology.max.spout.pending":100,"fetchInterval.DISCOVERED.isSitemap=true":300,"storm.zookeeper.topology.auth.payload":"-7051331323110786540:-5260238288145818853","fetcher.queue.mode":"byHost","fetchInterval.FETCHED.isSitemapIndex=true":10080,"feed.sniffContent":true,"fetchInterval.ERROR.isSitemap=true":259200,"es.status.routing.fieldname":"metadata.hostname","fetchInterval.FETCHED.isFeed=true":720,"es.status.bucket.sort.field":["nextFetchDate","url"],"es.status.sample":false,"scheduler.adaptive.fetchInterval.min":90,"fetchInterval.FETCH_ERROR.isSitemapIndex=true":4320,"robots.cache.spec":"maximumSize=10000,expireAfterWrite=6h","fetcher.metrics.time.bucket.secs":60,"http.trust.everything":true,"fetcher.server.min.delay":0.0,"es.status.concurrentRequests":1,"topology.workers":1,"sitemap.filter.hours.since.modified":720,"max.fetch.errors":3,"protocol.md.prefix":"protocol.","metadata.track.path":true,"fetchInterval.REDIRECTION.isSitemapNews=true":20160,"warc":{"fs.file.impl":"org.apache.hadoop.fs.RawLocalFileSystem"},"es.status.global.sort.field":"nextFetchDate","selenium.instances.num":1,"es.status.routing":true,"fetchInterval.FETCHED.isSitemap=true":259200,"http.robots.403.allow":true,"indexer.text.fieldname":"content","http.agent.name":"Reputation Crawler","http.content.partial.as.trimmed":true,"indexer.md.mapping":["parse.title=title","parse.keywords=keywords","parse.description=description"],"selenium.setScriptTimeout":0,"es.status.index.name":"status","warc.dir":"\/data\/warc","robots.noFollow.strict":true,"topology.debug":false,"fetchInterval.ERROR.isSitemapIndex=true":86400,"scheduler.class":"com.digitalpebble.stormcrawler.persistence.AdaptiveScheduler","sitemap.offset.guess":1024,"fetcher.threads.per.queue":1,"storm.zookeeper.topology.auth.scheme":"digest","robots.error.cache.spec":"maximumSize=10000,expireAfterWrite=1h","fetcher.max.urls.in.queues":6000,"topology.kryo.register":["com.digitalpebble.stormcrawler.Metadata","com.digitalpebble.stormcrawler.Metadata"],"partition.url.mode":"byDomain","spout.ttl.purgatory":1200,"fetchInterval.DISCOVERED.isSitemapIndex=true":0,"es.status.flushInterval":"5s","warc.rotation.policy.max-mb":1023,"es.metrics.index.name":"metrics","scheduler.adaptive.fetchInterval.rate.decr":0.2,"fetcher.timeout.queue":-1,"http.agent.description":"EAFIT university crawler","status.updater.use.cache":false,"http.agent.version":"0.1","fetcher.max.crawl.delay":120,"track.anchors":true,"spout.reset.fetchdate.after":240,"sitemap.discovery":true,"topology.worker.childopts":"-Xmx4g -Djava.net.preferIPv4Stack=true","protocols":"http,https,file","detect.mimetype":true,"fetcher.max.queue.size":10,"fetchInterval.DISCOVERED.isFeed=true":0,"metadata.track.depth":false,"http.accept.language":"en-us,en-gb,en;q=0.7,*;q=0.3","scheduler.adaptive.setLastModified":true,"selenium.implicitlyWait":0,"es.status.max.buckets":200,"fetchInterval.FETCH_ERROR.isSitemapNews=true":4320,"fetchInterval.REDIRECTION.isFeed=true":20160,"es.status.query.timeout":-1,"es.status.recentDate.min.gap":-1,"topology.backpressure.enable":false,"detect.charset.maxlength":10000,"es.status.recentDate.increase":-1,"spout.min.delay.queries":30000,"indexer.canonical.name":"canonical","ui.port":8081,"fetchInterval.REDIRECTION.isSitemapIndex=true":20160,"fetchInterval.FETCHED.isSitemapNews=true":1440,"fetchInterval.DISCOVERED.isSitemapNews=true":0,"warc.rotation.policy.max-minutes":1440,"topology.message.timeout.secs":300,"topology.eventlogger.executors":0,"fetchInterval.error":133920,"fetcher.server.delay":6.0,"http.protocol.implementation":"com.digitalpebble.stormcrawler.protocol.okhttp.HttpProtocol","selenium.delegated.protocol":"com.digitalpebble.stormcrawler.protocol.httpclient.HttpProtocol","parser.emitOutlinks":true,"fetchInterval.default":5256000,"status.updater.unit.round.date":"SECOND","es.status.max.start.offset":500,"http.store.headers":true,"jsoup.treat.non.html.as.error":true,"http.timeout":30000,"https.protocol.implementation":"com.digitalpebble.stormcrawler.protocol.okhttp.HttpProtocol","http.robots.meta.skip":false,"topology.metrics.consumer.register":[{"class":"org.apache.storm.metric.LoggingMetricsConsumer","parallelism.hint":1},{"class":"com.digitalpebble.stormcrawler.elasticsearch.metrics.MetricsConsumer","parallelism.hint":1}],"urlbuffer.class":"com.digitalpebble.stormcrawler.persistence.urlbuffer.SimpleURLBuffer","es.status.bulkActions":500,"es.metrics.addresses":"http:\/\/localhost:9200","fetchInterval.REDIRECTION.isSitemap=true":259200,"file.protocol.implementation":"com.digitalpebble.stormcrawler.protocol.file.FileProtocol","http.agent.url":"https:\/\/eafit.edu.co\/","status.updater.cache.spec":"maximumSize=250000,expireAfterAccess=4h","http.robots.file.skip":false,"feed.filter.hours.since.published":720,"parser.emitOutlinks.max.per.page":-1,"selenium.pageLoadTimeout":-1,"http.agent.email":"oquinte1@eafit.edu.co","fetchInterval.FETCHED.isSitemapVerified=true":1440,"fetchInterval.fetch.error":2880} | |
1553 [main] WARN o.a.s.u.Utils - STORM-VERSION new 1.2.3 old 1.2.3 | |
1975 [main] INFO o.a.s.StormSubmitter - Finished submitting topology: NewsCrawl | |
Running: java -client -Ddaemon.name= -Dstorm.options= -Dstorm.home=/opt/apache-storm-1.2.3 -Dstorm.log.dir=/opt/apache-storm-1.2.3/logs -Djava.library.path=/usr/local/lib:/opt/local/lib:/usr/lib -Dstorm.conf.file= -cp /opt/apache-storm-1.2.3/*:/opt/apache-storm-1.2.3/lib/*:/opt/apache-storm-1.2.3/extlib/*:/opt/apache-storm-1.2.3/extlib-daemon/*:/opt/apache-storm-1.2.3/conf:/opt/apache-storm-1.2.3/bin org.apache.storm.command.set_log_level NewsCrawl -l crawlercommons.sitemaps.SiteMapParser=ERROR | |
1454 [main] INFO o.a.s.c.set-log-level - Sent log config LogConfig(named_logger_level:{crawlercommons.sitemaps.SiteMapParser=LogLevel(action:UPDATE, target_log_level:ERROR, reset_log_level_timeout_secs:0)}) for topology NewsCrawl | |
1567 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : b66f01ec9f84:6627 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment