Skip to content

Instantly share code, notes, and snippets.

@plewandowski
Created February 22, 2015 17:36
Show Gist options
  • Save plewandowski/6bd2b2568824aa7ef9bb to your computer and use it in GitHub Desktop.
Save plewandowski/6bd2b2568824aa7ef9bb to your computer and use it in GitHub Desktop.
crawler services
# Crawler parameters.
# `%crawler.base.url%` refers to a parameter that is not defined in this
# file; it has to be supplied elsewhere in the container configuration.
parameters:
  # Settings passed as the constructor argument of
  # mad.crawler.content.provider.
  mad.crawler.client.config:
    user_agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:35.0) Gecko/20100101 Firefox/35.0 FirePHP/0.7.4"
    # Quoted because a plain YAML scalar must not start with `%`.
    base_url: '%crawler.base.url%'

  # Sleep settings for the crawler.
  # NOTE(review): units are not stated here (presumably seconds) - confirm
  # against the code that consumes this parameter.
  mad.crawler.config:
    # Numeric value; a trailing `;` would turn it into the string "5;".
    crawler_sleep: 5
    item_sleep: 0.5

  # URL list passed to mad.crawler.main.list.provider.
  mad.crawler.main_url_list: ['%crawler.base.url%']
# Service definitions for the price crawler.
# Service references (`@id`) and parameter placeholders (`%name%`) are quoted
# because `@` and `%` are YAML indicators and may not start a plain scalar.
services:
  # client
  mad.crawler.content.provider:
    class: Madcoders\PriceCrawlerBundle\Crawler\Content\ContentProvider
    arguments: ['%mad.crawler.client.config%']

  # list persister
  mad.crawler.list.persister:
    class: Madcoders\PriceCrawlerBundle\Crawler\Persister\ListPersister
    arguments: ['@mad.crawler.list.persister.repository']

  # Repository obtained from the default entity manager via
  # getRepository(UrlList).
  # NOTE(review): factory_service/factory_method is the legacy factory
  # syntax; newer Symfony versions use the `factory:` key - confirm the
  # target Symfony version before migrating.
  mad.crawler.list.persister.repository:
    class: Doctrine\ORM\EntityRepository
    factory_service: doctrine.orm.default_entity_manager
    factory_method: getRepository
    arguments:
      - Madcoders\PriceCrawlerBundle\Entity\UrlList

  # product persister
  # Wired to the Product repository defined below (not the UrlList one).
  mad.crawler.product.persister:
    class: Madcoders\PriceCrawlerBundle\Crawler\Persister\ProductPersister
    arguments: ['@mad.crawler.product.persister.repository']

  # Repository obtained from the default entity manager via
  # getRepository(Product).
  mad.crawler.product.persister.repository:
    class: Doctrine\ORM\EntityRepository
    factory_service: doctrine.orm.default_entity_manager
    factory_method: getRepository
    arguments:
      - Madcoders\PriceCrawlerBundle\Entity\Product

  # main crawler
  # Crawler arguments, in order: content provider, list provider, filter,
  # persister.
  mad.crawler.main:
    class: Madcoders\PriceCrawlerBundle\Crawler
    arguments:
      - '@mad.crawler.content.provider'
      - '@mad.crawler.main.list.provider'
      - '@mad.crawler.main.filter'
      - '@mad.crawler.list.persister'

  mad.crawler.main.list.provider:
    class: Madcoders\PriceCrawlerBundle\Crawler\DocumentList\UrlProvider
    arguments: ['%mad.crawler.main_url_list%']

  mad.crawler.main.filter:
    class: Madcoders\PriceCrawlerBundle\Crawler\Content\Filter\CategoryFilter

  # category crawler
  mad.crawler.category:
    class: Madcoders\PriceCrawlerBundle\Crawler
    arguments:
      - '@mad.crawler.content.provider'
      - '@mad.crawler.category.list.provider'
      - '@mad.crawler.category.filter'
      - '@mad.crawler.list.persister'

  mad.crawler.category.list.provider:
    class: Madcoders\PriceCrawlerBundle\Crawler\DocumentList\CategoryProvider

  mad.crawler.category.filter:
    class: Madcoders\PriceCrawlerBundle\Crawler\Content\Filter\ProductFilter

  # product details crawler
  # Needs its own id: reusing `mad.crawler.category` would be a duplicate
  # key that replaces the category crawler and leaves the
  # `mad.crawler.product` reference in the manager unresolved.
  mad.crawler.product:
    class: Madcoders\PriceCrawlerBundle\Crawler
    arguments:
      - '@mad.crawler.content.provider'
      - '@mad.crawler.product.list.provider'
      - '@mad.crawler.product.filter'
      - '@mad.crawler.product.persister'

  mad.crawler.product.list.provider:
    class: Madcoders\PriceCrawlerBundle\Crawler\DocumentList\ProductProvider

  mad.crawler.product.filter:
    class: Madcoders\PriceCrawlerBundle\Crawler\Content\Filter\ProductDetailsFilter

  # crawler manager
  # Receives a single argument: the list of the three crawler services.
  mad.crawler:
    class: Madcoders\PriceCrawlerBundle\CrawlerManager
    arguments:
      - ['@mad.crawler.main', '@mad.crawler.category', '@mad.crawler.product']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment