Skip to content

Instantly share code, notes, and snippets.

@efazati
Created June 7, 2016 07:45
Show Gist options
  • Save efazati/fe35d84ee9d1f760f4b5230ef29609a8 to your computer and use it in GitHub Desktop.
Save efazati/fe35d84ee9d1f760f4b5230ef29609a8 to your computer and use it in GitHub Desktop.
#-*- coding: utf-8 -*-
from lxml import html
import requests
from pymongo import MongoClient
from pprint import pprint
import urllib
import telepot
from datetime import datetime
# MongoDB connection to a server on localhost:27017.
client = MongoClient('mongodb://localhost:27017/')
# Telegram bot token handed to telepot.Bot; blank here and must be filled in.
token = ''
# Target chat passed to bot.sendPhoto (placeholder value).
chat_id = '@...'
# Photo listing page that gets scraped.
url = "http://www.yjc.ir/fa/photo"
# Database and collection holding one document per scraped article.
db = client['telepy']
article_obj = db.article
# NOTE(review): never read in the visible code - confirm before removing.
element = ''
def data_gathering():
    """Scrape the photo listing page and return one dict per article.

    Downloads the module-level ``url``, selects every element carrying the
    CSS class ``ax_faal`` and builds a dict for each with the keys ``img``,
    ``title``, ``data`` (timestamp of the scrape) and ``source``, plus
    ``url`` when the article contains a usable link.

    Articles without an ``<img>`` or without a ``.title_txt1`` element are
    skipped, so a single malformed entry no longer aborts the whole run
    with an IndexError.

    Returns:
        list[dict]: the scraped articles, in page order.

    Raises:
        requests.exceptions.RequestException: if the page cannot be
            fetched, including when the timeout expires.
    """
    # A timeout keeps a stalled server from hanging the script forever.
    page = requests.get(url, timeout=30)
    tree = html.fromstring(page.content)

    result = []
    for article in tree.find_class('ax_faal'):
        images = article.cssselect('img')
        titles = article.cssselect('.title_txt1')
        if not images or not titles:
            # Nothing to post without both an image and a title.
            continue

        item = {}
        # Last attribute value of the image tag (presumably src - TODO confirm).
        item['img'] = images[0].values()[-1]

        addr = article.cssselect('a')
        if addr:
            link_values = addr[0].values()
            # Second attribute value of the link (presumably href - TODO
            # confirm); skipped when the tag has fewer than two attributes.
            if len(link_values) > 1:
                item['url'] = link_values[1]

        item['title'] = titles[0].text
        # The key is spelled 'data' in stored documents; kept for compatibility.
        item['data'] = datetime.now()
        item['source'] = 'yjc'
        result.append(item)
    return result
def submit_data(bot, row):
    """Post one article's image to the chat unless it was stored before.

    Args:
        bot: object exposing ``sendPhoto`` (a ``telepot.Bot`` in this script).
        row: article dict; the ``img`` and ``title`` keys are read here.

    Returns:
        The value returned by ``bot.sendPhoto``, or ``None`` when
        ``store_db`` reports that the article is already known.
    """
    if not store_db(row):
        return None

    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    rawimg = urlopen(row['img'])
    try:
        print('submit img url %s' % (row['img'],))
        print(datetime.now())
        return bot.sendPhoto(
            chat_id,
            ('newsimage.jpg', rawimg),
            caption='%s - @axekhabar' % row['title']
        )
    finally:
        # Release the HTTP handle whether or not the upload succeeded.
        rawimg.close()
def store_db(row):
    """Insert ``row`` into the article collection if its image is new.

    The lookup is keyed on ``row['img']`` only. The check and the insert
    are two separate operations, so they are not atomic.

    Args:
        row: article dict; must contain the ``img`` key.

    Returns:
        bool: ``True`` when the row was inserted, ``False`` when a document
        with the same ``img`` already existed.
    """
    if article_obj.find_one({"img": row['img']}):
        return False
    article_obj.insert_one(row)
    return True
def submit_alldata(data):
    """Create the Telegram bot and submit every row in ``data``.

    Args:
        data: iterable of article dicts, each passed to ``submit_data``.
    """
    bot = telepot.Bot(token)
    # The result is unused; the call is kept so one getMe request is still
    # made before any photo is sent (presumably a token check - confirm).
    bot.getMe()
    for row in data:
        submit_data(bot, row)
# Script entry: log the start time, scrape the listing, then post new items.
# Single-argument print form gives the same output on Python 2 and 3.
print('started %s' % (datetime.now(),))
result = data_gathering()
submit_alldata(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment