Skip to content

Instantly share code, notes, and snippets.

@caub
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save caub/7b980ac18e524a40876c to your computer and use it in GitHub Desktop.
Save caub/7b980ac18e524a40876c to your computer and use it in GitHub Desktop.
web scraping
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException,ElementNotVisibleException
import time
import requests
import json
import urlparse
# Script: open the e-adrenaline portfolio page, step through its lightbox
# gallery and collect the URL of every image shown.
browser = webdriver.Firefox()
try:
    # TODO: upload the collected images to imgur. Sketch of the OAuth step:
    # api_id = '918229bb0dc8940'
    # browser.get('https://api.imgur.com/oauth2/authorize?client_id={}&response_type=token'.format(api_id))
    # browser.find_element_by_id('allow').click()
    # x=browser.current_url
    # u=urlparse.urlparse(x)
    # token = urlparse.parse_qs(u.fragment)['access_token']
    # print token
    # The API then needs the HTTP header "Authorization: Bearer <token>",
    # which selenium cannot set. Options: run the imgur API requests in JS
    # through execute_script, or use a proxy that takes the token as a query
    # string and puts it in the header.
    browser.get('http://www.e-adrenaline.fr/terre/actualites/portfolio-quand-les-chevres-defient-la-gravite/4084')
    print(browser.title.encode('utf-8').strip())
    # Clicking the first thumbnail opens the lightbox.
    browser.find_element_by_xpath("//div[@id='diapo-1']/a").click()

    imgs = []
    while True:
        # find_element_by_id raises NoSuchElementException instead of
        # returning None, so a missing image has to be caught, not compared.
        try:
            image = browser.find_element_by_id('lightbox-image')
        except NoSuchElementException:
            print('stop')
            break
        src = image.get_attribute('src')
        print(src)
        imgs.append(src)
        # payload = {'image': src}
        # r = requests.post('https://api.imgur.com/3/image', data=payload)
        try:
            browser.find_element_by_id('lightbox-nav-btnNext').click()
        except (NoSuchElementException, ElementNotVisibleException):
            # No (visible) "next" button: treat this as the end of the gallery.
            print('stop')
            break
        time.sleep(5)  # wait before reading the image again after the click
    print(imgs)
finally:
    # Always close the browser, even when the scrape fails half-way.
    browser.quit()
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import os
import csv
import getpass
import pymongo
import logging
# Credentials typed into the login form by the scraping loop (placeholders).
# NOTE(review): credentials are hard-coded in source; consider reading them
# from the environment or an untracked config file.
user = '...'
password = '..'
# Address of the login page that the browser opens first.
loginuri = "https://..../auth_login"
def historyuri(id):
    """Return the history-export URL for account ``id``."""
    template = "http://..../{}/export/history"
    return template.format(id)
def tradinguri(id):
    """Return the trading-export URL for account ``id``."""
    template = "http://.../{}/export/trading"
    return template.format(id)
def histcsv(id):
    """Return the path of the history CSV for account ``id``.

    The path points into the ``downloads`` folder of the OS user running
    the script (as reported by ``getpass.getuser()``).
    """
    username = getpass.getuser()
    return 'C:/users/{}/downloads/{}.history.csv'.format(username, id)
def tradingcsv(id):
    """Return the path of the trading CSV for account ``id``.

    The path points into the ``downloads`` folder of the OS user running
    the script (as reported by ``getpass.getuser()``).
    """
    username = getpass.getuser()
    return 'C:/users/{}/downloads/{}.trading.csv'.format(username, id)
# MongoDB connection (the URI shown is a placeholder).
client = pymongo.MongoClient("mongodb://ddfgdfg@fdgdfg:fdfd/gd")
# Handle on the "jfx" database; the update functions and the polling loop
# reach their collections through it (db.tests.*).
db = client.jfx
def updatehistory(id, csvreader):
    """Save every row of a history export into ``db.tests.mql5history``.

    Args:
        id: Account identifier (a string); stored as ``account_id`` and used
            as the prefix of each document ``_id``.
        csvreader: Iterator of rows (lists of strings), e.g. a ``csv.reader``.
            The first row is treated as the header line and discarded.

    Each data row must have at least 12 columns. Columns 9, 10 and 11 are
    parsed as numbers and summed into ``pnl``; the other columns are stored
    as the strings they arrive as. Rows with fewer columns (``csv.reader``
    yields ``[]`` for blank lines) are skipped instead of raising IndexError.
    """
    def to_number(text):
        # Spaces are stripped before parsing (presumably thousands
        # separators); an empty cell counts as 0.
        return float(text.replace(' ', '')) if text != '' else 0

    # First line is headers. The default keeps an empty file from raising
    # StopIteration.
    next(csvreader, None)
    for row in csvreader:
        if len(row) < 12:
            continue
        com = to_number(row[9])
        swap = to_number(row[10])
        pnl = to_number(row[11])
        o = {
            # The id is built from the row's own fields, so saving the same
            # export again targets the same documents.
            '_id': '_'.join([id, row[3], row[1], row[2], row[4], row[8], row[0], row[7]]),
            'state': '1',
            'account_id': id,
            'strategy': '',
            'symbol': row[3],
            'type': row[1],
            'amount': row[2],
            'ot': row[0],
            'op': row[4],
            'sl': row[5],
            'tp': row[6],
            'ct': row[7],
            'cp': row[8],
            'commission': row[9],
            'interest': row[10],
            'pnl': str(pnl + com + swap),
            'pips': '0',
        }
        db.tests.mql5history.save(o)
def updatetrading(id, csvreader):
    """Save every row of a trading export into ``db.tests.mql5trading``.

    Args:
        id: Account identifier (a string); stored as ``account_id`` and used
            as the prefix of each document ``_id``.
        csvreader: Iterator of rows (lists of strings), e.g. a ``csv.reader``.

    Rows whose first cell is ``'Time'`` are header lines and are skipped.
    Each data row must have at least 11 columns. Columns 8, 9 and 10 are
    parsed as numbers and summed into ``pnl``. Rows with fewer columns
    (``csv.reader`` yields ``[]`` for blank lines) are skipped instead of
    raising IndexError.
    """
    def to_number(text):
        # Spaces are stripped before parsing (presumably thousands
        # separators); an empty cell counts as 0.
        return float(text.replace(' ', '')) if text != '' else 0

    for row in csvreader:
        if len(row) < 11 or row[0] == 'Time':
            continue
        print(row)
        com = to_number(row[8])
        swap = to_number(row[9])
        pnl = to_number(row[10])
        o = {
            # Same layout as the history ids, with an empty slot (the double
            # underscore) between row[4] and row[0].
            '_id': id + '_' + row[3] + '_' + row[1] + '_' + row[2] + '_' + row[4] + '__' + row[0] + '_' + row[7],
            'state': '1',
            'account_id': id,
            'strategy': '',
            'symbol': row[3],
            'type': row[1],
            'amount': row[2],
            'ot': row[0],
            'op': row[4],
            'sl': row[5],
            'tp': row[6],
            'cp': row[7],
            'commission': row[8],
            'interest': row[9],
            'pnl': str(pnl + com + swap),
            'pips': '0',
        }
        db.tests.mql5trading.save(o)
# db.tests.mql5users.save({'users':['317','111']})
# Web scraping: log in once, then keep re-downloading the CSV exports of every
# tracked account and loading them into MongoDB.
logging.basicConfig()  # without a handler the logger.info() calls are dropped
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

browser = webdriver.Chrome()
try:
    browser.get(loginuri)
    print(browser.title)
    browser.find_element_by_name('Login').send_keys(user)
    browser.find_element_by_name('Password').send_keys(password)  # + Keys.RETURN
    browser.find_element_by_css_selector('.buttonActive').click()

    accountIds = db.tests.mql5users.find_one()['users']
    logger.info(' go ')

    ## refresh DB each minute
    while True:
        for id in accountIds:
            # Remove the old files, else you'll have (2).csv, (3).csv ...
            # Each file is removed on its own so that a missing history file
            # does not leave a stale trading file behind.
            for stale in (histcsv(id), tradingcsv(id)):
                try:
                    os.remove(stale)
                except OSError:
                    pass  # nothing to remove
            browser.get(historyuri(id))
            browser.get(tradinguri(id))
        time.sleep(5)  # 5 seconds to make sure .csv are downloaded

        for id in accountIds:
            # The two exports are handled independently: one missing file
            # must not prevent the other from being imported.
            try:
                with open(histcsv(id), 'rb') as csvfile:
                    updatehistory(id, csv.reader(csvfile, delimiter=';'))
                logger.info(' updated history '+id)
            except IOError:
                logger.info('file not found '+id)
            try:
                with open(tradingcsv(id), 'rb') as csvfile:
                    updatetrading(id, csv.reader(csvfile, delimiter=';'))
                logger.info(' updated trading '+id)
            except IOError:
                logger.info('file not found '+id)
        time.sleep(55)
        print(' ---------------- ')
finally:
    # Reached on Ctrl-C or any error; the loop itself never ends.
    browser.quit()
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import os
import csv
import getpass
import json
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, ForeignKey, Integer, String, Text, desc
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import logging
# Credentials typed into the login form by the scraping loop (placeholders).
# NOTE(review): credentials are hard-coded in source; consider reading them
# from the environment or an untracked config file.
user = '...'
password = '..'
# Address of the login page that the browser opens first.
loginuri = "https://.../auth_login"
def historyuri(id):
    """Return the history-export URL for account ``id``."""
    template = "http://.../{}/export/history"
    return template.format(id)
def tradinguri(id):
    """Return the trading-export URL for account ``id``."""
    template = "http://..../{}/export/trading"
    return template.format(id)
def histcsv(id):
    """Return the path of the history CSV for account ``id``.

    The path points into the ``downloads`` folder of the OS user running
    the script (as reported by ``getpass.getuser()``).
    """
    username = getpass.getuser()
    return 'C:/users/{}/downloads/{}.history.csv'.format(username, id)
def tradingcsv(id):
    """Return the path of the trading CSV for account ``id``.

    The path points into the ``downloads`` folder of the OS user running
    the script (as reported by ``getpass.getuser()``).
    """
    username = getpass.getuser()
    return 'C:/users/{}/downloads/{}.trading.csv'.format(username, id)
# Declarative base class that the ORM models in this script derive from.
Base = declarative_base()
# In-memory SQLite database: nothing is persisted between runs.
engine = create_engine('sqlite:///:memory:')
# Alternative MySQL engines, kept for reference:
# engine = create_engine("mysql://hjkhkhjk:3306/pe")
# engine = create_engine("mysql://{}:{}@{}/{}".format("gdfg","dfgg","gdfgd","dsf"))
# Install a default log handler and limit SQLAlchemy engine logging to errors.
logging.basicConfig()
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
Base.metadata.bind = engine
# One session, created at import time and shared by the rest of the script.
DBSession = sessionmaker(bind=engine)
session = DBSession()
class User(Base):
    """A tracked account, keyed by its MQL5 id (table ``users``)."""

    __tablename__ = 'users'

    mql5_id = Column(String(255), primary_key=True)
    # Text blob; the polling loop stores a JSON dump of the open trades here.
    open_position = Column(Text())
    # Text blob; not written by the code visible in this file.
    closed_position = Column(Text())

    def __repr__(self):
        # The mapped attribute is ``open_position``; the former
        # ``self.open_positions`` raised AttributeError whenever a User was
        # printed.
        return 'User: {}, {}'.format(self.mql5_id, self.open_position)
class Order(Base):
    """One trade row of an account (table ``orders``).

    Every field is stored as a string, exactly as read from the CSV export.
    The declaration order of the columns is kept because it determines the
    column order of the generated table.
    """

    __tablename__ = 'orders'

    # Owning account.
    account_id = Column(String(255), ForeignKey('users.mql5_id'))

    # Trade description.
    state = Column(String(12))
    strategy = Column(String(255))
    symbol = Column(String(255))
    type = Column(String(255))
    amount = Column(String(255))

    # Values copied from the export columns (see gethistory for the mapping).
    ot = Column(String(255))
    op = Column(String(255))
    sl = Column(String(255))
    tp = Column(String(255))
    ct = Column(String(255))
    cp = Column(String(255))

    # Money figures.
    commission = Column(String(255))
    interest = Column(String(255))
    pnl = Column(String(255))
    pips = Column(String(255))

    # Primary key, assembled by the caller from the row's own fields.
    id = Column(String(255), primary_key=True)

    def __repr__(self):
        label = 'Order: {}'
        return label.format(self.id)
# Create the tables declared on Base. The original note says this is only
# necessary the first time and can be commented out afterwards.
# NOTE(review): the engine configured in this script is an in-memory SQLite
# database, so the tables have to be created on every run; confirm before
# disabling this line.
Base.metadata.create_all(engine)
def gethistory(id, csvreader, last_ct):
    """Build ``Order`` objects for the history rows newer than ``last_ct``.

    Args:
        id: Account identifier (a string); stored as ``account_id`` and used
            as the prefix of each order ``id``.
        csvreader: Iterator of rows (lists of strings), e.g. a ``csv.reader``.
        last_ct: Only rows whose column 7 compares greater than this value
            (plain string comparison) are returned; ``''`` returns them all.

    Returns:
        A tuple ``(orders, max_ct)``: the list of new ``Order`` objects and
        the largest column-7 value seen among them (``last_ct`` if none).

    Rows whose first cell is ``'Time'`` are header lines and are skipped.
    Rows with fewer than 12 columns (``csv.reader`` yields ``[]`` for blank
    lines) are skipped too, instead of raising IndexError.
    """
    def to_number(text):
        # Spaces are stripped before parsing (presumably thousands
        # separators); an empty cell counts as 0.
        return float(text.replace(' ', '')) if text != '' else 0

    orders = []
    max_ct = last_ct
    # todo sort by ct and update only 15
    for row in csvreader:
        if len(row) < 12 or row[0] == 'Time':
            continue
        if not row[7] > last_ct:
            continue  # already imported on an earlier pass
        max_ct = max(max_ct, row[7])
        com = to_number(row[9])
        swap = to_number(row[10])
        pnl = to_number(row[11])
        orders.append(Order(
            state='1',
            account_id=id,
            strategy='',
            symbol=row[3],
            type=row[1],
            amount=row[2],
            ot=row[0],
            op=row[4],
            sl=row[5],
            tp=row[6],
            ct=row[7],
            cp=row[8],
            commission=row[9],
            interest=row[10],
            pnl=str(pnl + com + swap),
            pips='0',
            id='_'.join([id, row[3], row[1], row[2], row[4], row[8], row[0], row[7]]),
        ))
    return (orders, max_ct)
def gettrading(id, csvreader):
    """Convert a trading export into a list of JSON-serialisable dicts.

    Args:
        id: Account identifier (a string); stored as ``accountId`` and used
            as the prefix of each order ``id``.
        csvreader: Iterator of rows (lists of strings), e.g. a ``csv.reader``.

    Returns:
        A list with one dict per data row. ``pnl`` is the sum of columns 8,
        9 and 10 as a string; ``ct`` is always ``''``.

    Rows whose first cell is ``'Time'`` are header lines and are skipped.
    Rows with fewer than 11 columns (``csv.reader`` yields ``[]`` for blank
    lines) are skipped too, instead of raising IndexError.
    """
    def to_number(text):
        # Spaces are stripped before parsing (presumably thousands
        # separators); an empty cell counts as 0.
        return float(text.replace(' ', '')) if text != '' else 0

    orders = []
    for row in csvreader:
        if len(row) < 11 or row[0] == 'Time':
            continue
        com = to_number(row[8])
        swap = to_number(row[9])
        pnl = to_number(row[10])
        orders.append({
            'state': '0',
            'accountId': id,
            'strategy': '',
            'symbol': row[3],
            'type': row[1],
            'amount': row[2],
            'ot': row[0],
            'op': row[4],
            'sl': row[5],
            'tp': row[6],
            'ct': '',
            'cp': row[7],
            'commission': row[8],
            'interest': row[9],
            'pnl': str(pnl + com + swap),
            'pips': '0',
            'id': id + '_' + row[3] + '_' + row[1] + '_' + row[2] + '_' + row[4] + '_;_' + row[0],
        })
    return orders
# Script: seed the tracked accounts, log in once, then keep re-downloading
# the CSV exports of every account and loading them into the database.
users = session.query(User)
if users.count() == 0:
    # Nothing stored yet: start with the two tracked accounts.
    session.add_all([User(mql5_id='317'), User(mql5_id='111')])
    session.commit()
    users = session.query(User)

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

browser = webdriver.Chrome()
try:
    browser.get(loginuri)
    print(browser.title)
    browser.find_element_by_name('Login').send_keys(user)
    browser.find_element_by_name('Password').send_keys(password)  # + Keys.RETURN
    browser.find_element_by_css_selector('.buttonActive').click()
    logger.info(' go ')

    ## refresh DB each minute
    # Highest "ct" already imported, per account, to avoid unnecessary
    # history merges. It is tracked per account: a single shared value would
    # make one account's newest trade hide the older trades of the others.
    last_ct = {}
    while True:
        # The loop variable is not called `user`, so the login name above is
        # left intact.
        for account in users:
            # Remove the old files, else you'll have (2).csv, (3).csv ...
            # Each file is removed on its own so that a missing history file
            # does not leave a stale trading file behind.
            for stale in (histcsv(account.mql5_id), tradingcsv(account.mql5_id)):
                try:
                    os.remove(stale)
                except OSError:
                    pass  # nothing to remove
            browser.get(historyuri(account.mql5_id))
            browser.get(tradinguri(account.mql5_id))
        time.sleep(15)  # 15 seconds to make sure .csv are downloaded

        for account in users:
            account_id = account.mql5_id
            # The two exports are handled independently: one missing file
            # must not prevent the other from being imported.
            try:
                with open(tradingcsv(account_id), 'rb') as csvfile:
                    orders = gettrading(account_id, csv.reader(csvfile, delimiter=';'))
                print(orders)
                print('...')
                account.open_position = json.dumps(orders)
                logger.info(' updated trading {} {}'.format(account_id, len(orders)))
            except IOError:
                logger.info('file not found '+account_id)
            try:
                previous_ct = last_ct.get(account_id, '')
                with open(histcsv(account_id), 'rb') as csvfile:
                    (orders, max_ct) = gethistory(account_id, csv.reader(csvfile, delimiter=';'), previous_ct)
                print(' ... {} {}'.format(previous_ct, max_ct))
                last_ct[account_id] = max_ct
                # user.closed_positions = orders
                # no cascading for now
                session.add_all(orders)
                logger.info(' updated history {} {}'.format(account_id, len(orders)))
            except IOError:
                logger.info('file not found '+account_id)
            session.commit()

        time.sleep(10)
        # Debug output: show what is stored for account 317.
        print('test')
        for o in session.query(User).filter(User.mql5_id == '317'):
            print(o)
        # for o in session.query(Order).filter(Order.state=='1').order_by(desc(Order.ot)).limit(5):
        #     print o
        time.sleep(35)
        print(' ---------------- ')
finally:
    # Reached on Ctrl-C or any error; the loop itself never ends.
    browser.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment