Skip to content

Instantly share code, notes, and snippets.

@coline-carle
Created January 16, 2018 13:52
Show Gist options
  • Save coline-carle/f07e32eabfe170ad695bdaadf73162c6 to your computer and use it in GitHub Desktop.
Save coline-carle/f07e32eabfe170ad695bdaadf73162c6 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import sys
import requests
import getopt
import glob
import os
import datetime
from db import Page, Session
# HTTP session created once at import time.
# NOTE(review): nothing in the visible code references `sess` --
# FileMirror.download calls requests.get() directly -- confirm whether this
# session is meant to be used for the downloads or can be removed.
sess = requests.session()
class FileMirror(object):
    """Mirror remote pages into a local directory and track them in the DB.

    Each page is stored as ``<local_path>/<type>/<gameID>.html``. The
    database session passed in is used to read ``Page`` rows and to record,
    in ``Page.localLastMod``, when the local copy was last written.
    """

    # Pattern extracting the numeric page id from a mirrored file path.
    # It is compiled per instance in __init__.
    key_regexp = r'/(\d+)\.html$'

    # Seconds to wait on the server before giving up on a request. Without
    # a timeout a request to an unresponsive host can block forever.
    request_timeout = 30

    def __init__(self, session, type="quest", local_path="mirror"):
        """Bind the mirror to a DB session and an existing local directory.

        Args:
            session: database session used for all queries and commits.
            type: page type; also the name of the sub-directory holding
                the downloaded files.
            local_path: root directory of the mirror.

        Raises:
            Exception: if ``<local_path>/<type>`` is not a directory.
        """
        self.key_regexp = re.compile(self.key_regexp)
        self.__local_path = local_path
        self.__type = type
        self.__fullpath = os.path.join(local_path, type)
        self.__session = session
        if not os.path.isdir(self.__fullpath):
            raise Exception("{} is not a valid directory".format(self.__fullpath))

    def scan_local(self):
        """Rebuild every page's ``localLastMod`` from the files on disk.

        All ``localLastMod`` values are first reset to ``None`` and
        committed. Then each file in the mirror directory is matched to its
        page by id and the page receives the file's modification time.

        Raises:
            ValueError: if a file in the directory does not follow the
                ``<id>.html`` naming pattern.

        Any error raised by the ``.one()`` lookup (for example when no page
        has the file's id) propagates unchanged.
        """
        print("syncing database with already downloaded page (slow operation)")
        self.__session.query(Page).update({'localLastMod': None})
        self.__session.commit()
        for path in glob.glob(os.path.join(self.__fullpath, '*')):
            db_key = self.get_key_from_filename(path)
            page = self.__session.query(Page).filter(Page.gameID == db_key).one()
            page.localLastMod = self.last_mod_datetime(path)
        # One commit persists all the per-file updates made above.
        self.__session.commit()

    def last_mod_datetime(self, filename):
        """Return the file's modification time as a naive local datetime."""
        timestamp = os.path.getmtime(filename)
        return datetime.datetime.fromtimestamp(timestamp)

    def get_key_from_filename(self, filename):
        """Return the integer page id encoded in a mirrored file path.

        Args:
            filename: path ending in ``/<digits>.html``.

        Raises:
            ValueError: if the path does not match ``key_regexp``.
        """
        # key_regexp is anchored on '/', so normalise the platform separator
        # first; paths built with os.path.join would otherwise never match
        # where the separator is not '/'.
        match = self.key_regexp.search(filename.replace(os.sep, '/'))
        if match:
            return int(match.group(1))
        raise ValueError(
            "filename {} does not match standard filename pattern".format(filename))

    def stats(self):
        """Delegate to ``Page.print_stats`` for this mirror's page type."""
        Page.print_stats(self.__session, self.__type)

    def download_outdated(self):
        """Download every page returned by ``Page.outdated`` for this type."""
        for page in Page.outdated(self.__session, self.__type):
            self.download(page)

    def download_missing(self):
        """Download every page returned by ``Page.missing`` for this type."""
        for page in Page.missing(self.__session, self.__type):
            self.download(page)

    def download(self, page):
        """Fetch ``page.loc`` and store the response body locally.

        HTTP error statuses, redirect loops, timeouts and connection
        failures are printed and the page is skipped, so one bad page does
        not abort a batch. Errors while saving the file propagate.
        """
        try:
            response = requests.get(page.loc, timeout=self.request_timeout)
            response.raise_for_status()
        except (
            requests.exceptions.HTTPError,
            requests.exceptions.TooManyRedirects,
            requests.exceptions.Timeout,
            # ConnectionError also covers its subclasses ProxyError and
            # SSLError, so they need no handlers of their own.
            requests.exceptions.ConnectionError,
        ) as err:
            print(err)
        else:
            self.save_page(page, response)
            print("Donwloaded: %s" % (page.loc))

    def get_filename(self, page):
        """Return the local path where ``page`` is (or will be) stored."""
        return os.path.join(self.__fullpath, "%d.html" % (page.gameID))

    def save_page(self, page, response):
        """Write the response body to disk and record the save time.

        The raw bytes of ``response.content`` are written to the page's
        file, ``page.localLastMod`` is set to the current local time, and
        the session is committed.
        """
        filename = self.get_filename(page)
        with open(filename, 'wb') as f:
            f.write(response.content)
        page.localLastMod = datetime.datetime.now()
        self.__session.commit()
def main(argv):
    """Command-line entry point for the page mirror.

    With ``-s`` / ``--sync`` the database is re-synchronised with the files
    already on disk and the process exits with status 0. Otherwise stats
    are printed, then missing pages and outdated pages are downloaded.

    Args:
        argv: command-line arguments without the program name.

    Exits with status 1 when an unknown option is given.
    """
    try:
        opts, _ = getopt.getopt(argv, "s", ["sync"])
    except getopt.GetoptError:
        print("invalid argument")
        sys.exit(1)
    sync = any(opt in ("-s", "--sync") for opt, _ in opts)

    dbsession = Session()
    try:
        mirror = FileMirror(dbsession)
        if sync:
            mirror.scan_local()
            sys.exit(0)
        mirror.stats()
        print("downloading missing pages")
        mirror.download_missing()
        print("downloading outdated pages")
        mirror.download_outdated()
    finally:
        # Runs on normal completion, on the sync early exit (SystemExit)
        # and on errors, so the session is always released.
        dbsession.close()
# Run the mirror only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment