Skip to content

Instantly share code, notes, and snippets.

@baditaflorin
Created January 13, 2017 12:27
Show Gist options
  • Save baditaflorin/bb2d6c1081af3ce2289fd5d8f315eb6d to your computer and use it in GitHub Desktop.
Save baditaflorin/bb2d6c1081af3ce2289fd5d8f315eb6d to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, String, Integer, ForeignKey,Boolean,DateTime,BigInteger,create_engine
from sqlalchemy.orm import sessionmaker,join
from sqlalchemy import update
import datetime
import time
import medium_posts_scrapper_gist as m
# Connection URL for the local PostgreSQL database that holds the scrape data.
# NOTE(review): credentials are embedded in source; consider loading them from
# the environment or a config file instead.
DATABASE_URL = "postgresql://postgres:pass@localhost:5432/medium_scrape"

# Declarative base class that every ORM model in this module derives from.
Base = declarative_base()
# Module-wide engine shared by all sessions created below.
engine = create_engine(DATABASE_URL)
class User_Posts_Seed_List(Base):
    """ORM mapping for the ``medium_posts_seed_list`` table.

    Each row names one user whose posts are to be scraped, together with two
    boolean flags (``active`` and ``processed``) that track the row's state.
    """

    __tablename__ = 'medium_posts_seed_list'

    # Auto-incrementing primary key. Do not supply a value for user_seed_id
    # when inserting; the database assigns it.
    user_seed_id = Column(BigInteger, primary_key=True, autoincrement=True)

    # Username and id of the seed user, both stored as strings.
    user_username = Column(String)
    user_id = Column(String)

    # State flags for the seed row.
    active = Column(Boolean)
    processed = Column(Boolean)

    # Timestamp column; when no value is supplied on insert it defaults to
    # the result of datetime.datetime.utcnow().
    data_scraped = Column(DateTime, default=datetime.datetime.utcnow)
# Create any tables declared on Base that do not exist yet.
Base.metadata.create_all(engine)

# Session factory bound to the module engine, plus one module-level session.
session = sessionmaker(bind=engine)
s = session()
def main(start_offset=500):
    """Scrape articles for every active, not-yet-processed seed user.

    Loads all seed rows with ``active=True`` and ``processed=False``, skips
    the first ``start_offset`` of them, and calls ``m.scrape_articles`` for
    each remaining username. After a successful scrape the matching rows are
    marked ``processed=True, active=True``; after a failed scrape they are
    marked ``processed=False, active=False`` so they are not selected again.

    Args:
        start_offset: Number of leading seed rows to skip. Defaults to 500,
            which is the offset the script originally had hard-coded.

    Raises:
        Exception: Anything raised while recording a *failure* in the
            database propagates, since the run can no longer track state.
            ``KeyboardInterrupt`` / ``SystemExit`` are never swallowed.
    """
    # Local import keeps this function self-contained; traceback is used only
    # to report scrape failures.
    import traceback

    Base.metadata.create_all(engine)
    session = sessionmaker()
    # expire_on_commit=False keeps the already-loaded row attributes readable
    # after the per-user commits below.
    session.configure(bind=engine, expire_on_commit=False)
    s = session()
    seed_query = s.query(User_Posts_Seed_List).filter_by(
        active=True, processed=False).all()

    for row in seed_query[start_offset:]:
        # Bind the username before entering the try block so the error
        # handler always refers to the user currently being processed.
        user_username = row.user_username
        # Wall-clock timing: time.clock() measured CPU time on Unix and was
        # removed in Python 3.8.
        start_time = time.time()
        try:
            print(user_username)
            m.scrape_articles(user_username)
            # Real booleans, not the strings "True"/"False": the columns are
            # Boolean and newer SQLAlchemy versions reject string values.
            update_statement_ok = (
                update(User_Posts_Seed_List)
                .where(User_Posts_Seed_List.user_username == user_username)
                .values(processed=True, active=True)
            )
            s.execute(update_statement_ok)
            s.commit()
            print("%s seconds" % (time.time() - start_time))
        except Exception:
            # "except Exception" lets Ctrl-C and SystemExit stop the run
            # instead of being recorded as a scrape failure.
            print("Error processing " + str(user_username))
            traceback.print_exc()
            # The failure may have come from execute()/commit(); the session
            # must be rolled back before it can be used again.
            s.rollback()
            update_statement_error = (
                update(User_Posts_Seed_List)
                .where(User_Posts_Seed_List.user_username == user_username)
                .values(processed=False, active=False)
            )
            s.execute(update_statement_error)
            s.commit()
        finally:
            # Same per-user cleanup the original performed in both branches:
            # detach everything from the session and drop pooled connections.
            s.close()
            engine.dispose()
# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment