Created
January 13, 2017 12:27
-
-
Save baditaflorin/bb2d6c1081af3ce2289fd5d8f315eb6d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import datetime
import os
import time

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, String, Integer, ForeignKey,Boolean,DateTime,BigInteger,create_engine
from sqlalchemy.orm import sessionmaker,join
from sqlalchemy import update

import medium_posts_scrapper_gist as m
# Declarative base class that every ORM model in this module derives from.
Base = declarative_base()

# Connection URL for the scraper database. It can be overridden with the
# MEDIUM_SCRAPE_DB_URL environment variable so that credentials do not have
# to live in the source; the fallback is the original local URL.
DATABASE_URL = os.environ.get(
    "MEDIUM_SCRAPE_DB_URL",
    "postgresql://postgres:pass@localhost:5432/medium_scrape",
)
engine = create_engine(DATABASE_URL)
class User_Posts_Seed_List(Base):
    """ORM mapping for one row of the ``medium_posts_seed_list`` table.

    Each row names a user (``user_username`` / ``user_id``) and carries two
    boolean flags, ``active`` and ``processed``, plus a timestamp column.
    """

    __tablename__ = 'medium_posts_seed_list'

    # Surrogate primary key; leave it unset on insert so it is auto-generated.
    user_seed_id = Column(BigInteger, primary_key=True, autoincrement=True)
    user_username = Column(String)
    user_id = Column(String)
    active = Column(Boolean)
    processed = Column(Boolean)
    # Leave unset on insert: the column default calls datetime.datetime.utcnow.
    data_scraped = Column(DateTime, default=datetime.datetime.utcnow)
# Create any tables that do not exist yet, then open a module-level session
# bound to the shared engine.
Base.metadata.create_all(bind=engine)
session = sessionmaker(bind=engine)
s = session()
def _set_seed_status(db_session, username, processed, active):
    """Write the ``processed``/``active`` flags for ``username`` and commit."""
    statement = (
        update(User_Posts_Seed_List)
        .where(User_Posts_Seed_List.user_username == username)
        # Real booleans: the columns are Boolean, not String.
        .values(processed=processed, active=active)
    )
    db_session.execute(statement)
    db_session.commit()


def main(start_index=500):
    """Scrape Medium articles for every pending user in the seed list.

    Loads the seed rows flagged ``active=True`` and ``processed=False``,
    skips the first ``start_index`` of them, and calls
    ``m.scrape_articles`` for each remaining username. A user whose scrape
    succeeds is marked ``processed=True, active=True``; a user whose scrape
    raises is marked ``processed=False, active=False`` and the loop carries
    on with the next user.

    Args:
        start_index: Number of pending rows to skip before scraping starts.
            Defaults to 500, the offset that was previously hard-coded.
    """
    Base.metadata.create_all(engine)
    session_factory = sessionmaker(bind=engine, expire_on_commit=False)
    s = session_factory()

    seed_query = (
        s.query(User_Posts_Seed_List)
        .filter_by(active=True, processed=False)
        # Without a stable order the offset below would skip arbitrary rows.
        .order_by(User_Posts_Seed_List.user_seed_id)
        .all()
    )
    # Copy the usernames out up front so the loop never touches ORM objects
    # after the session that loaded them has been closed.
    usernames = [row.user_username for row in seed_query[start_index:]]

    for user_username in usernames:
        # Wall-clock timing; time.clock() is gone in Python 3.8+ and measured
        # CPU time on Unix, which is misleading for network-bound scraping.
        start_time = time.time()
        print("test")
        print(user_username)
        try:
            m.scrape_articles(user_username)
            _set_seed_status(s, user_username, processed=True, active=True)
        except Exception as exc:
            # `Exception` rather than a bare except, so Ctrl-C and SystemExit
            # still stop the run instead of being recorded as scrape failures.
            print("Error processing " + str(user_username))
            print(repr(exc))
            # Clear any failed transaction before reusing the session.
            s.rollback()
            _set_seed_status(s, user_username, processed=False, active=False)
        else:
            print("%s seconds" % (time.time() - start_time))
        finally:
            # Release the connection and the pool after every user, exactly
            # as the original did on both the success and the error path.
            s.close()
            engine.dispose()
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment