Skip to content

Instantly share code, notes, and snippets.

@tomekwojcik
Last active January 18, 2019 21:19
Show Gist options
  • Save tomekwojcik/9015b7213179cae06b7c to your computer and use it in GitHub Desktop.
Save tomekwojcik/9015b7213179cae06b7c to your computer and use it in GitHub Desktop.
Comparison of tagging approaches
*.pyc
*.pyo
fixture.json

Comparison of tagging approaches

This gist contains code required to generate data and initialize DBs required by the "Fun with PostgreSQL: Tagging blog posts" blog post on BTHLabs blog.

Requirements

  • Python 2.7,
  • PostgreSQL DB.

Usage

To simplify the process it's recommended to create a separate virtual environment and execute the commands below after activating it.

  1. $ pip install -r requirements.txt
  2. python make_fixture.py
  3. python init_flat.py <pgsql_db_url>
  4. python init_relational.py <pgsql_db_url>

License

The contents of this gist is licensed under. See LICENSE for more info.

# -*- coding: utf-8 -*-
"""
init_flat.py
============
This script initializes table with ARRAY-based tags. The table (named
*flat_posts*) will be dropped if necessary.
Usage
-----
``python init_flat.py <pgsql_db_url>``
The *pgsql_db_url* command line argument should be a SQLAlchemy-compatible
DB URI.
"""
import codecs
import json
import os
import sys
from sqlalchemy import Column, Integer, String, Text, create_engine, text
from sqlalchemy.dialects.postgresql import ARRAY, array
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
SQL_IDX_TAGS = (
"""CREATE INDEX flat_posts_idx_tags ON flat_posts USING GIN (tags)"""
)
class FlatPost(Base):
__tablename__ = 'flat_posts'
id = Column(Integer, primary_key=True)
slug = Column(String(255), unique=True)
title = Column(Text)
tags = Column(ARRAY(Text))
def __repr__(self):
_dict = {
'id': self.id,
'slug': self.slug,
'title': self.title,
'tags': self.tags
}
return json.dumps(_dict)
if __name__ == '__main__':
if len(sys.argv) == 1:
raise RuntimeError('DB URI not provided')
if not os.path.isfile('fixture.json'):
raise RuntimeError('fixture.json not found')
fixture = None
with codecs.open('fixture.json', 'r', 'utf-8') as fixture_file:
fixture = json.loads(fixture_file.read())
engine = create_engine(sys.argv[1])
Session = sessionmaker(bind=engine)
Base.metadata.drop_all(bind=engine)
Base.metadata.create_all(bind=engine)
session = Session()
session.execute(text(SQL_IDX_TAGS))
session.commit()
for post_fixture in fixture['posts']:
flat_post = FlatPost(**post_fixture)
session.add(flat_post)
session.commit()
session.close()
# -*- coding: utf-8 -*-
"""
init_flat.py
============
This script initializes tables with relational tags. The tables (named
*posts*, *tags* and *posts_to_tags*) will be dropped if necessary.
Usage
-----
``python init_relational.py <pgsql_db_url>``
The *pgsql_db_url* command line argument should be a SQLAlchemy-compatible
DB URI.
"""
import codecs
import json
import os
import sys
from sqlalchemy import Column, ForeignKey, Integer, String, Table, Text,\
create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import backref, relationship, sessionmaker
Base = declarative_base()
post_to_tag = Table(
'posts_to_tags', Base.metadata,
Column('post_id', Integer, ForeignKey('posts.id')),
Column('tag_id', Integer, ForeignKey('tags.id'))
)
class Post(Base):
__tablename__ = 'posts'
id = Column(Integer, primary_key=True)
slug = Column(String(255), unique=True)
title = Column(Text)
def __repr__(self):
_dict = {
'id': self.id,
'slug': self.slug,
'title': self.title,
'tags': [tag.name for tag in self.tags]
}
return json.dumps(_dict)
tags = relationship('Tag', secondary=post_to_tag, backref='posts')
class Tag(Base):
__tablename__ = 'tags'
id = Column(Integer, primary_key=True)
name = Column(String(255), unique=True)
def __repr__(self):
_dict = {
'id': self.id,
'name': self.name
}
return json.dumps(_dict)
if __name__ == '__main__':
if len(sys.argv) == 1:
raise RuntimeError('DB URI not provided')
if not os.path.isfile('fixture.json'):
raise RuntimeError('fixture.json not found')
fixture = None
with codecs.open('fixture.json', 'r', 'utf-8') as fixture_file:
fixture = json.loads(fixture_file.read())
engine = create_engine(sys.argv[1])
Session = sessionmaker(bind=engine)
Base.metadata.drop_all(bind=engine)
Base.metadata.create_all(bind=engine)
session = Session()
for tag_fixture in fixture['tags']:
tag = Tag(name=tag_fixture)
session.add(tag)
session.commit()
for post_fixture in fixture['posts']:
post = Post(
slug=post_fixture['slug'],
title=post_fixture['title']
)
post.tags = session.query(Tag).\
filter(Tag.name.in_(post_fixture['tags'])).\
all()
session.add(post)
session.commit()
session.close()
Copyright (c) 2013 Tomasz Wójcik <labs@tomekwojcik.pl>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
# -*- coding: utf-8 -*-
"""
make_fixture.py
===============
This script builds fixture for tagging test. It generates a JSON file with
posts and tags.
"""
import codecs
import random
import json
LOREM_IPSUM = (
"""Lorem ipsum dolor sit amet consectetur adipiscing elit Morbi """
"""dictum lectus a libero egestas consectetur ut id ligula Quisque """
"""eleifend"""
)
if __name__ == '__main__':
tags_set = set([word.lower().strip() for word in LOREM_IPSUM.split(' ')])
fixture = {
'tags': list(tags_set),
'posts': []
}
for i in xrange(1, 10001):
fixture['posts'].append({
'slug': 'post_%d' % i,
'title': 'Post #%d' % i,
'tags': random.sample(fixture['tags'], 5)
})
fixture['tags'].extend(['spam', 'eggs'])
fixture['posts'].append({
'slug': 'my_spam_eggs_post',
'title': 'My Spam Eggs Post',
'tags': ['spam', 'eggs']
})
fixture['posts'].append({
'slug': 'my_spam_post',
'title': 'My Spam Post',
'tags': ['spam']
})
fixture['posts'].append({
'slug': 'my_eggs_post',
'title': 'My Eggs Post',
'tags': ['eggs']
})
with codecs.open('fixture.json', 'w', 'utf-8') as json_file:
json_file.write(json.dumps(fixture))
.gitignoreMySQL-python==1.2.4
SQLAlchemy==0.8.1
psycopg2==2.5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment