Skip to content

Instantly share code, notes, and snippets.

@btbytes
Last active January 23, 2024 18:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save btbytes/0ac834b02c87e426d98bd59a24b89459 to your computer and use it in GitHub Desktop.
Save btbytes/0ac834b02c87e426d98bd59a24b89459 to your computer and use it in GitHub Desktop.
pinboard schema
-- source https://web.archive.org/web/20220605044249/https://static.pinboard.in/schema.htm
-- MySQL dump 10.13
--
-- Pinboard database schema
-- Feel free to use this however you like. Send questions to support@pinboard.in.
-- For best results, use the Percona version of MySQL! http://www.percona.com/software/percona-server/
CREATE TABLE `bookmarks` (
`id` int(11) NOT NULL,
`url` mediumtext, -- verbatim URL (may differ from actual URL referenced by url_id)
`title` varchar(255),
`description` mediumtext,
`user_id` int(11) NOT NULL,
`toread` tinyint(1) DEFAULT '0',
`private` binary(1) DEFAULT '0',
`url_id` int(11),
`slug` char(20), -- opaque token for use in URLs
`snapshot_id` int(11),
`code` char(3), -- http response code (if crawled)
`source` smallint(6), -- numeric
`added_at` datetime, -- date added to Pinboard
`created_at` datetime, -- stated creation date
`updated_at` datetime,
PRIMARY KEY (`id`),
UNIQUE KEY `slug` (`slug`),
UNIQUE KEY `bookmark` (`user_id`,`url`(255)),
KEY `created` (`created_at`),
KEY `user` (`user_id`),
KEY `private` (`private`),
KEY `url` (`url_id`),
KEY `toread` (`toread`),
KEY `updated` (`updated_at`),
KEY `snapshot` (`snapshot_id`),
KEY `code` (`code`),
KEY `multi` (`user_id`,`private`,`toread`,`created_at`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Table structure for table `btags` (short for 'bookmark tags')
CREATE TABLE `btags` (
`id` int(11) NOT NULL,
`user_id` int(11) NOT NULL,
`bookmark_id` int(11) NOT NULL,
`url_id` int(11),
`tag` varchar(255),
`created_at` datetime,
`private` tinyint(1),
`seq` tinyint(4), -- preserve display order
PRIMARY KEY (`id`),
UNIQUE KEY `btag` (`user_id`,`bookmark_id`,`tag`),
KEY `user` (`user_id`),
KEY `tag` (`tag`),
KEY `bookmark` (`bookmark_id`),
KEY `url` (`url_id`),
KEY `private` (`private`),
KEY `usertag` (`user_id`,`tag`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Table structure for table `snapshots`
-- (snapshots are directories created by wget)
CREATE TABLE `snapshots` (
`id` int(11) NOT NULL,
`url_id` int(11) NOT NULL,
`crawled_at` datetime,
`slug` varchar(30),
`content_type` varchar(200),
`etag` varchar(255), -- from Etag header
`last_modified` varchar(255), -- from Last-Modified header
`code` char(3), -- http status code
`content_length` varchar(20), -- from Content-Length header
`num_files` smallint(6),
`size` int(11), -- size in bytes of snapshot files
`filename` varchar(255), -- which file in this snapshot to serve
`updated_at` datetime,
`user_id` int(11),
`remote_backup` datetime,
`flagged` binary(1) DEFAULT '0',
`actual_url_id` int(11), -- where we ended up after redirects
`server` varchar(200), -- where this snapshot is stored
`charset` varchar(20), -- detected charset (don't trust servers!)
PRIMARY KEY (`id`),
UNIQUE KEY `user_url` (`url_id`,`user_id`),
KEY `size` (`size`),
KEY `content_length` (`content_length`),
KEY `content_type` (`content_type`),
KEY `slug` (`slug`),
KEY `user_id` (`user_id`),
KEY `code` (`code`),
KEY `crawled` (`crawled_at`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--
-- Table structure for table `urls`
--
CREATE TABLE `urls` (
`id` int(11) NOT NULL,
`url` mediumtext CHARACTER SET latin1, -- latin1 reduces storage requirement
`created_at` datetime,
`count` int(11),
`slug` varchar(40),
`alias_of` int(11), -- mark duplicates
`last_checked` datetime,
`last_status` smallint(6), -- most recent HTTP status code
`content_hash` varchar(255), -- content hash of most recent snapshot
`etag` varchar(255), -- from http headers
`last_modified` varchar(255), -- from http headers
`domain` varchar(255),
PRIMARY KEY (`id`),
UNIQUE KEY `slug` (`slug`),
KEY `count` (`count`),
KEY `url` (`url`(767))
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--
-- Table structure for table `user_tags` (summary table generated from btags)
-- tags are stored here twice - once a public set and a private set
CREATE TABLE `user_tags` (
`tag` varchar(255),
`user_id` int(11) NOT NULL,
`count` int(11),
`include_private` tinyint(1), -- is this tag in the public set or private set?
UNIQUE KEY `usertag` (`user_id`,`tag`,`include_private`),
KEY `count` (`count`),
KEY `tag` (`tag`),
KEY `user` (`user_id`),
KEY `multi` (`user_id`,`include_private`,`count`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--
-- Table structure for table `users`
--
CREATE TABLE `users` (
`id` int(11) NOT NULL,
`login` varchar(40),
`password` varchar(60), -- bcrypt
`created_at` datetime,
`last_login` datetime,
`email` varchar(255),
`name` varchar(255),
`enabled` tinyint(1) DEFAULT '0',
`signup_paid` tinyint(1) DEFAULT '0',
`email_confirmed` tinyint(1) DEFAULT '0',
`reset_code` char(20),
`email_slug` char(6), -- secret token for bookmarking by email
`last_active` datetime, -- most recent add/edit/delete
`bytes_used` int(11) DEFAULT '0',
`disk_used` int(11) DEFAULT '0',
`public_count` int(11),
`private_count` int(11),
`cached_count` int(11), -- all crawled bookmarks (includes errors)
`snapshot_count` int(11), -- actual snapshots stored
`unread_count` int(11),
`rss_slug` char(20), -- secret token for private RSS feeds
`language` char(2),
`tag_count` smallint(6),
`snapshot_error_count` int(11),
`cached_size` bigint(20), -- total archive size in bytes (need bigint!)
`cached_disk_size` bigint(20), -- actual disk used
`oversize_count` int(11),
`is_premium` binary(1) DEFAULT '0',
`fee` smallint(6),
`ftext_indexed_at` datetime, -- date last fulltext index completed
`ftext_count` int(11), -- number of bookmarks with parsed text
`ftext_size` bigint(20), -- size of extracted text
PRIMARY KEY (`id`),
UNIQUE KEY `login` (`login`),
KEY `updated` (`created_at`),
KEY `premium` (`is_premium`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- pinboard.sql converted to sqlite schema using ChatGPT 4.0
CREATE TABLE bookmarks (
id INTEGER NOT NULL,
url TEXT,
title TEXT,
description TEXT,
user_id INTEGER NOT NULL,
toread INTEGER DEFAULT 0,
private INTEGER DEFAULT 0,
url_id INTEGER,
slug TEXT,
snapshot_id INTEGER,
code TEXT,
source INTEGER,
added_at TEXT,
created_at TEXT,
updated_at TEXT,
PRIMARY KEY(id),
UNIQUE(slug),
UNIQUE(user_id, url)
);
CREATE TABLE btags (
id INTEGER NOT NULL,
user_id INTEGER NOT NULL,
bookmark_id INTEGER NOT NULL,
url_id INTEGER,
tag TEXT,
created_at TEXT,
private INTEGER,
seq INTEGER,
PRIMARY KEY(id),
UNIQUE(user_id, bookmark_id, tag)
);
CREATE TABLE snapshots (
id INTEGER NOT NULL,
url_id INTEGER NOT NULL,
crawled_at TEXT,
slug TEXT,
content_type TEXT,
etag TEXT,
last_modified TEXT,
code TEXT,
content_length TEXT,
num_files INTEGER,
size INTEGER,
filename TEXT,
updated_at TEXT,
user_id INTEGER,
remote_backup TEXT,
flagged INTEGER DEFAULT 0,
actual_url_id INTEGER,
server TEXT,
charset TEXT,
PRIMARY KEY(id),
UNIQUE(url_id, user_id)
);
CREATE TABLE urls (
id INTEGER NOT NULL,
url TEXT,
created_at TEXT,
count INTEGER,
slug TEXT,
alias_of INTEGER,
last_checked TEXT,
last_status INTEGER,
content_hash TEXT,
etag TEXT,
last_modified TEXT,
domain TEXT,
PRIMARY KEY(id),
UNIQUE(slug)
);
CREATE TABLE user_tags (
tag TEXT,
user_id INTEGER NOT NULL,
count INTEGER,
include_private INTEGER,
UNIQUE(user_id, tag, include_private)
);
CREATE TABLE users (
id INTEGER NOT NULL,
login TEXT,
password TEXT,
created_at TEXT,
last_login TEXT,
email TEXT,
name TEXT,
enabled INTEGER DEFAULT 0,
signup_paid INTEGER DEFAULT 0,
email_confirmed INTEGER DEFAULT 0,
reset_code TEXT,
email_slug TEXT,
last_active TEXT,
bytes_used INTEGER DEFAULT 0,
disk_used INTEGER DEFAULT 0,
public_count INTEGER,
private_count INTEGER,
cached_count INTEGER,
snapshot_count INTEGER,
unread_count INTEGER,
rss_slug TEXT,
language TEXT,
tag_count INTEGER,
snapshot_error_count INTEGER,
cached_size INTEGER,
cached_disk_size INTEGER,
oversize_count INTEGER,
is_premium INTEGER DEFAULT 0,
fee INTEGER,
ftext_indexed_at TEXT,
ftext_count INTEGER,
ftext_size INTEGER,
PRIMARY KEY(id),
UNIQUE(login)
);
-- virtual tables
CREATE VIRTUAL TABLE bookmarks USING fts5(
id UNINDEXED,
url,
title,
description,
user_id UNINDEXED,
toread UNINDEXED,
private UNINDEXED,
url_id UNINDEXED,
slug UNINDEXED,
snapshot_id UNINDEXED,
code UNINDEXED,
source UNINDEXED,
added_at UNINDEXED,
created_at UNINDEXED,
updated_at UNINDEXED
);
CREATE VIRTUAL TABLE btags USING fts5(
id UNINDEXED,
user_id UNINDEXED,
bookmark_id UNINDEXED,
url_id UNINDEXED,
tag,
created_at UNINDEXED,
private UNINDEXED,
seq UNINDEXED
);
-- In these definitions, the UNINDEXED keyword is used for columns that are not
-- part of the full-text index. The columns without this keyword are included
-- in the full-text index.
-- Note: The FTS5 tables are virtual tables and do not directly store content.
-- They usually sit on top of regular tables, and you populate them with the
-- data from these regular tables. Whenever you make changes to the regular
-- tables (like inserts, updates, or deletes), you'll also need to update the
-- corresponding FTS5 table.
-- Also, these virtual tables do not enforce constraints like PRIMARY KEY or
-- UNIQUE, and they do not support all column types. For maintaining
-- constraints and relationships, you need to handle it in your actual
-- bookmarks and btags tables.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment