Skip to content

Instantly share code, notes, and snippets.

@despens
Created March 21, 2012 09:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save despens/2145757 to your computer and use it in GitHub Desktop.
Save despens/2145757 to your computer and use it in GitHub Desktop.
Geocities Database
CREATE TABLE agents
(
-- Each agent is identified by its name
id text NOT NULL,
-- Flag indicating that the agent is a human.
human boolean NOT NULL DEFAULT false,
-- The agent's weight. Agents with a higher weight
-- can overrule information set by lower agents.
weight integer NOT NULL DEFAULT 0,
-- Time when this agent was introduced
ts timestamp with time zone NOT NULL DEFAULT now(),
-- The name is the primary key.
CONSTRAINT agents_pkey PRIMARY KEY (id)
)
CREATE TABLE files
(
--
-- The following fields represent information about a file
-- that is the absolute truth and trivial to figure out.
-- Each file's id is derived from these fields.
--
-- Natural key for the file.
-- md5 checksum for the concatenated fields
-- 'collection', 'hurl', 'path', last_modified', 'size', 'checksum_md5'.
id character varying(32) NOT NULL,
-- Name of the collection this file is part of, for example
-- 'geocities.archiveteam.torrent'.
collection text NOT NULL,
-- Path on the local file system, relative from the
-- directory in which all collections are stored.
path text NOT NULL,
-- The file's last modified timestamp.
last_modified timestamp without time zone NOT NULL,
-- The file's size.
size integer NOT NULL,
-- The md5 checksum of the file's contents.
checksum_md5 character varying(32) NOT NULL,
--
-- The following fields are specific to the archiving process
-- and are not used to generate the id. It is however practical
-- to store them in case something goes wrong.
--
-- The name of the agent responsible for creating this
-- database record.
agent text NOT NULL,
-- Time when this record was created.
ts timestamp with time zone NOT NULL DEFAULT now(),
--
-- Contraints
--
CONSTRAINT files_pkey PRIMARY KEY (id), -- 'id' shall be the primary key
CONSTRAINT files_path_key UNIQUE (path) -- two files in the same path are not possible
)
CREATE TABLE urls
(
-- Natural key of a URL mapping.
-- md5 checksum of concatenated fields
-- 'url', 'file_id', 'agent'
id character varying(32) NOT NULL,
-- Full historic URL.
url text NOT NULL,
-- id of the file the URL points to.
file_id character varying(32) NOT NULL,
-- This mapping is wrong and should be ignored
del boolean NOT NULL DEFAULT false,
-- Name of the agent that created this entry.
agent text NOT NULL,
-- Time when this entry was created. Not used to
-- generate the id.
ts timestamp with time zone NOT NULL DEFAULT now(),
-- Constraints:
CONSTRAINT urls_pkey PRIMARY KEY (id), -- 'id' shall be the primary key
CONSTRAINT urls_file_id_fkey FOREIGN KEY (file_id) -- 'file_id' points to the field 'id'
REFERENCES files (id) MATCH SIMPLE -- in the table 'files'
ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY DEFERRED
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment