Skip to content

Instantly share code, notes, and snippets.

@aquilax
Created November 2, 2011 14:55
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save aquilax/1333830 to your computer and use it in GitHub Desktop.
Save aquilax/1333830 to your computer and use it in GitHub Desktop.
Naive Bayes classifier in PostgreSQL
--
-- PostgreSQL database dump
--
-- Dumped from database version 9.1.1
-- Dumped by pg_dump version 9.1.1
-- Started on 2011-11-02 16:53:36 EET
SET statement_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SET check_function_bodies = false;
SET client_min_messages = warning;
--
-- TOC entry 8 (class 2615 OID 28036)
-- Name: bayes; Type: SCHEMA; Schema: -; Owner: -
--
CREATE SCHEMA bayes;
SET search_path = bayes, pg_catalog;
--
-- TOC entry 223 (class 1255 OID 28111)
-- Dependencies: 607 8
-- Name: classify(integer, integer[]); Type: FUNCTION; Schema: bayes; Owner: -
--
CREATE FUNCTION classify(iset_id integer, ifeature_ids integer[]) RETURNS integer
LANGUAGE plpgsql COST 1
AS $$
DECLARE
features_count integer := 0;
r record;
p float := 0;
mult float := 0;
max_prob float := -1;
max_prob_cat integer := 0;
index integer := 1;
fcount integer := 0;
BEGIN
features_count = array_upper(ifeature_ids, 1);
FOR r IN
SELECT category_id, count(*) AS cntr
FROM bayes.feature_category
WHERE set_id = iset_id
GROUP BY category_id
LOOP
p = 1;
WHILE index <= features_count
LOOP
IF (r.cntr = 0) THEN
mult = 0.001;
ELSE
SELECT cnt
FROM bayes.feature_category
WHERE feature_id = ifeature_ids[index]
AND set_id = iset_id
AND category_id = r.category_id INTO fcount;
IF (fcount IS NULL) THEN
fcount = 0;
END IF;
mult = fcount / r.cntr;
END IF;
p = p * mult;
index = index + 1;
END LOOP;
--RAISE NOTICE 'cat % like %', r.category_id, p;
IF p > max_prob THEN
max_prob_cat = r.category_id;
max_prob = p;
END IF;
END LOOP;
RETURN max_prob_cat;
END;
$$;
--
-- TOC entry 224 (class 1255 OID 28110)
-- Dependencies: 607 8
-- Name: tokenize(character varying[], boolean); Type: FUNCTION; Schema: bayes; Owner: -
--
CREATE FUNCTION tokenize(ifeatures character varying[], ido_insert boolean) RETURNS integer[]
LANGUAGE plpgsql COST 1
AS $$
DECLARE
w varchar;
fid integer := 0;
index integer := 1;
count integer := 0;
outa int[];
BEGIN
count = array_upper(ifeatures, 1);
WHILE index <= count
LOOP
RAISE NOTICE '%', ifeatures[index];
SELECT id FROM bayes.feature WHERE val = ifeatures[index] INTO fid;
IF (fid IS NOT NULL) THEN
outa = array_append(outa, fid);
ELSE
IF ido_insert THEN
INSERT INTO bayes.feature (val) VALUES (ifeatures[index]);
outa = array_append(outa, lastval()::int);
END IF;
END IF;
index := index + 1;
END LOOP;
RETURN outa;
END;
$$;
--
-- TOC entry 222 (class 1255 OID 28092)
-- Dependencies: 607 8
-- Name: train(integer, integer, integer[]); Type: FUNCTION; Schema: bayes; Owner: -
--
CREATE FUNCTION train(iset_id integer, icategory_id integer, ifeature_ids integer[]) RETURNS void
LANGUAGE plpgsql COST 1
AS $$
DECLARE
w integer;
fid integer := 0;
index integer := 1;
count integer := 0;
BEGIN
count = array_upper(ifeature_ids, 1);
WHILE index <= count
LOOP
SELECT id
FROM bayes.feature_category
WHERE set_id = iset_id
AND category_id = icategory_id
AND feature_id = ifeature_ids[index] INTO fid;
IF (fid IS NOT NULL) THEN
UPDATE bayes.feature_category
SET cnt = cnt + 1
WHERE id = fid;
ELSE
INSERT INTO bayes.feature_category
(set_id, category_id, feature_id)
VALUES
(iset_id, icategory_id, ifeature_ids[index]);
END IF;
index := index + 1;
END LOOP;
END;
$$;
SET default_tablespace = '';
SET default_with_oids = false;
--
-- TOC entry 206 (class 1259 OID 28114)
-- Dependencies: 8
-- Name: feature; Type: TABLE; Schema: bayes; Owner: -; Tablespace:
--
CREATE TABLE feature (
id integer NOT NULL,
val character varying(50)
);
--
-- TOC entry 208 (class 1259 OID 28124)
-- Dependencies: 1975 8
-- Name: feature_category; Type: TABLE; Schema: bayes; Owner: -; Tablespace:
--
CREATE TABLE feature_category (
id integer NOT NULL,
set_id integer,
category_id integer,
feature_id integer,
cnt integer DEFAULT 1
);
--
-- TOC entry 207 (class 1259 OID 28122)
-- Dependencies: 208 8
-- Name: feature_category_id_seq; Type: SEQUENCE; Schema: bayes; Owner: -
--
CREATE SEQUENCE feature_category_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
--
-- TOC entry 1986 (class 0 OID 0)
-- Dependencies: 207
-- Name: feature_category_id_seq; Type: SEQUENCE OWNED BY; Schema: bayes; Owner: -
--
ALTER SEQUENCE feature_category_id_seq OWNED BY feature_category.id;
--
-- TOC entry 205 (class 1259 OID 28112)
-- Dependencies: 206 8
-- Name: feature_id_seq; Type: SEQUENCE; Schema: bayes; Owner: -
--
CREATE SEQUENCE feature_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
--
-- TOC entry 1987 (class 0 OID 0)
-- Dependencies: 205
-- Name: feature_id_seq; Type: SEQUENCE OWNED BY; Schema: bayes; Owner: -
--
ALTER SEQUENCE feature_id_seq OWNED BY feature.id;
--
-- TOC entry 1973 (class 2604 OID 28117)
-- Dependencies: 205 206 206
-- Name: id; Type: DEFAULT; Schema: bayes; Owner: -
--
ALTER TABLE feature ALTER COLUMN id SET DEFAULT nextval('feature_id_seq'::regclass);
--
-- TOC entry 1974 (class 2604 OID 28127)
-- Dependencies: 207 208 208
-- Name: id; Type: DEFAULT; Schema: bayes; Owner: -
--
ALTER TABLE feature_category ALTER COLUMN id SET DEFAULT nextval('feature_category_id_seq'::regclass);
--
-- TOC entry 1981 (class 2606 OID 28130)
-- Dependencies: 208 208
-- Name: feature_category_pkey; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace:
--
ALTER TABLE ONLY feature_category
ADD CONSTRAINT feature_category_pkey PRIMARY KEY (id);
--
-- TOC entry 1983 (class 2606 OID 28132)
-- Dependencies: 208 208 208 208
-- Name: feature_category_set_id_category_id_feature_id_key; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace:
--
ALTER TABLE ONLY feature_category
ADD CONSTRAINT feature_category_set_id_category_id_feature_id_key UNIQUE (set_id, category_id, feature_id);
--
-- TOC entry 1977 (class 2606 OID 28119)
-- Dependencies: 206 206
-- Name: feature_pkey; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace:
--
ALTER TABLE ONLY feature
ADD CONSTRAINT feature_pkey PRIMARY KEY (id);
--
-- TOC entry 1979 (class 2606 OID 28121)
-- Dependencies: 206 206
-- Name: feature_val_key; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace:
--
ALTER TABLE ONLY feature
ADD CONSTRAINT feature_val_key UNIQUE (val);
-- Completed on 2011-11-02 16:53:37 EET
--
-- PostgreSQL database dump complete
--
@kangasaaron
Copy link

Could you give a tiny bit of instruction on how to use this?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment