/****************************************************************************** * DDL ******************************************************************************/ DROP TABLE IF EXISTS fi_fulltext_index CASCADE; DROP TABLE IF EXISTS fi_attribute CASCADE; DROP TABLE IF EXISTS fi_word_type CASCADE; DROP TABLE IF EXISTS fi_dictionary CASCADE; DROP TABLE IF EXISTS fi_replace_letter CASCADE; DROP TABLE IF EXISTS document CASCADE; CREATE TABLE document ( id INTEGER NOT NULL, author VARCHAR NOT NULL, title VARCHAR NOT NULL, content TEXT, PRIMARY KEY (id) ) ; CREATE TABLE fi_replace_letter ( letter_from CHAR(1) NOT NULL, letters_to VARCHAR(3) NOT NULL, PRIMARY KEY (letter_from), UNIQUE (letters_to) ) ; INSERT INTO fi_replace_letter(letter_from, letters_to) VALUES ('ä', 'ae'), ('ö', 'oe'), ('ü', 'ue'), ('ß', 'ss'), ('f', 'ph') ; -- The enum table "fi_attribute" contains the names -- of all text attributes of the table "document". CREATE TABLE fi_attribute ( id INTEGER NOT NULL, name VARCHAR(50) NOT NULL, PRIMARY KEY (id), UNIQUE (name) ) ; INSERT INTO fi_attribute(id, name) VALUES (0, 'author'), (1, 'title'), (2, 'content') ; -- A word can be transformed in different ways before -- it is inserted into the dictionary. The different -- types of those transformations are stored in the -- enum table "fi_word_type". CREATE TABLE fi_word_type ( id INTEGER NOT NULL, name VARCHAR(50) NOT NULL, prefix VARCHAR(10) NOT NULL, PRIMARY KEY (id), UNIQUE (name) ) ; INSERT INTO fi_word_type(id, name, prefix) VALUES (0, 'original', '*'), -- not used yet (1, 'normalized', ''), (2, 'normalized reverse', '@') ; CREATE TABLE fi_dictionary ( word VARCHAR NOT NULL, id INTEGER NOT NULL, id_t INTEGER NOT NULL, PRIMARY KEY (word)--, -- In prduction mode the foreign key constraint is -- ommitted due to performance considerations. -- As the dictionary is created automatically, -- the constraints are satisfied automatically, too. --FOREIGN KEY (id_t) REFERENCES fi_word_type(id) ) ; CREATE TABLE fi_fulltext_index ( id_w INTEGER NOT NULL, id_a INTEGER NOT NULL, pos BIGINT NOT NULL, id_d INTEGER NOT NULL, PRIMARY KEY (id_w, id_a, pos, id_d)--, -- In prduction mode the foreign key constraints are -- ommitted due to performance considerations. -- As the fulltext index is created automatically, -- the constraints are satisfied automatically, too. --FOREIGN KEY (id_d) REFERENCES document (id), --FOREIGN KEY (id_a) REFERENCES fi_attribute (id) ) ; CREATE VIEW document_extended AS SELECT d.id, id_w, word, wt.name AS word_type, id_a, a.name AS attribute, pos, author, title, content FROM fi_fulltext_index fi JOIN document d ON fi.id_d = d.id JOIN fi_dictionary w ON fi.id_w = w.id JOIN fi_word_type wt ON w.id_t = wt.id JOIN fi_attribute a ON fi.id_a = a.id ; /****************************************************************************** * Configuration ******************************************************************************/ CREATE OR REPLACE FUNCTION fi_delimiters_regexp() RETURNS TEXT LANGUAGE SQL IMMUTABLE AS $$ SELECT '[^\w_+]+' $$ ; CREATE OR REPLACE FUNCTION fi_transform_to(c CHAR) RETURNS TEXT LANGUAGE SQL IMMUTABLE AS $$ SELECT letters_to FROM fi_replace_letter WHERE letter_from = c $$ ; CREATE OR REPLACE FUNCTION fi_create_fulltext_table(p_row document) RETURNS TABLE (attribute VARCHAR, fulltext TEXT) LANGUAGE SQL AS $$ SELECT * FROM (VALUES ('author', p_row.author), ('title', p_row.title), ('content', COALESCE(p_row.content, '')) ) AS fulltext(attribute, fulltext) $$ ; -- SELECT d.id, ft.* FROM document AS d, LATERAL fi_create_fulltext_table(d) AS ft; /****************************************************************************** * Auxilliary functions ******************************************************************************/ CREATE OR REPLACE FUNCTION fi_normalize(p_text text) RETURNS text RETURNS NULL ON NULL INPUT LANGUAGE SQL IMMUTABLE AS $$ SELECT STRING_AGG(COALESCE(fi_transform_to(c),c),'') FROM REGEXP_SPLIT_TO_TABLE(LOWER(p_text), '') dummy(c) $$ ; CREATE OR REPLACE FUNCTION fi_normalize_fulltext_table(p_row document) RETURNS TABLE (attribute VARCHAR, word TEXT, pos BIGINT) LANGUAGE SQL IMMUTABLE AS $$ SELECT attribute, word, pos FROM fi_create_fulltext_table(p_row) ft, LATERAL (SELECT fi_normalize(word) AS word, ROW_NUMBER() OVER () AS pos FROM REGEXP_SPLIT_TO_TABLE(fi_normalize(fulltext), fi_delimiters_regexp()) st(word) WHERE word <> '' ) nt $$ ; --SELECT d.id, ft.* FROM document AS d, LATERAL fi_normalize_fulltext_table(d) AS ft; /****************************************************************************** * Trigger ******************************************************************************/ CREATE OR REPLACE FUNCTION fi_fulltext_update_function() RETURNS TRIGGER LANGUAGE PLPGSQL AS $$ BEGIN IF (TG_OP = 'DELETE' OR TG_OP = 'UPDATE') THEN DELETE FROM fi_fulltext_index WHERE id_d = OLD.id_d; END IF ; IF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN CREATE TEMPORARY TABLE l_text ( word TEXT, id_w INTEGER, id_a INTEGER, pos INTEGER, id_d INTEGER ) ; WITH nt (attribute, word, pos) AS (SELECT attribute, word, pos FROM fi_normalize_fulltext_table(NEW)) INSERT INTO l_text(word, id_w, id_a, pos, id_d) SELECT word, (SELECT id FROM fi_dictionary d WHERE d.word = nt.word), (SELECT id FROM fi_attribute a WHERE a.name = nt.attribute), pos, NEW.id FROM nt ; INSERT INTO fi_dictionary(word, id, id_t) SELECT wt.prefix||word, (SELECT COALESCE(MAX(id),0) FROM fi_dictionary) + ROW_NUMBER() OVER (), wt.id FROM (SELECT DISTINCT word FROM l_text WHERE id_w IS NULL) t(word), (SELECT id, prefix FROM fi_word_type WHERE name = 'normalized') wt ; INSERT INTO fi_dictionary(word, id, id_t) SELECT wt.prefix||reverse(word), d.id, wt.id FROM fi_dictionary d, (SELECT id, prefix FROM fi_word_type WHERE name = 'normalized reverse') wt WHERE word IN (SELECT DISTINCT word FROM l_text WHERE id_w IS NULL) ; INSERT INTO fi_fulltext_index(id_w, id_a, pos, id_d) SELECT (SELECT id FROM fi_dictionary d WHERE d.word = t.word), id_a, pos, id_d FROM l_text t ; -- -- Debugging! -- IF NEW.id = 2 -- THEN RAISE EXCEPTION 'count(l_text): %', (SELECT COUNT(*) FROM l_text); -- END IF -- ; DROP TABLE l_text; END IF ; RETURN NULL; END $$ ; DROP TRIGGER IF EXISTS fi_fulltext_update_trigger ON document; CREATE TRIGGER fi_fulltext_update_trigger AFTER INSERT OR UPDATE OR DELETE ON document FOR EACH ROW EXECUTE PROCEDURE fi_fulltext_update_function() ; /****************************************************************************** * Data ******************************************************************************/ INSERT INTO document(id, author, title, content) VALUES (1, 'Wolfgang Kowarschick', 'Skript zur Vorlesung MMDB, Fakultät für Informatik', 'Vorlesung MM-Datenbanksysteme im WiSe 2009/2010' ), (2, 'Wolfgang Kowarschick', 'Skript zur Vorlesung MMProg', 'Vorlesung MM-Programmierung im WiSe 2009/2010' ), (3, 'Christian Märtin, Rolf Socher, Michael Lutz', 'Theoretische Grundlagen der Informatik', 'Theoretische Grundlagen der Informatik, 3. Auflage' )/*, (4, 'Wolfgang Kowarschick', 'Skript zur Vorlesung PM', NULL )*/ ; /****************************************************************************** * Testing querys to be used after a (re)build ******************************************************************************/ SELECT * FROM document_extended WHERE word_type = 'normalized' ORDER BY id, id_a, pos ; /* SELECT * FROM fi_dictionary ORDER BY id, id_t ; SELECT * FROM fi_fulltext_index ; */