-- PostgreSQL table structure for AimSniff -- -- Crufted up by Jason Healy -- -- BEFORE RUNNING THIS SCRIPT, you must add PL/Perl and PLPGSQL as -- supported external languages. You can do this by running: -- -- createlang plperl dbname -- createlang plpgsql dbname -- -- on the command line (as the PostgreSQL root user). -- -- -- Perl Support Functions -- -- This function removes and tags from a given log message, -- as well as stripping any extraneous chars from the message. CREATE FUNCTION aimsniff_scrub_message (varchar) RETURNS varchar AS ' my $log = shift(@_); return undef unless(defined $log); # Remove trailing bits after the HTML body $log =~ s#.*$##i; $log =~ s#(.*)#$1#ig; $log =~ s#]*>(.*)#$1#ig; return $log; ' LANGUAGE plperl; -- This function strips any trailing/leading whitespace from a string CREATE FUNCTION aimsniff_strip_whitespace (varchar) RETURNS varchar AS ' my $str = shift(@_); return undef unless(defined $str); # Remove leading whitespace $str =~ s#^\\s+##; # Remove trailing whitespace $str =~ s#\\s+$##; return $str; ' LANGUAGE plperl; -- This function strips any HTML tag-like things from a string CREATE FUNCTION aimsniff_strip_html_tags (varchar) RETURNS varchar AS ' my $str = shift(@_); return undef unless(defined $str); # Remove tag-like things $str =~ s#]*>##g; return $str; ' LANGUAGE plperl; -- Given a known AIM handle, this function tries to divine the other -- handle of a conversation, regardless of whether its the "to" or "from" CREATE FUNCTION aimsniff_partner (varchar, varchar, varchar) RETURNS varchar AS ' my $known = shift(@_); my $from = shift(@_); my $to = shift(@_); if ( ("" ne $to) && ($known ne $to) ) { return $to; } elsif ( ("" ne $from) && ($known ne $from) ) { return $from; } return "UNKNOWN"; ' LANGUAGE plperl; -- Given a block of text, extract the first HTTP URL found and return it CREATE FUNCTION aimsniff_extract_href (varchar) RETURNS varchar AS ' my $block = shift(@_); if ($block =~ /([^<>]+)<\\/a>/i) { return qq{$1 ($2)}; } elsif ($block =~ /(http:[^\s"]+)/i) { return qq{$1}; } # catch-all, return null return undef; ' LANGUAGE plperl; -- select count(distinct fromHandle) as mentions, aimsniff_extract_href(message) as url from logs where message ilike '%http://%' group by url order by mentions desc, url; -- -- Table structure for table 'buddies' -- CREATE TABLE buddies ( id SERIAL NOT NULL, ip INET DEFAULT NULL, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, buddygroup VARCHAR(255) DEFAULT NULL, buddylist TEXT DEFAULT NULL, PRIMARY KEY (id) ); CREATE INDEX buddies_ts_index ON buddies (ts); -- -- Table structure for table 'handles' -- CREATE TABLE handles ( id SERIAL NOT NULL, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, ip INET DEFAULT NULL, handle VARCHAR(255) DEFAULT NULL, username VARCHAR(255) DEFAULT NULL, machine VARCHAR(255) DEFAULT NULL, PRIMARY KEY (id) ); CREATE INDEX handles_handle_index ON handles (handle); CREATE INDEX handles_ip_index ON handles (ip); -- Trigger and function to cleanse data before insertion CREATE FUNCTION aimsniff_scrub_handles_row() RETURNS trigger AS ' BEGIN -- Strip whitespace from values -- Convert AIM handles to lower case for easy comparision NEW.handle := LOWER(aimsniff_strip_whitespace(NEW.handle)); NEW.username := aimsniff_strip_whitespace(NEW.username); NEW.machine := aimsniff_strip_whitespace(NEW.machine); RETURN NEW; END; ' LANGUAGE plpgsql; CREATE TRIGGER aimsniff_scrub_handles_insert BEFORE INSERT ON handles FOR EACH ROW EXECUTE PROCEDURE aimsniff_scrub_handles_row(); -- -- Table structure for table 'logs' -- CREATE TABLE logs ( id SERIAL NOT NULL, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, ip INET DEFAULT NULL, fromHandle VARCHAR(255) DEFAULT NULL, handle VARCHAR(255) DEFAULT NULL, direction INTEGER DEFAULT NULL, message TEXT, PRIMARY KEY (id) ); CREATE INDEX logs_ts_index ON logs (ts); CREATE INDEX logs_ip_index ON logs (ip); CREATE INDEX logs_fromHandle_index ON logs (fromHandle); CREATE INDEX logs_handle_index ON logs (handle); -- Trigger and function to cleanse data before insertion CREATE FUNCTION aimsniff_scrub_logs_row() RETURNS trigger AS ' BEGIN -- Strip whitespace from values -- Convert AIM handles to lower case for easy comparision NEW.fromHandle := LOWER(aimsniff_strip_whitespace(NEW.fromHandle)); NEW.handle := LOWER(aimsniff_strip_whitespace(NEW.handle)); NEW.message := aimsniff_strip_whitespace(NEW.message); -- Strip specific HTML tags from message NEW.message := aimsniff_scrub_message(NEW.message); RETURN NEW; END; ' LANGUAGE plpgsql; CREATE TRIGGER aimsniff_scrub_logs_insert BEFORE INSERT ON logs FOR EACH ROW EXECUTE PROCEDURE aimsniff_scrub_logs_row(); -- -- Table structure for table 'versions' -- CREATE TABLE versions ( id SERIAL NOT NULL, ip INET DEFAULT NULL, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, handle VARCHAR(255) DEFAULT NULL, version VARCHAR(255) DEFAULT NULL, country VARCHAR(5) DEFAULT NULL, language VARCHAR(5) DEFAULT NULL, PRIMARY KEY (id) ); -- -- Table structure for table 'aliases' -- CREATE TABLE aliases ( handle VARCHAR(255) NOT NULL, realname VARCHAR(255) NOT NULL, PRIMARY KEY (handle) ); CREATE INDEX aliases_handle_index ON aliases (handle); -- -- View to remove duplicate messages -- CREATE or REPLACE VIEW deduped AS SELECT LOGS.ts, LOGS.fromHandle, LOGS.handle, LOGS.message FROM logs LOGS WHERE NOT EXISTS ( SELECT DUP.id FROM logs DUP WHERE DUP.ts > (LOGS.ts + '5 SECONDS AGO)') AND DUP.ts <= LOGS.ts AND DUP.id <> LOGS.id AND DUP.message = LOGS.message ); -- -- Show snippets of conversation "around" a certain search term -- CREATE or REPLACE FUNCTION search_around(text, timestamp, timestamp, interval) RETURNS SETOF logs AS ' DECLARE term ALIAS FOR $1; start ALIAS FOR $2; finish ALIAS FOR $3; context ALIAS FOR $4; match RECORD; last TIMESTAMP := ''0000-01-01 00:00:00''; near logs%ROWTYPE; BEGIN FOR match IN SELECT ts,fromhandle,handle FROM logs WHERE message ilike term and ts > start AND ts < finish ORDER BY ts ASC LOOP FOR near IN SELECT * FROM logs WHERE ts > (match.ts - context) AND ts < (match.ts + context) AND ts > (last + context) AND ( (fromhandle=match.fromhandle AND handle=match.handle) OR (fromhandle=match.handle AND handle=match.fromhandle) ) ORDER BY ts ASC LOOP RETURN NEXT near; END LOOP; last := match.ts; END LOOP; RETURN; END; ' LANGUAGE plpgsql; -- Version of the function with some reasonable defaults CREATE TYPE recent_search AS (ts timestamp, "from" varchar, "to" varchar, message text); CREATE or REPLACE FUNCTION search_recent(text) RETURNS SETOF recent_search AS ' DECLARE term ALIAS FOR $1; start timestamp := (now() - ''1 week''::interval); finish timestamp := now()::timestamp; context interval := ''00:10:00''::interval; match recent_search; BEGIN FOR match IN SELECT ts, fromhandle, handle, aimsniff_strip_html_tags(message) FROM search_around(term, start, finish, context) LOOP RETURN NEXT match; END LOOP; RETURN; END; ' LANGUAGE plpgsql;