DEFAULT
can be an expression (e.g. custom primary key generation)
CHECK
constraints
Strongly typed
-- Array column: store multiple weekday values in one row,
-- without a separate join table.
CREATE TABLE events (
    weekday INTEGER[]
);
Can be used as a document store!
-- A JSON column turns PostgreSQL into a schemaless document store.
CREATE TABLE json_log (
    item JSON
);

-- Load raw nginx access-log entries as JSON documents.
-- FIX: the original elided further rows with a bare "...", which is not
-- valid SQL; additional rows belong as further comma-separated tuples.
INSERT INTO json_log (item) VALUES
    ('{"remote_addr": "130.185.109.239","remote_user": "-","time_local": "13/Feb/2014:07:13:17 -0500","request": "GET /robots.txt HTTP/1.1","status": "200","body_bytes_sent": "0","http_referer": "-","http_user_agent": "PagesInventory (robot http://www.pagesinvenotry.com)"}'),
    ('{"remote_addr": "46.137.98.52","remote_user": "-","time_local": "13/Feb/2014:11:37:47 -0500","request": "HEAD / HTTP/1.0","status": "200","body_bytes_sent": "0","http_referer": "-","http_user_agent": "-"}');

-- ->> extracts a JSON field as text, so it composes with LIKE.
SELECT
    item->>'remote_addr',
    item->>'request',
    item->>'http_user_agent'
FROM json_log
WHERE item->>'http_user_agent' LIKE '%robot%';
-- Composite type: give the JSON documents a strongly typed shape.
CREATE TYPE log_item AS (
    remote_addr     TEXT,
    time_local      TIMESTAMP,
    request         TEXT,
    http_user_agent TEXT
);

CREATE TABLE server_log (
    id   SERIAL PRIMARY KEY,
    item log_item
);

-- Convert each JSON document into a typed composite value.
INSERT INTO server_log (item)
SELECT ROW(
           item->>'remote_addr',
           (item->>'time_local')::timestamp,
           item->>'request',
           item->>'http_user_agent'
       )::log_item
FROM json_log;

-- (item).* expands the composite column back into its fields.
SELECT (item).*
FROM server_log
WHERE (item).http_user_agent LIKE '%robot%';
Users and their first orders
-- FIRST_VALUE over a window ordered by creation time picks the
-- earliest order within each user's partition; DISTINCT collapses
-- the per-order duplicates down to one row per user.
SELECT DISTINCT
    u.id,
    u.name,
    FIRST_VALUE(o.id)     OVER creation AS order_id,
    FIRST_VALUE(o.amount) OVER creation AS order_amount
FROM users u
JOIN orders o ON o.user_id = u.id
WINDOW creation AS (PARTITION BY user_id ORDER BY created);
Users and their biggest orders
-- Same shape as the first-order query, but ordering the window by
-- amount DESC makes FIRST_VALUE return each user's biggest order.
SELECT DISTINCT
    u.id,
    u.name,
    FIRST_VALUE(o.id)     OVER creation AS order_id,
    FIRST_VALUE(o.amount) OVER creation AS order_amount
FROM users u
JOIN orders o ON o.user_id = u.id
WINDOW creation AS (PARTITION BY user_id ORDER BY amount DESC);
Order's ordinal number per-user
-- RANK() over the per-user creation-ordered window numbers each
-- user's orders: 1 for the first order, 2 for the second, and so on.
SELECT
    u.id,
    u.name,
    o.id AS order_id,
    o.amount,
    RANK() OVER creation AS user_order_number
FROM users u
JOIN orders o ON o.user_id = u.id
WINDOW creation AS (PARTITION BY user_id ORDER BY created);
Break complicated nested logic into sequential steps
-- Total sales per region.
WITH regional_sales AS (
    SELECT region, SUM(amount) AS total_sales
    FROM orders
    GROUP BY region
),
-- Regions contributing more than 10% of overall sales.
top_regions AS (
    SELECT region
    FROM regional_sales
    WHERE total_sales > (SELECT SUM(total_sales) / 10 FROM regional_sales)
)
-- Per-product breakdown restricted to those top regions.
SELECT
    region,
    product,
    SUM(quantity) AS product_units,
    SUM(amount)   AS product_sales
FROM orders
WHERE region IN (SELECT region FROM top_regions)
GROUP BY region, product;
-- Category tree via a self-referencing foreign key.
CREATE TABLE category (
    id          SERIAL PRIMARY KEY,
    description TEXT,
    parent_id   INTEGER REFERENCES category(id)
);

-- Walk each category up to its root, accumulating the ancestor chain.
WITH RECURSIVE subs(id, parent_id, ids, descriptions) AS (
    -- Base case: every category starts a chain containing only itself.
    SELECT id,
           parent_id,
           ARRAY[id],
           ARRAY[description]
    FROM category
    UNION ALL
    -- Recursive step: prepend the parent onto the front of the chain
    -- and continue climbing from the parent's own parent_id.
    SELECT s.id,
           c.parent_id,
           ARRAY_PREPEND(c.id, s.ids),
           ARRAY_PREPEND(c.description, s.descriptions)
    FROM subs s
    JOIN category c ON s.parent_id = c.id
)
SELECT id, ids, descriptions FROM subs;
Fast date-component queries (e.g. hour, day of week)
-- Expression indexes let date-component predicates (hour of day,
-- day of week) use an index scan instead of a sequential scan.
CREATE INDEX bid_created_hour ON bid (date_part('hour'::text, created));

-- FIX: the original created bid_created_weekday on date_part('hour', ...)
-- — the wrong component for a weekday index — and then issued a second
-- CREATE INDEX with the same name, which fails with
-- 'relation "bid_created_weekday" already exists'.
-- EXTRACT(DOW FROM created) is rewritten by PostgreSQL to
-- date_part('dow', created), so a single dow index covers both spellings.
CREATE INDEX bid_created_weekday ON bid (date_part('dow'::text, created));
QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------- Aggregate (cost=44203.48..44203.49 rows=1 width=0) (actual time=1621.464..1621.465 rows=1 loops=1) -> Bitmap Heap Scan on bid (cost=656.38..44126.42 rows=30824 width=0) (actual time=344.487..1430.674 rows=2896367 loops=1) Recheck Cond: ((date_part('dow'::text, created) >= 3::double precision) AND (date_part('dow'::text, created) <= 5::double precision)) Rows Removed by Index Recheck: 7866 -> Bitmap Index Scan on bid_created_weekday (cost=0.00..648.68 rows=30824 width=0) (actual time=342.796..342.796 rows=2896367 loops=1) Index Cond: ((date_part('dow'::text, created) >= 3::double precision) AND (date_part('dow'::text, created) <= 5::double precision)) Total runtime: 1621.679 ms
-- Foreign table: query a (gzipped) nginx access log in place through
-- a foreign-data wrapper, without loading it into a regular table.
CREATE FOREIGN TABLE nginx_log_2 (
    remote_addr     TEXT,
    remote_user     TEXT,
    time_local      TIMESTAMP WITH TIME ZONE,
    request         TEXT,
    status          TEXT,
    body_bytes_sent TEXT,
    http_referer    TEXT,
    http_user_agent TEXT
)
SERVER json_server
OPTIONS (
    filename '/home/ubolonton/Programming/projects/postgresql-examples/nginx-log/access.log.2.gz',
    max_error_count '99999'
);
-- Request counts by status
SELECT status, COUNT(*)
FROM nginx_log_cleaned
GROUP BY status
ORDER BY status;

-- Average response size
SELECT AVG(body_bytes_sent)
FROM nginx_log_cleaned;

-- All requests not from me
SELECT COUNT(*)
FROM nginx_log_cleaned
WHERE http_user_agent NOT LIKE '%conkeror%';

-- Requests by hour
WITH tmp AS (
    SELECT *,
           EXTRACT(HOUR FROM (time_local AT TIME ZONE '+07:00')) AS hour
    FROM nginx_log_cleaned
)
SELECT hour, COUNT(*)
FROM tmp
GROUP BY hour
ORDER BY hour;
http://thebuild.com/presentations/pg-as-nosql-pgday-fosdem-2013.pdf
Years of working with MySQL don't give as much understanding as one month of reading the PostgreSQL documentation.
ubolonton on Github, LinkedIn, Skype, Facebook, Twitter, Google+…
Any question, comment, feedback, or correction is welcome.