DEFAULT can be an expression (e.g. custom primary key generation)
CHECK constraints
Strongly typed
-- Array column: one row can hold any number of weekday values.
CREATE TABLE events (
    weekday INTEGER[]
);
Can be used as a document store!
-- Minimal "document store": each row is one raw JSON document.
CREATE TABLE json_log (
    item JSON
);
-- Load raw nginx access-log entries as JSON documents.
-- Fix: the bare "..." placeholder between rows is not valid SQL;
-- the elision is now a comment so the statement parses.
INSERT INTO json_log (item)
VALUES
    ('{"remote_addr": "130.185.109.239","remote_user": "-","time_local": "13/Feb/2014:07:13:17 -0500","request": "GET /robots.txt HTTP/1.1","status": "200","body_bytes_sent": "0","http_referer": "-","http_user_agent": "PagesInventory (robot http://www.pagesinvenotry.com)"}'),
    -- ... more log rows elided for brevity ...
    ('{"remote_addr": "46.137.98.52","remote_user": "-","time_local": "13/Feb/2014:11:37:47 -0500","request": "HEAD / HTTP/1.0","status": "200","body_bytes_sent": "0","http_referer": "-","http_user_agent": "-"}');
-- Query inside JSON documents with ->> (extract field as text).
-- Fix: without aliases every ->> projection is returned as "?column?";
-- name the output columns explicitly.
SELECT item->>'remote_addr'     AS remote_addr,
       item->>'request'         AS request,
       item->>'http_user_agent' AS http_user_agent
FROM json_log
WHERE item->>'http_user_agent' LIKE '%robot%';
-- Composite type for one parsed log entry: the strongly typed
-- counterpart of the raw JSON documents above.
CREATE TYPE log_item AS (
    remote_addr     TEXT,
    time_local      TIMESTAMP,
    request         TEXT,
    http_user_agent TEXT
);
-- Table whose payload column is the composite type defined above.
CREATE TABLE server_log (
    id   SERIAL PRIMARY KEY,
    item log_item
);
-- Parse every JSON document into the typed composite and copy it
-- into server_log (ROW(...)::log_item builds the composite value).
INSERT INTO server_log (item)
SELECT ROW(
           item->>'remote_addr',
           (item->>'time_local')::timestamp,
           item->>'request',
           item->>'http_user_agent'
       )::log_item
FROM json_log;
-- (item).* expands the composite column into its individual fields;
-- the parentheses are required to disambiguate from a table reference.
SELECT (item).*
FROM server_log
WHERE (item).http_user_agent LIKE '%robot%';
Users and their first orders
-- Each user's first order: FIRST_VALUE over a per-user window ordered
-- by creation time; DISTINCT collapses the per-order duplicate rows.
-- Fixes: explicit AS on the window-column aliases, and the WINDOW
-- clause columns are qualified with o. to rule out ambiguity.
SELECT DISTINCT u.id, u.name
     , FIRST_VALUE(o.id)     OVER creation AS order_id
     , FIRST_VALUE(o.amount) OVER creation AS order_amount
FROM users u
JOIN orders o ON o.user_id = u.id
WINDOW creation AS (PARTITION BY o.user_id ORDER BY o.created);
Users and their biggest orders
-- Each user's biggest order: same window pattern as the first-order
-- query, but ordered by amount DESC so FIRST_VALUE picks the largest.
-- Fixes: explicit AS on aliases; WINDOW columns qualified with o.
SELECT DISTINCT u.id, u.name
     , FIRST_VALUE(o.id)     OVER creation AS order_id
     , FIRST_VALUE(o.amount) OVER creation AS order_amount
FROM users u
JOIN orders o ON o.user_id = u.id
WINDOW creation AS (PARTITION BY o.user_id ORDER BY o.amount DESC);
Order's ordinal number per-user
-- Ordinal number of each order within its user's history.
-- NOTE: RANK() gives equal numbers (with gaps) to orders created at the
-- same instant; use ROW_NUMBER() if a strictly unique ordinal is needed.
-- Fixes: explicit AS on aliases; WINDOW columns qualified with o.
SELECT u.id, u.name, o.id AS order_id, o.amount
     , RANK() OVER creation AS user_order_number
FROM users u
JOIN orders o ON o.user_id = u.id
WINDOW creation AS (PARTITION BY o.user_id ORDER BY o.created);
Break complicated nested logic into sequential steps
-- CTEs turn a nested report into named, sequential steps.
WITH regional_sales AS (
    -- Total sales per region.
    SELECT region, SUM(amount) AS total_sales
    FROM orders
    GROUP BY region
),
top_regions AS (
    -- Regions contributing more than 10% of overall sales.
    SELECT region
    FROM regional_sales
    WHERE total_sales > (SELECT SUM(total_sales) / 10 FROM regional_sales)
)
-- Per-product breakdown, restricted to the top regions.
SELECT region,
       product,
       SUM(quantity) AS product_units,
       SUM(amount)   AS product_sales
FROM orders
WHERE region IN (SELECT region FROM top_regions)
GROUP BY region, product;
-- Adjacency-list hierarchy: each category points at its parent
-- (NULL parent_id marks a root).
CREATE TABLE category (
    id          SERIAL PRIMARY KEY,
    description TEXT,
    parent_id   INTEGER REFERENCES category(id)
);
-- For every category, walk up the parent chain, accumulating the path
-- of ids and descriptions from the highest reached ancestor down to
-- the category itself.
-- Fix: the implicit comma join in the recursive term is replaced with
-- an explicit ANSI JOIN ... ON (same rows, clearer intent).
WITH RECURSIVE subs(id, parent_id, ids, descriptions) AS (
    -- Base case: each category is a path of length one.
    SELECT id, parent_id
         , ARRAY[id], ARRAY[description]
    FROM category
    UNION ALL
    -- Recursive step: prepend the parent's id/description and continue
    -- upward until parent_id no longer matches a row (root reached).
    SELECT s.id, c.parent_id
         , ARRAY_PREPEND(c.id, s.ids)
         , ARRAY_PREPEND(c.description, s.descriptions)
    FROM subs s
    JOIN category c ON s.parent_id = c.id
)
SELECT id, ids, descriptions
FROM subs;
Fast date-component query (month, year, week)
-- Expression indexes make date-component filters (hour, weekday) fast.
CREATE INDEX bid_created_hour ON bid (date_part('hour'::text, created));
-- Fix: the original created bid_created_weekday twice (a duplicate-name
-- error) and the first copy indexed date_part('hour', ...) instead of
-- day-of-week. One index on the 'dow' expression is what the query
-- plan below actually matches (EXTRACT(DOW ...) is the same expression).
CREATE INDEX bid_created_weekday ON bid (date_part('dow'::text, created));
QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------- Aggregate (cost=44203.48..44203.49 rows=1 width=0) (actual time=1621.464..1621.465 rows=1 loops=1) -> Bitmap Heap Scan on bid (cost=656.38..44126.42 rows=30824 width=0) (actual time=344.487..1430.674 rows=2896367 loops=1) Recheck Cond: ((date_part('dow'::text, created) >= 3::double precision) AND (date_part('dow'::text, created) <= 5::double precision)) Rows Removed by Index Recheck: 7866 -> Bitmap Index Scan on bid_created_weekday (cost=0.00..648.68 rows=30824 width=0) (actual time=342.796..342.796 rows=2896367 loops=1) Index Cond: ((date_part('dow'::text, created) >= 3::double precision) AND (date_part('dow'::text, created) <= 5::double precision)) Total runtime: 1621.679 ms
-- Foreign table over a gzipped nginx access log: the file is queried in
-- place through the FDW behind "json_server" (defined elsewhere).
-- All fields arrive as text; timestamps are parsed into timestamptz.
CREATE FOREIGN TABLE nginx_log_2
(
remote_addr TEXT,
remote_user TEXT,
time_local TIMESTAMP WITH TIME ZONE,
request TEXT,
status TEXT,
body_bytes_sent TEXT,
http_referer TEXT,
http_user_agent TEXT
)
SERVER json_server
OPTIONS (filename '/home/ubolonton/Programming/projects/postgresql-examples/nginx-log/access.log.2.gz', max_error_count '99999');
-- Fix: all four statements were collapsed onto one physical line that
-- STARTS with "--", so the line comment swallowed every statement.
-- Reflowed so each comment sits on its own line above live SQL.

-- Request counts by status
SELECT status, COUNT(*)
FROM nginx_log_cleaned
GROUP BY status
ORDER BY status;

-- Average response size
SELECT AVG(body_bytes_sent)
FROM nginx_log_cleaned;

-- All requests not from me
SELECT COUNT(*)
FROM nginx_log_cleaned
WHERE http_user_agent NOT LIKE '%conkeror%';

-- Requests by hour
WITH tmp AS (
    SELECT *, EXTRACT(HOUR FROM (time_local AT TIME ZONE '+07:00')) AS hour
    FROM nginx_log_cleaned
)
SELECT hour, COUNT(*)
FROM tmp
GROUP BY hour
ORDER BY hour;
http://thebuild.com/presentations/pg-as-nosql-pgday-fosdem-2013.pdf
Years of working with MySQL don't give as much understanding as one month of reading the PostgreSQL documentation
ubolonton on GitHub, LinkedIn, Skype, Facebook, Twitter, Google+…
Any question, comment, feedback, or correction is welcome.