SQL basics

Setup

  • Download ipython-sql from here or use pip install ipython-sql
  • Download Iris dataset in csv format from here
  • Install postgresql locally
In [1]:
import pandas as pd
from pandas import DataFrame
In [2]:
%load_ext sql
%config SqlMagic.feedback = False
%sql postgresql://jloach@/
Out[2]:
'Connected: jloach@'

Make a database to play with

In [3]:
%%sql
DROP TABLE parishes; -- Delete existing table under this name
CREATE TABLE parishes(
  id          INT PRIMARY KEY NOT NULL, 
  name        TEXT,
  population  INT,
  loc_n       REAL,
  loc_w       REAL
);
INSERT INTO parishes VALUES (1, 'Blymhill & Weston-under-Lizard', 823, 52.702, 2.284);
INSERT INTO parishes VALUES (2, 'Lapley, Stretton and Wheaton Aston', 2548, 52.71, 2.2);
INSERT INTO parishes VALUES (3, 'Church Eaton', 680, 52.757, 2.222);
INSERT INTO parishes VALUES (4, 'Penkridge', 8526, NULL, NULL);
INSERT INTO parishes VALUES (5, 'Sheriffhales', 722, NULL, NULL);
Out[3]:
[]

SELECT

In [4]:
%sql SELECT * FROM parishes
Out[4]:
id name population loc_n loc_w
1 Blymhill & Weston-under-Lizard 823 52.702 2.284
2 Lapley, Stretton and Wheaton Aston 2548 52.71 2.2
3 Church Eaton 680 52.757 2.222
4 Penkridge 8526 None None
5 Sheriffhales 722 None None
In [5]:
%%sql
SELECT name as "Parish",
       population as "Population" 
  FROM parishes
Out[5]:
Parish Population
Blymhill & Weston-under-Lizard 823
Lapley, Stretton and Wheaton Aston 2548
Church Eaton 680
Penkridge 8526
Sheriffhales 722
In [6]:
%%sql
SELECT name as "Parish",
       ROUND(population/10.) as "Population/10" 
  FROM parishes
Out[6]:
Parish Population/10
Blymhill & Weston-under-Lizard 82
Lapley, Stretton and Wheaton Aston 255
Church Eaton 68
Penkridge 853
Sheriffhales 72

LIMIT

In [7]:
%%sql
SELECT *
  FROM parishes 
 LIMIT 2
Out[7]:
id name population loc_n loc_w
1 Blymhill & Weston-under-Lizard 823 52.702 2.284
2 Lapley, Stretton and Wheaton Aston 2548 52.71 2.2

WHERE

In [8]:
%%sql
SELECT * 
  FROM parishes
 WHERE population > 1000
Out[8]:
id name population loc_n loc_w
2 Lapley, Stretton and Wheaton Aston 2548 52.71 2.2
4 Penkridge 8526 None None
In [9]:
%%sql
SELECT * 
  FROM parishes 
 WHERE name > 'L'
Out[9]:
id name population loc_n loc_w
2 Lapley, Stretton and Wheaton Aston 2548 52.71 2.2
4 Penkridge 8526 None None
5 Sheriffhales 722 None None

LIKE / ILIKE

In [10]:
%%sql
SELECT * 
  FROM parishes 
 WHERE name 
  LIKE 'B%'
Out[10]:
id name population loc_n loc_w
1 Blymhill & Weston-under-Lizard 823 52.702 2.284
In [11]:
%%sql
SELECT * 
  FROM parishes 
 WHERE name 
  LIKE 'P_______e'
Out[11]:
id name population loc_n loc_w
4 Penkridge 8526 None None
In [12]:
%%sql
SELECT * 
  FROM parishes 
 WHERE name 
 ILIKE 'b%'
Out[12]:
id name population loc_n loc_w
1 Blymhill & Weston-under-Lizard 823 52.702 2.284

IN

In [13]:
%%sql
SELECT * 
  FROM parishes 
 WHERE name IN ('Penkridge', 'Church Eaton')
Out[13]:
id name population loc_n loc_w
3 Church Eaton 680 52.757 2.222
4 Penkridge 8526 None None

BETWEEN

In [14]:
%%sql
SELECT * 
  FROM parishes 
 WHERE population BETWEEN 500 AND 2000
Out[14]:
id name population loc_n loc_w
1 Blymhill & Weston-under-Lizard 823 52.702 2.284
3 Church Eaton 680 52.757 2.222
5 Sheriffhales 722 None None

NULL

In [15]:
%%sql
SELECT *
  FROM parishes 
 WHERE loc_n IS NULL
Out[15]:
id name population loc_n loc_w
4 Penkridge 8526 None None
5 Sheriffhales 722 None None

OR / AND / NOT

In [16]:
%%sql
SELECT * 
  FROM parishes 
 WHERE (name LIKE 'P%' OR population < 800) AND NOT loc_n IS NULL
Out[16]:
id name population loc_n loc_w
3 Church Eaton 680 52.757 2.222

ORDER BY / DESC

In [17]:
%%sql
SELECT *
  FROM parishes
 ORDER BY population DESC
Out[17]:
id name population loc_n loc_w
4 Penkridge 8526 None None
2 Lapley, Stretton and Wheaton Aston 2548 52.71 2.2
1 Blymhill & Weston-under-Lizard 823 52.702 2.284
5 Sheriffhales 722 None None
3 Church Eaton 680 52.757 2.222
In [18]:
%%sql
SELECT name, population
  FROM parishes
 ORDER BY 2 -- Doesn't' work in all SQL variants 
Out[18]:
name population
Church Eaton 680
Sheriffhales 722
Blymhill & Weston-under-Lizard 823
Lapley, Stretton and Wheaton Aston 2548
Penkridge 8526
In [19]:
%%sql
SELECT name, population, loc_n
  FROM parishes
 ORDER BY population, loc_n -- Though this isn't meaningful here
Out[19]:
name population loc_n
Church Eaton 680 52.757
Sheriffhales 722 None
Blymhill & Weston-under-Lizard 823 52.702
Lapley, Stretton and Wheaton Aston 2548 52.71
Penkridge 8526 None