public/sql.md
... ...
@@ -0,0 +1,698 @@
1
+# SQL / Relational
2
+
3
+<style type="text/css">
4
+div .venn {
5
+ float: left;
6
+ width: 300px;
7
+ height: 300px;
8
+ border-radius: 150px;
9
+ border: 3px solid #000;
10
+ color: #000;
11
+ text-align: center;
12
+ vertical-align: middle;
13
+ font-size: 1.0em;
14
+ line-height: 300px;
15
+ font-weight: bold;
16
+ font-family: sans-serif;
17
+ position: relative;
18
+}
19
+.inner_one { background-color: yellow; opacity: 0.5; }
20
+.inner_two::before{ content: 'id = name_id'; position: absolute; left: 10px }
21
+.inner_two { background-color: red; opacity: 0.5; margin-left: -110px; }
22
+
23
+.left_one { background-color: #ff8040; opacity: 0.5; z-index: 1; }
24
+.left_two { background-color: red; opacity: 0.5; margin-left: -110px; }
25
+</style>
26
+
27
+### [1] Why we (I) love relational databases
28
+
29
+* **ACID**
30
+ * Atomicity (transactions [multiple operations can be "all or nothing"] - even with externalities such as power loss)
31
+ * Consistency (what's on disk is always acceptable as a state - including constraints, etc.)
32
+ * Isolation (no transaction affects another)
33
+ * Durability (if an insert completes, it is really written to disk)
34
+* Do we need these?
35
+* What apps don't need these things?
36
+ * If Facebook fails to provide you with 1 in 100,000 status updates from your friends, what is the cost?
37
+* Can we get these properties from a database distributed across a network? (CAP Theorem: See <http://www.youtube.com/watch?v=Jw1iFr4v58M> and <http://blog.beany.co.kr/wp-content/uploads/2011/03/nosql_cap.png>)
38
+
39
+### [2] Creating a database and connecting (from shell)
40
+
41
+ dropdb example
42
+ createdb example
43
+ psql example
44
+
45
+### [3] Dropping and creating a table
46
+
47
+ drop table names;
48
+ create table names (
49
+ id serial, -- "auto increment"
50
+ first_name text
51
+ );
52
+
53
+### [4] Another
54
+
55
+ drop table books;
56
+ create table books (
57
+ id serial, -- "auto increment"
58
+ name_id int,
59
+ title text
60
+ );
61
+
62
+### [5] Adding data
63
+
64
+ delete from names;
65
+ insert into names (first_name) values ('John');
66
+ insert into names (first_name) values ('James');
67
+ insert into names (first_name) values ('Tom');
68
+
69
+* **Tip:** Always give the column names so that you are not dependent on the created order
70
+
71
+### [6] more
72
+
73
+ delete from books;
74
+ insert into books (name_id, title) values (1, 'The C Programming Language');
75
+ insert into books (name_id, title) values (1, 'The Ruby Programming Language');
76
+ insert into books (name_id, title) values (2, 'Java Concurrency in Practice');
77
+ insert into books (name_id, title) values (null, 'SQL for Smarties');
78
+ -- Notice: We are assuming we know the values for names.id
79
+ -- What if we re-ran the inserts for names?
80
+
81
+### [7] query - wildcard for columns
82
+
83
+ select * from names;
84
+ select * from books;
85
+
86
+### [8] query - rearranging columns and specifying order
87
+
88
+ select first_name, id from names;
89
+ select name_id, id, title from books;
90
+
91
+### [9] cross join - return "product" (all combinations) of two tables
92
+
93
+ select *
94
+ from names, books;
95
+ -- don't do this with big tables
96
+
97
+### [10] same thing, but putting ids next together
98
+
99
+ select names.first_name, names.id as "names.id",
100
+ books.name_id as "books.name_id", books.id as "books.id", books.title
101
+ from names, books
102
+ order by names.id;
103
+ -- don't do this with big tables
104
+
105
+### [11] inner join - Venn diagram
106
+
107
+---
108
+
109
+<b>Inner join: What we want is represented by the <span style="background-color: #ff8040;">orange</span> section</b>
110
+<br/>
111
+
112
+<div class="venn inner_one">Names</div>
113
+<div class="venn inner_two">Books</div>
114
+<div style="clear: both;"/>
115
+
116
+<div>
117
+<div style="float: left;">
118
+<b>names</b>
119
+<table>
120
+ <tr><th>first_name</th><th>id</th></tr>
121
+ <tr><td>John</td><td style="background-color: yellow; opacity: 0.5;">1</td></tr>
122
+ <tr><td>James</td><td style="background-color: yellow; opacity: 0.5;">2</td></tr>
123
+ <tr><td>Tom</td><td style="background-color: yellow; opacity: 0.5;">3</td></tr>
124
+</table>
125
+</div>
126
+
127
+<div style="float: right;">
128
+<b>books</b>
129
+<table>
130
+ <tr><th>name_id</th><th>id</th><th>title</th></tr>
131
+ <tr><td style="background-color: yellow; opacity: 0.5;">1</td><td style="background-color: red; opacity: 0.5;">1</td><td>The C Programming Language</td></tr>
132
+ <tr><td style="background-color: yellow; opacity: 0.5;">1</td><td style="background-color: red; opacity: 0.5;">2</td><td>The Ruby Programming Language</td></tr>
133
+ <tr><td style="background-color: yellow; opacity: 0.5;">2</td><td style="background-color: red; opacity: 0.5;">3</td><td>Java Concurrency in Practice</td></tr>
134
+ <tr><td>(null)</td><td style="background-color: red; opacity: 0.5;">4</td><td>SQL for Smarties</td></tr>
135
+</table>
136
+</div>
137
+</div>
138
+<div style="clear: both;"/>
139
+<br/>
140
+
141
+---
142
+
143
+### [12] inner join - SQL - "Give me data from rows from both tables where the condition is true"
144
+
145
+ -- "explicit join notation"
146
+ select first_name, title
147
+ from books join names
148
+ on names.id = books.name_id;
149
+
150
+ -- "implicit join notation"
151
+ select first_name, title
152
+ from books, names
153
+ where names.id = books.name_id;
154
+
155
+ -- Same as "product" join above, but using where clause
156
+ select names.first_name, names.id as "names.id",
157
+ books.name_id as "books.name_id", books.id as "books.id", books.title
158
+ from names, books
159
+ where names.id = books.name_id
160
+ order by names.id;
161
+
162
+### [13] left join - Venn diagram
163
+
164
+---
165
+
166
+<b>Left join: What we want is represented by the <span style="background-color: #ffc0a0;">salmon</span> and <span style="background-color: #ff8040;">orange</span> sections</b>
167
+<br/>
168
+
169
+<div class="venn left_one">Names</div>
170
+<div class="venn left_two">Books</div>
171
+<div style="clear: both;"/>
172
+
173
+<div>
174
+<div style="float: left;">
175
+<b>names</b>
176
+<table>
177
+ <tr><th>first_name</th><th>id</th></tr>
178
+ <tr><td>John</td><td style="background-color: #ffc0a0;">1</td></tr>
179
+ <tr><td>James</td><td style="background-color: #ffc0a0;">2</td></tr>
180
+ <tr><td>Tom</td><td style="background-color: #ffc0a0;">3</td></tr>
181
+</table>
182
+</div>
183
+
184
+<div style="float: right;">
185
+<b>books</b>
186
+<table>
187
+ <tr><th>name_id</th><th>id</th><th>title</th></tr>
188
+ <tr><td style="background-color: #ffc0a0;">1</td><td style="background-color: red; opacity: 0.5;">1</td><td>The C Programming Language</td></tr>
189
+ <tr><td style="background-color: #ffc0a0;">1</td><td style="background-color: red; opacity: 0.5;">2</td><td>The Ruby Programming Language</td></tr>
190
+ <tr><td style="background-color: #ffc0a0;">2</td><td style="background-color: red; opacity: 0.5;">3</td><td>Java Concurrency in Practice</td></tr>
191
+ <tr><td>(null)</td><td style="background-color: red; opacity: 0.5;">4</td><td>SQL for Smarties</td></tr>
192
+</table>
193
+</div>
194
+</div>
195
+<div style="clear: both;"/>
196
+<br/>
197
+
198
+---
199
+
200
+### [14] left join - SQL - "Give me data for 'left' table, and data the 'right' table when the condition is true, but nulls for the 'right' table when the condition is false"
201
+
202
+ select names.first_name, books.title
203
+ from names left join books
204
+ on books.name_id = names.id;
205
+
206
+ first_name | title
207
+ ------------+-------------------------------
208
+ John | The C Programming Language
209
+ John | The Ruby Programming Language
210
+ James | Java Concurrency in Practice
211
+ Tom |
212
+ (4 rows)
213
+
214
+### [15] left join is dominate pattern in ActiveRecord . . .
215
+
216
+* why?
217
+
218
+### [16] left join is dominate pattern in ActiveRecord
219
+
220
+* when we send an association message to a relation, we expect that association to be applied to each element
221
+* Note, however, that ActiveRecord avoids real left joins
222
+
223
+### [17] Now that you're so smart
224
+
225
+* How many rows will each of the following queries return?
226
+
227
+ select * from names left join books on books.name_id = names.id;
228
+ select * from names right join books on books.name_id = names.id;
229
+ select * from books left join names on books.name_id = names.id;
230
+ select * from books right join names on books.name_id = names.id;
231
+
232
+### [18] demo - study the log
233
+
234
+ irb
235
+ require 'active_record'
236
+ require 'pg'
237
+ require 'logger'
238
+ B = ActiveRecord::Base
239
+ B.logger = Logger.new(STDOUT)
240
+ M = ActiveRecord::Migration
241
+
242
+ spec = { adapter: :postgresql, database: :ex }
243
+ B.establish_connection(adapter: :postgresql, database: :postgres, schema_search_path: :public)
244
+ B.connection.drop_database spec[:database]
245
+ B.connection.create_database spec[:database]
246
+ B.establish_connection spec
247
+
248
+ B.mass_assignment_sanitizer = :logger
249
+ CN = Class.new(M) { def change; create_table :names do |t| t.text :first_name end; end }
250
+ CB = Class.new(M) { def change; create_table :books do |t| t.text :title; t.references :name; end; end }
251
+ CN.migrate(:up)
252
+ CB.migrate(:up)
253
+ Name = Class.new(B) { has_many :books }
254
+ Book = Class.new(B) { belongs_to :name }
255
+
256
+ john = Name.create! first_name: 'John'
257
+ james = Name.create! first_name: 'James'
258
+ tom = Name.create! first_name: 'Tom'
259
+ c = Book.create! name: john, title: 'The C Programming Language'
260
+ ruby = Book.create! name: john, title: 'The Ruby Programming Language'
261
+ java = Book.create! name: james, title: 'Java Concurrency in Practice'
262
+
263
+ # Let's discuss what we see in the log
264
+
265
+ Name.all(include: :books)
266
+ Name.all(joins: :books)
267
+ Name.all(joins: 'left join books on books.name_id = names.id')
268
+ Name.all(joins: 'left join books on books.name_id = names.id', include: :books)
269
+
270
+### [19] obviously not the way we would represent book ownership for realsies
271
+
272
+* We would use a join table in between names and books - Now a book can be owned by more than one person (but notice: this is arguably the wrong modeling)
273
+
274
+<div>
275
+<div style="float: left;">
276
+<b>names</b>
277
+<table>
278
+ <tr><th>first_name</th><th>id</th></tr>
279
+ <tr><td>John</td><td style="background-color: #ffc0a0;">1</td></tr>
280
+ <tr><td>James</td><td style="background-color: #ffc0a0;">2</td></tr>
281
+ <tr><td>Tom</td><td style="background-color: #ffc0a0;">3</td></tr>
282
+ <tr><td>Patrick</td><td style="background-color: #ffc0a0;">4</td></tr>
283
+</table>
284
+</div>
285
+
286
+<div style="float: left; padding-left: 100px;">
287
+ <b>names_books</b>
288
+ <table>
289
+ <tr><th>name_id</th><th>book_id</th></tr>
290
+ <tr><td style="background-color: #ffc0a0;">1</td><td style="background-color: red; opacity: 0.5;">1</td></tr>
291
+ <tr><td style="background-color: #ffc0a0;">1</td><td style="background-color: red; opacity: 0.5;">2</td></tr>
292
+ <tr><td style="background-color: #ffc0a0;">1</td><td style="background-color: red; opacity: 0.5;">4</td></tr>
293
+ <tr><td style="background-color: #ffc0a0;">2</td><td style="background-color: red; opacity: 0.5;">3</td></tr>
294
+ <tr><td style="background-color: #ffc0a0;">3</td><td style="background-color: red; opacity: 0.5;">4</td></tr>
295
+ </table>
296
+</div>
297
+
298
+<div style="float: right;">
299
+<b>books</b>
300
+<table>
301
+ <tr><th>id</th><th>title</th></tr>
302
+ <tr><td style="background-color: red; opacity: 0.5;">1</td><td>The C Programming Language</td></tr>
303
+ <tr><td style="background-color: red; opacity: 0.5;">2</td><td>The Ruby Programming Language</td></tr>
304
+ <tr><td style="background-color: red; opacity: 0.5;">3</td><td>Java Concurrency in Practice</td></tr>
305
+ <tr><td style="background-color: red; opacity: 0.5;">4</td><td>SQL for Smarties</td></tr>
306
+ <tr><td style="background-color: red; opacity: 0.5;">5</td><td>Specification by Example</td></tr>
307
+</table>
308
+</div>
309
+</div>
310
+<div style="clear: both;"/>
311
+<br/>
312
+
313
+* Names and ownership
314
+
315
+ select names.first_name, books.title
316
+ from names, books, names_books
317
+ where names.id = names_books.name_id
318
+ and books.id = names_books.book_id;
319
+
320
+### [20] explain plan
321
+
322
+* table with slow query
323
+
324
+ drop table example;
325
+ create table example
326
+ as select generate_series(1, 1000000) as id,
327
+ 'some message'::varchar(100) as message;
328
+ insert into example values (1000001, 'different message');
329
+ select * from example where message = 'different message'; -- 250ms
330
+
331
+* see the query plan
332
+
333
+ explain select * from example where message = 'different message';
334
+
335
+* add an index, re-run query, and check plan again
336
+
337
+ create index on example (message);
338
+ select * from example where message = 'different message'; --16ms
339
+ explain select * from example where message = 'different message';
340
+
341
+### [21] Aggregation and group by, having, etc.
342
+
343
+* `group by` provides for a means of showing the distinct values for a column
344
+
345
+ select message from example group by message;
346
+ -- compare:
347
+ -- select distinct message from example;
348
+
349
+* But even better, we can aggregate according to the groups
350
+
351
+ select message, count(*) from example group by message;
352
+
353
+* But even mo' better, we can as for groups `having` certain characteristics
354
+
355
+ select message, count(*) from example group by message having count(*) = 1;
356
+
357
+### [22] Subquery expressions
358
+
359
+* You want a predicate that checks another table
360
+* Example: find all patients with an appointment
361
+
362
+ select count(*) from patients
363
+ where exists (
364
+ select 1
365
+ from appointment_storages
366
+ where appointment_storages.patient_id = patients.id
367
+ );
368
+
369
+* Other subquery expressions: `in`, `not in`, `any`; see docs (<http://www.postgresql.org/docs/9.2/static/functions-subquery.html>)
370
+* Why? Expressive. Compare this wrong query trying to do the same thing
371
+
372
+ select count(*)
373
+ from patients, appointment_storages
374
+ where appointment_storages.patient_id = patients.id;
375
+
376
+* And compare this correct query trying to do the same thing
377
+
378
+ select count(distinct patients.id)
379
+ from patients, appointment_storages
380
+ where appointment_storages.patient_id = patients.id;
381
+
382
+### [23] More about explain
383
+
384
+* Can see cost estimates; first value is startup, second is doing the work
385
+
386
+ explain select count(*) from patients
387
+ where exists (
388
+ select 1
389
+ from appointment_storages
390
+ where appointment_storages.patient_id = patients.id
391
+ );
392
+
393
+* Can also run (with `analyze`) to see actuals vs. estimates
394
+
395
+ explain analyze select count(*) from patients
396
+ where exists (
397
+ select 1
398
+ from appointment_storages
399
+ where appointment_storages.patient_id = patients.id
400
+ );
401
+
402
+* Hey, why is explain telling me it's going to do a sequential scan even though I have an index?
403
+* pgadmin has a visual explainer (demo)
404
+* Recommended: <http://wiki.postgresql.org/images/4/45/Explaining_EXPLAIN.pdf>
405
+* Want more? See _PostgreSQL 9.0 High Performance_ (in Engineering library)
406
+
407
+### [24] self-joins
408
+
409
+* The classic problem: Find all instances of a staff member creating a task before another is completed.
410
+* Always explore your data first **<-- best practice**
411
+* First find a user with not too many tasks
412
+
413
+ select users.id, users.email, practices.name, count(tasks.*)
414
+ from users, tasks, practices
415
+ where tasks.created_by_id = users.id
416
+ and practices.id = users.practice_id
417
+ group by users.id, users.email, practices.name
418
+ having count(tasks.*) < 50
419
+ order by count(tasks.*) desc;
420
+
421
+* Look at tasks for such a user to get a feel for the data
422
+
423
+ select tasks.title, tasks.created_at, tasks.completed_at
424
+ from tasks, users
425
+ where tasks.created_by_id = users.id
426
+ and users.id = 66
427
+ order by created_at asc;
428
+
429
+* now join tasks with tasks
430
+
431
+ select a.title || ' (' || a.id || ') overlaps ' || b.title || ' (' || b.id || ')'
432
+ from tasks a, tasks b
433
+ where a.created_by_id = b.created_by_id
434
+ and b.created_at >= a.created_at
435
+ and b.created_at <= a.completed_at
436
+ and a.id != b.id
437
+ and a.created_by_id = 66
438
+ and b.created_by_id = 66
439
+ order by a.id asc;
440
+
441
+* inspect some examples
442
+
443
+ select * from tasks where id in (17359, 17401, 17402, 20457);
444
+
445
+### [25] Another: Find the first weight measurement (or null) for every patient
446
+
447
+* Let's get the earliest measured_at for each patient/marker combination
448
+
449
+ select patient_id, marker_cname, min(measured_at) as first_measured_at
450
+ from measurements
451
+ group by patient_id, marker_cname;
452
+
453
+* We are using min() here. Could there be two measurements for the same patient and marker with the same measured_at?
454
+
455
+ select m.patient_id, m.marker_cname, m.measured_at, count(*)
456
+ from (
457
+ select patient_id, marker_cname, min(measured_at) as first_measured_at
458
+ from measurements
459
+ group by patient_id, marker_cname) firsts,
460
+ measurements m
461
+ where firsts.patient_id = m.patient_id
462
+ and firsts.marker_cname = m.marker_cname
463
+ and firsts.first_measured_at = m.measured_at
464
+ group by m.patient_id, m.marker_cname, m.measured_at
465
+ having count(*) > 1;
466
+
467
+* Yes. Well that sucks. That might be a bug or deserve a constraint. What we will do is get the first id for the earliest measured_at for each patient/marker combination.
468
+
469
+ select m.patient_id, m.marker_cname, min(id) as id
470
+ from (
471
+ select patient_id, marker_cname, min(measured_at) as first_measured_at
472
+ from measurements
473
+ group by patient_id, marker_cname) firsts,
474
+ measurements m
475
+ where firsts.patient_id = m.patient_id
476
+ and firsts.marker_cname = m.marker_cname
477
+ and firsts.first_measured_at = m.measured_at
478
+ group by m.patient_id, m.marker_cname;
479
+
480
+* Now this is a very useful set of rows. We can do left joins from the patients table into this and select for various markers; and then join from this table into measurements. We can create a temporary table and direct PG to drop it after a commit. Then we can use and re-use this temporary table in
481
+a series of left joins.
482
+
483
+ create temporary table firsts on commit drop as
484
+ select m.patient_id, m.marker_cname, min(id) as id
485
+ from (
486
+ select patient_id, marker_cname, min(measured_at) as first_measured_at
487
+ from measurements
488
+ group by patient_id, marker_cname) firsts,
489
+ measurements m
490
+ where firsts.patient_id = m.patient_id
491
+ and firsts.marker_cname = m.marker_cname
492
+ and firsts.first_measured_at = m.measured_at
493
+ group by m.patient_id, m.marker_cname;
494
+
495
+ select p.id, weight, weight_first_measured_at, height, height_first_measured_at, ldl, ldl_first_measured_at
496
+ from patients p
497
+ left join (
498
+ select f.patient_id, m.value as weight, m.measured_at as weight_first_measured_at
499
+ from firsts f, measurements m where f.marker_cname = 'weight' and m.id = f.id) weights
500
+ on weights.patient_id = p.id
501
+ left join (
502
+ select f.patient_id, m.value as height, m.measured_at as height_first_measured_at
503
+ from firsts f, measurements m where f.marker_cname = 'height' and m.id = f.id) heights
504
+ on heights.patient_id = p.id
505
+ left join (
506
+ select f.patient_id, m.value as ldl, m.measured_at as ldl_first_measured_at
507
+ from firsts f, measurements m where f.marker_cname = 'ldl' and m.id = f.id) ldls
508
+ on ldls.patient_id = p.id
509
+ ;
510
+
511
+### [26] PG "distinct on"
512
+
513
+* When you want the first row in a group, you can use PG's "distinct on" clause (<http://www.postgresql.org/docs/9.2/static/sql-select.html#SQL-DISTINCT>)
514
+* But watch out, this can return surprising results
515
+* People commonly use it to mean: "Give me the 'top' something in this group" -- but what if there is a tie? Example
516
+
517
+ drop table if exists employees;
518
+ create table employees (
519
+ name varchar(100),
520
+ department integer,
521
+ salary integer);
522
+ insert into employees (name, department, salary) values ('John', 1, 20000);
523
+ insert into employees (name, department, salary) values ('Joe', 1, 20000);
524
+ insert into employees (name, department, salary) values ('Jill', 2, 40000);
525
+
526
+ select department, max(salary) as max_salary
527
+ from employees
528
+ group by department;
529
+
530
+ select e.department, e.name, e.salary
531
+ from employees e,
532
+ (select department, max(salary) as max_salary
533
+ from employees
534
+ group by department) maxs
535
+ where e.department = maxs.department
536
+ and e.salary = maxs.max_salary;
537
+
538
+ select distinct on (department) *
539
+ from employees
540
+ order by department, salary desc;
541
+
542
+* Advice: Learn your SQL according to the standard (see Appendix B in _SQL Cookbook_)
543
+
544
+### [27] What is a PG database?
545
+
546
+* A PG database is that to which you connect
547
+* Got it? You can only query a database to which you are connected
548
+* A database contains schemas which contain tables
549
+* We don't deal with schemas because our tables are in the default schema (`public`)
550
+
551
+### [28] What is a PG schema?
552
+
553
+* Think of a PostgreSQL schema as a prefix to a table name (public.patients)
554
+* A database can have tables in different schemas
555
+* PG will match unprefixed table names according to a search path
556
+* Why do we like it?
557
+ * Tables in a schema can be dropped together
558
+ * Tables in a schema can be authorized separately
559
+ * Tables can have the same name if in different schemas
560
+
561
+### [29] What is a PG tablespace?
562
+
563
+* A PG tablespace is a location in the file system where tables (etc.) go
564
+* Put frequently-queried tables on a fast disk
565
+* Make it easier for backups
566
+* NOTE: In Master/Slave, must have the identical tablespace (location) on both machines
567
+
568
+### [30] When should I add an index?
569
+
570
+* When your query is slow
571
+* What is slow?
572
+ * Look at the logs and compare to other queries
573
+* Will an index help me?
574
+ * Try `explain analyze` on your query
575
+ * Add an index
576
+ * Rerun `explain analyze`
577
+* Don't waste your time on this exercise on your development machine (PG will for instance just use sequential access for small data sets, 'cos it's fast); use something closer to production
578
+
579
+### [31] What is a constraint?
580
+
581
+* Check constraint: Disallow insertion of a row where a column value violates a rule (e.g., height > 144 inches)
582
+* Not null constraint: Disallow insertion of a row where a column value is null
583
+* Foreign key constraint: Disallow insertion of a row with foreign key value not found as a primary key in the other table
584
+
585
+### [32] Why don't Rails developers use foreign key constraints? (and other constraints)
586
+
587
+* History: Very difficult to create an ORM that can define constraint types in a database-agnostic way
588
+ * Surely you jest?
589
+* History: Lack of clarity around tier responsibility: E.g., who should validate?
590
+* History: Inexperience with transactions
591
+* History: Probably not important in Basecamp (except for billing)
592
+* History: There was a time when people liked polymorphic associations in ActiveRecord; and how would you constrain such keys? (Triggers?)
593
+* Lazy
594
+
595
+### [33] Why don't *we* use foreign key constraints? (and other constraints)
596
+
597
+* Lazy
598
+* To date, stories have not defined data semantics, only behavioral semantics
599
+* To date, stories have not defined cost of anomalies
600
+* schema.rb
601
+
602
+### [34] pros and cons of deleting rows from the DB (vs. flagging them as 'deleted')
603
+
604
+* Discussion
605
+
606
+### [35] pros and cons of free-text fields
607
+
608
+* Discussion
609
+
610
+### [36] editing data in-place vs. storing new versions with each edit
611
+
612
+* Discussion
613
+
614
+### [37] Recommendations for Further Reading
615
+
616
+1. "SQL Reference" in Enterprise Java in a Nutshell (O'Reilly, 2005)
617
+2. Molinaro, SQL Cookbook (O'Reilly, 2005)
618
+3. Rozenshtein, The Essence of SQL (1997)
619
+4. Celko, SQL for Smarties (2010) (other books can be good; some people like The SQL Puzzle Book)
620
+5. Karwin, SQL Antipatterns (2010) (considers EAV to be an anti-pattern; discusses rounding errors; problems with grouping; much else)
621
+6. Venn diagrams: <http://www.codinghorror.com/blog/2007/10/a-visual-explanation-of-sql-joins.html>
622
+
623
+### [37] Resources to avoid
624
+
625
+0. Elmasri, Fundamentals of Database Systems (6th ed., 2010). Regrettably, this book is a mess and tends to go for the most impenetrable explanation possible. Still, it has stuff on modeling and normalization that is hard to find.
626
+1. Anything on the web; seriously, watch out. People say really stupid stuff about SQL.
627
+2. Any book that is focused on one database. You will be unhappy.
628
+
629
+---
630
+
631
+### [38] Populating names and books with join table
632
+
633
+ drop table names;
634
+ create table names (
635
+ id serial, -- "auto increment"
636
+ first_name text
637
+ );
638
+ drop table books;
639
+ create table books (
640
+ id serial, -- "auto increment"
641
+ title text
642
+ );
643
+
644
+ delete from names;
645
+ insert into names (first_name) values ('John');
646
+ insert into names (first_name) values ('James');
647
+ insert into names (first_name) values ('Tom');
648
+ insert into names (first_name) values ('Patrick');
649
+
650
+ delete from books;
651
+ insert into books (title) values ('The C Programming Language');
652
+ insert into books (title) values ('The Ruby Programming Language');
653
+ insert into books (title) values ('Java Concurrency in Practice');
654
+ insert into books (title) values ('SQL for Smarties');
655
+ insert into books (title) values ('Specification by Example');
656
+
657
+ drop table if exists names_books;
658
+ create table names_books (
659
+ name_id integer,
660
+ book_id integer
661
+ );
662
+
663
+ insert into names_books values ((select id from names where first_name = 'John'), (select id from books where title = 'The C Programming Language'));
664
+ insert into names_books values ((select id from names where first_name = 'John'), (select id from books where title = 'The Ruby Programming Language'));
665
+ insert into names_books values ((select id from names where first_name = 'John'), (select id from books where title = 'SQL for Smarties'));
666
+ insert into names_books values ((select id from names where first_name = 'James'), (select id from books where title = 'Java Concurrency in Practice'));
667
+ insert into names_books values ((select id from names where first_name = 'Tom'), (select id from books where title = 'SQL for Smarties'));
668
+
669
+
670
+ select names.first_name, books.title
671
+ from names, books, names_books
672
+ where names.id = names_books.name_id
673
+ and books.id = names_books.book_id;
674
+
675
+## Other topics
676
+
677
+### [39] The meaning of null
678
+
679
+See Elmasri.
680
+
681
+### [40] SQL injection
682
+
683
+### [41] Prepared statements
684
+
685
+### [42] Domain modeling
686
+
687
+See Elmasri.
688
+
689
+### [43] Normalization
690
+
691
+See Elmasri.
692
+
693
+### [44] Star schemas
694
+
695
+### [45] Data warehousing
696
+
697
+### [46] ETL
698
+