Preamble

## Importing packages
library(tidyverse) # metapackage with lots of helpful functions
library(magrittr)
library(skimr)

Load the data

police_stops <- read_rds('https://stacks.stanford.edu/file/druid:yg821jf8611/yg821jf8611_la_new_orleans_2020_04_01.rds')
data <- police_stops 
#data <- read_csv('https://stacks.stanford.edu/file/druid:yg821jf8611/yg821jf8611_la_new_orleans_2020_04_01.csv.zip')
data %>% glimpse()
Rows: 512,092
Columns: 32
$ raw_row_number     <chr> "1", "9087", "9086", "267", "2", "7", "3", "4", "5", "8", "6", "27", "28", "16", "5153", "…
$ date               <date> 2010-01-01, 2010-01-01, 2010-01-01, 2010-01-01, 2010-01-01, 2010-01-01, 2010-01-01, 2010-…
$ time               <time> 01:11:00, 01:29:00, 01:29:00, 14:00:00, 02:06:00, 02:06:00, 02:06:00, 02:06:00, 02:06:00,…
$ location           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ lat                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ lng                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ district           <chr> "6", "7", "7", "7", "5", "5", "5", "5", "5", "5", "5", "6", "6", "8", "7", "6", "6", "6", …
$ zone               <chr> "E", "C", "C", "I", "D", "D", "D", "D", "D", "D", "D", "F", "F", "C", "Q", "I", "J", "D", …
$ subject_age        <int> 26, 37, 37, 96, 17, 18, 18, 30, 21, 23, 20, 32, 28, 42, 24, 54, 66, 47, 12, 21, 31, 59, 32…
$ subject_race       <fct> black, black, black, black, black, black, black, black, black, black, black, black, black,…
$ subject_sex        <fct> female, male, male, male, male, male, male, male, male, male, male, male, male, male, male…
$ officer_assignment <chr> "6th  District", "7th  District", "7th  District", "7th  District", "5th  District", "5th …
$ type               <fct> vehicular, vehicular, vehicular, vehicular, NA, NA, NA, NA, NA, NA, NA, vehicular, vehicul…
$ arrest_made        <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
$ citation_issued    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
$ warning_issued     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
$ outcome            <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ contraband_found   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ contraband_drugs   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ contraband_weapons <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ frisk_performed    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
$ search_conducted   <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
$ search_person      <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
$ search_vehicle     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
$ search_basis       <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ reason_for_stop    <chr> "TRAFFIC VIOLATION", "TRAFFIC VIOLATION", "TRAFFIC VIOLATION", "TRAFFIC VIOLATION", "CALL …
$ vehicle_color      <chr> "BLACK", "BLUE", "BLUE", "GRAY", NA, NA, NA, NA, NA, NA, NA, "BLUE", "BLUE", "YELLOW", "SI…
$ vehicle_make       <chr> "DODGE", "NISSAN", "NISSAN", "JEEP", NA, NA, NA, NA, NA, NA, NA, "GMC - GENERAL MOTORS COR…
$ vehicle_model      <chr> "CARAVAN", "MURANO", "MURANO", "GRAND CHEROKEE", NA, NA, NA, NA, NA, NA, NA, "OTHER", "OTH…
$ vehicle_year       <int> 2005, 2005, 2005, 2003, NA, NA, NA, NA, NA, NA, NA, 1997, 1997, NA, 2001, 1995, 2006, NA, …
$ raw_actions_taken  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ raw_subject_race   <chr> "BLACK", "BLACK", "BLACK", "BLACK", "BLACK", "BLACK", "BLACK", "BLACK", "BLACK", "BLACK", …
data %>% skim()
── Data Summary ────────────────────────
                           Values    
Name                       Piped data
Number of rows             512092    
Number of columns          32        
_______________________              
Column type frequency:               
  character                11        
  Date                     1         
  difftime                 1         
  factor                   5         
  logical                  10        
  numeric                  4         
________________________             
Group variables            None      

── Variable type: character ───────────────────────────────────────────────────────────────────────────────────────────
   skim_variable      n_missing complete_rate   min   max empty n_unique whitespace
 1 raw_row_number             0         1         1   174     0   512092          0
 2 location               95986         0.813     2    57     0    32229          0
 3 district                   0         1         1     5     0       12          0
 4 zone                       0         1         1    13     0       36          0
 5 officer_assignment       123         1.00      3    27     0       20          0
 6 reason_for_stop            0         1         5    45     0       17          0
 7 vehicle_color         239138         0.533     3    98     0       64          0
 8 vehicle_make          235765         0.540     2   218     0      161          0
 9 vehicle_model         252982         0.506     2   146     0      405          0
10 raw_actions_taken     122455         0.761    19   447     0    10921          0
11 raw_subject_race       11730         0.977     5    10     0        6          0

── Variable type: Date ────────────────────────────────────────────────────────────────────────────────────────────────
  skim_variable n_missing complete_rate min        max        median     n_unique
1 date                  4          1.00 2010-01-01 2018-07-18 2014-01-29     3121

── Variable type: difftime ────────────────────────────────────────────────────────────────────────────────────────────
  skim_variable n_missing complete_rate min    max        median     n_unique
1 time                  0             1 0 secs 86340 secs 49860 secs     1440

── Variable type: factor ──────────────────────────────────────────────────────────────────────────────────────────────
  skim_variable n_missing complete_rate ordered n_unique top_counts                                     
1 subject_race      11730         0.977 FALSE          6 bla: 349819, whi: 129703, his: 13491, asi: 3793
2 subject_sex       11730         0.977 FALSE          2 mal: 354304, fem: 146058                       
3 type             149907         0.707 FALSE          2 veh: 298356, ped: 63829                        
4 outcome          176487         0.655 FALSE          3 war: 123832, cit: 117609, arr: 94164, sum: 0   
5 search_basis     436301         0.148 FALSE          4 oth: 52863, pro: 12398, con: 5525, pla: 5005   

── Variable type: logical ─────────────────────────────────────────────────────────────────────────────────────────────
   skim_variable      n_missing complete_rate   mean count                   
 1 arrest_made                0         1     0.184  FAL: 417928, TRU: 94164 
 2 citation_issued            0         1     0.272  FAL: 372758, TRU: 139334
 3 warning_issued             0         1     0.248  FAL: 384853, TRU: 127239
 4 contraband_found      436301         0.148 0.187  FAL: 61594, TRU: 14197  
 5 contraband_drugs      436301         0.148 0.126  FAL: 66218, TRU: 9573   
 6 contraband_weapons    436301         0.148 0.0411 FAL: 72678, TRU: 3113   
 7 frisk_performed            0         1     0.124  FAL: 448645, TRU: 63447 
 8 search_conducted           0         1     0.148  FAL: 436301, TRU: 75791 
 9 search_person              0         1     0.117  FAL: 452052, TRU: 60040 
10 search_vehicle             0         1     0.0683 FAL: 477095, TRU: 34997 

── Variable type: numeric ─────────────────────────────────────────────────────────────────────────────────────────────
  skim_variable n_missing complete_rate   mean      sd     p0    p25    p50    p75   p100 hist 
1 lat              251684         0.509   30.0  0.0321   29.7   29.9   30.0   30.0   30.3 ▁▁▇▁▁
2 lng              251684         0.509  -90.1  0.0418  -93.2  -90.1  -90.1  -90.0  -89.7 ▁▁▁▁▇
3 subject_age       12786         0.975   34.5 13.2      10     24     31     44    110   ▇▆▂▁▁
4 vehicle_year     240388         0.531 2004.   9.60   1900   2001   2005   2009   2020   ▁▁▁▁▇
# Drop all rows where stop_data, time, and driver_gender are missing
data %<>%
  drop_na(subject_race, subject_sex)
data %<>% 
  mutate(date = date %>% as.character())
library(lubridate)
data %>% pull(date) %>% as_date() %>% is.Date()
[1] TRUE

Identify poor neighborhoors

data %>% 
  count(vehicle_make, sort = TRUE)
data %<>%
  mutate(vehicle_rich = vehicle_make %in% c('JEEP', 'LEXUS', 'MERCEDES', 'BMW', 'PORSCHE', 'JAGUAR', 'HUMMER ALL-TERRAIN VEHICLE'))
data %>%
  group_by(vehicle_rich) %>%
  summarize(mean_age = mean(vehicle_year, na.rm = TRUE))
NA
data %>%
  count(vehicle_rich, subject_race)
districts_rich <- data %>%
  group_by(district) %>%
  summarise(n = n(), 
            share_rich = sum(vehicle_rich) /n()) %>%
  filter(n > 100) %>%
  mutate(district_rich = percent_rank(share_rich) > 0.75)
districts_rich %>%
  arrange(desc(share_rich))
data %<>% select(-district_rich)
Error: Can't subset columns that don't exist.
x Column `district_rich` doesn't exist.
Run `rlang::last_error()` to see where the error occurred.
data %>%
  add_count(subject_race, name = 'n_race') %>%
  group_by(district_rich, subject_race) %>%
  summarise(share = n() / mean(n_race)) %>%
  ggplot(aes(x = subject_race, share, fill = district_rich)) +
  geom_col()
`summarise()` has grouped output by 'district_rich'. You can override using the `.groups` argument.

data %>%
  group_by(district_rich) %>%
  summarise(search = mean(search_conducted))

Time of the day

data %>% 
  count(time, search_conducted) %>%
  ggplot(aes(x = time, y = n, col = search_conducted)) +
  geom_smooth()
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

LS0tCnRpdGxlOiAiSW4tQ2xhc3MgRXhlcmNpc2U6IEVEQSAmIE9wZW4gUG9saWNpbmcpIgphdXRob3I6ICJEYW5pZWwgUy4gSGFpbiAoZHNoQGJ1c2luZXNzLmFhdS5kaykiCmRhdGU6ICJVcGRhdGVkIGByIGZvcm1hdChTeXMudGltZSgpLCAnJUIgJWQsICVZJylgIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgICB0b2M6IHRydWUKICAgIHRvY19kZXB0aDogMgogICAgdG9jX2Zsb2F0OgogICAgICBjb2xsYXBzZWQ6IGZhbHNlCiAgICB0aGVtZTogZmxhdGx5Ci0tLQojIFByZWFtYmxlCgpgYGB7cn0KIyMgSW1wb3J0aW5nIHBhY2thZ2VzCmxpYnJhcnkodGlkeXZlcnNlKSAjIG1ldGFwYWNrYWdlIHdpdGggbG90cyBvZiBoZWxwZnVsIGZ1bmN0aW9ucwpsaWJyYXJ5KG1hZ3JpdHRyKQpsaWJyYXJ5KHNraW1yKQpgYGAKCiMgTG9hZCB0aGUgZGF0YQoKYGBge3J9CnBvbGljZV9zdG9wcyA8LSByZWFkX3JkcygnaHR0cHM6Ly9zdGFja3Muc3RhbmZvcmQuZWR1L2ZpbGUvZHJ1aWQ6eWc4MjFqZjg2MTEveWc4MjFqZjg2MTFfbGFfbmV3X29ybGVhbnNfMjAyMF8wNF8wMS5yZHMnKQpgYGAKCmBgYHtyfQpkYXRhIDwtIHBvbGljZV9zdG9wcyAKYGBgCgpgYGB7cn0KI2RhdGEgPC0gcmVhZF9jc3YoJ2h0dHBzOi8vc3RhY2tzLnN0YW5mb3JkLmVkdS9maWxlL2RydWlkOnlnODIxamY4NjExL3lnODIxamY4NjExX2xhX25ld19vcmxlYW5zXzIwMjBfMDRfMDEuY3N2LnppcCcpCmBgYAoKCmBgYHtyfQpkYXRhICU+JSBnbGltcHNlKCkKYGBgCgpgYGB7cn0KZGF0YSAlPiUgc2tpbSgpCmBgYAoKYGBge3J9CiMgRHJvcCBhbGwgcm93cyB3aGVyZSBzdG9wX2RhdGEsIHRpbWUsIGFuZCBkcml2ZXJfZ2VuZGVyIGFyZSBtaXNzaW5nCmRhdGEgJTw+JQogIGRyb3BfbmEoc3ViamVjdF9yYWNlLCBzdWJqZWN0X3NleCkKYGBgCgpgYGB7cn0KI2RhdGEgJTw+JSAKIyAgbXV0YXRlKGRhdGUgPSBkYXRlICU+JSBhcy5jaGFyYWN0ZXIoKSkKYGBgCgpgYGB7cn0KI2xpYnJhcnkobHVicmlkYXRlKQojZGF0YSAlPiUgcHVsbChkYXRlKSAlPiUgYXNfZGF0ZSgpIApgYGAKCiMgSWRlbnRpZnkgcG9vciBuZWlnaGJvcmhvb3JzCgpgYGB7cn0KZGF0YSAlPiUgCiAgY291bnQodmVoaWNsZV9tYWtlLCBzb3J0ID0gVFJVRSkKYGBgCgpgYGB7cn0KZGF0YSAlPD4lCiAgbXV0YXRlKHZlaGljbGVfcmljaCA9IHZlaGljbGVfbWFrZSAlaW4lIGMoJ0pFRVAnLCAnTEVYVVMnLCAnTUVSQ0VERVMnLCAnQk1XJywgJ1BPUlNDSEUnLCAnSkFHVUFSJywgJ0hVTU1FUiBBTEwtVEVSUkFJTiBWRUhJQ0xFJykpCmBgYAoKYGBge3J9CmRhdGEgJT4lCiAgZ3JvdXBfYnkodmVoaWNsZV9yaWNoKSAlPiUKICBzdW1tYXJpemUobWVhbl9hZ2UgPSBtZWFuKHZlaGljbGVfeWVhciwgbmEucm0gPSBUUlVFKSkKYGBgCgoKYGBge3J9CmRhdGEgJT4lCiAgY291bnQodmVoaWNsZV9yaWNoLCBzdWJqZWN0X3JhY2UpCmBgYAoKCmBgYHtyfQpkaXN0cmljdHNfcmljaCA8LSBkYXRhICU+JQogIGdyb3VwX2J5KGRpc3RyaWN0KSAlPiUKICBzdW1tYXJpc2UobiA9IG4oKSwgCiAgICAgICAgICAgIHNoYXJlX3JpY2ggPSBzdW0odmVoaWNsZV9yaWNoKSAvbigpKSAlPiUKICBmaWx0ZXIobiA+IDEwMCkgJT4lCiAgbXV0YXRlKGRpc3RyaWN0X3JpY2ggPSBwZXJjZW50X3Jhbmsoc2hhcmVfcmljaCkgPiAwLjc1KQpgYGAKCmBgYHtyfQpkaXN0cmljdHNfcmljaCAlPiUKICBhcnJhbmdlKGRlc2Moc2hhcmVfcmljaCkpCmBgYAoKCgpgYGB7cn0KZGF0YSAlPD4lIHNlbGVjdCgtZGlzdHJpY3RfcmljaCkKCmRhdGEgJTw+JSAKICBsZWZ0X2pvaW4oZGlzdHJpY3RzX3JpY2ggJT4lIHNlbGVjdChkaXN0cmljdCwgZGlzdHJpY3RfcmljaCksIGJ5ID0gJ2Rpc3RyaWN0JykKYGBgCgpgYGB7cn0KZGF0YSAlPiUKICBhZGRfY291bnQoc3ViamVjdF9yYWNlLCBuYW1lID0gJ25fcmFjZScpICU+JQogIGdyb3VwX2J5KGRpc3RyaWN0X3JpY2gsIHN1YmplY3RfcmFjZSkgJT4lCiAgc3VtbWFyaXNlKHNoYXJlID0gbigpIC8gbWVhbihuX3JhY2UpKSAlPiUKICBnZ3Bsb3QoYWVzKHggPSBzdWJqZWN0X3JhY2UsIHNoYXJlLCBmaWxsID0gZGlzdHJpY3RfcmljaCkpICsKICBnZW9tX2NvbCgpCmBgYAoKYGBge3J9CmRhdGEgJT4lCiAgZ3JvdXBfYnkoZGlzdHJpY3RfcmljaCkgJT4lCiAgc3VtbWFyaXNlKHNlYXJjaCA9IG1lYW4oc2VhcmNoX2NvbmR1Y3RlZCkpCmBgYAoKCiMgVGltZSBvZiB0aGUgZGF5CgpgYGB7cn0KZGF0YSAlPiUgCiAgY291bnQodGltZSwgc2VhcmNoX2NvbmR1Y3RlZCkgJT4lCiAgZ2dwbG90KGFlcyh4ID0gdGltZSwgeSA9IG4sIGNvbCA9IHNlYXJjaF9jb25kdWN0ZWQpKSArCiAgZ2VvbV9zbW9vdGgoKQpgYGAKCgoK