library(tidyverse)
library(magrittr)
library(skimr)
listings <- read_csv('http://data.insideairbnb.com/denmark/hovedstaden/copenhagen/2021-07-20/data/listings.csv.gz')
Rows: 9949 Columns: 74
── Column specification ───────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (23): listing_url, name, description, neighborhood_overview, picture_url, host_url, host_name,...
dbl (37): id, scrape_id, host_id, host_listings_count, host_total_listings_count, latitude, longit...
lgl (9): host_is_superhost, host_has_profile_pic, host_identity_verified, neighbourhood_group_cle...
date (5): last_scraped, host_since, calendar_last_scraped, first_review, last_review
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Adittional data (not used so far)
#calendar <- read_csv('http://data.insideairbnb.com/denmark/hovedstaden/copenhagen/2021-07-20/data/calendar.csv.gz')
#calendar %>% glimpse()
# reviews <- read_csv('http://data.insideairbnb.com/denmark/hovedstaden/copenhagen/2021-07-20/data/reviews.csv.gz')
# reviews %>% glimpse()
listings %>% glimpse()
Rows: 9,949
Columns: 74
$ id <dbl> 6983, 26057, 29118, 31094, 32379, 32841, 33680, …
$ listing_url <chr> "https://www.airbnb.com/rooms/6983", "https://ww…
$ scrape_id <dbl> 20210720033302, 20210720033302, 20210720033302, …
$ last_scraped <date> 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20,…
$ name <chr> "Copenhagen 'N Livin'", "Lovely house - most att…
$ description <chr> "Lovely apartment located in the hip Nørrebro ar…
$ neighborhood_overview <chr> "Nice bars and cozy cafes just minutes away, yet…
$ picture_url <chr> "https://a0.muscache.com/pictures/42044170/f63c4…
$ host_id <dbl> 16774, 109777, 125230, 129976, 140105, 142143, 1…
$ host_url <chr> "https://www.airbnb.com/users/show/16774", "http…
$ host_name <chr> "Simon", "Kari", "Nana", "Ebbe", "Lise", "Anders…
$ host_since <date> 2009-05-12, 2010-04-17, 2010-05-15, 2010-05-22,…
$ host_location <chr> "Copenhagen, Capital Region of Denmark, Denmark"…
$ host_about <chr> "I'm currently working as an environmental consu…
$ host_response_time <chr> "within a day", "N/A", "within a few hours", "wi…
$ host_response_rate <chr> "100%", "N/A", "100%", "100%", "100%", "N/A", "1…
$ host_acceptance_rate <chr> "N/A", "N/A", "100%", "0%", "88%", "N/A", "N/A",…
$ host_is_superhost <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
$ host_thumbnail_url <chr> "https://a0.muscache.com/im/users/16774/profile_…
$ host_picture_url <chr> "https://a0.muscache.com/im/users/16774/profile_…
$ host_neighbourhood <chr> "Nørrebro", "Indre By", "Vesterbro", "Vesterbro"…
$ host_listings_count <dbl> 1, 1, 1, 1, 3, 1, 0, 1, 2, 1, 1, 2, 1, 1, 1, 1, …
$ host_total_listings_count <dbl> 1, 1, 1, 1, 3, 1, 0, 1, 2, 1, 1, 2, 1, 1, 1, 1, …
$ host_verifications <chr> "['email', 'phone', 'reviews']", "['email', 'pho…
$ host_has_profile_pic <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, …
$ host_identity_verified <lgl> FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE…
$ neighbourhood <chr> "Copenhagen, Hovedstaden, Denmark", "Copenhagen,…
$ neighbourhood_cleansed <chr> "Nrrebro", "Indre By", "Vesterbro-Kongens Enghav…
$ neighbourhood_group_cleansed <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ latitude <dbl> 55.68641, 55.69196, 55.67023, 55.66539, 55.67297…
$ longitude <dbl> 12.54741, 12.57637, 12.55504, 12.55639, 12.55327…
$ property_type <chr> "Private room in apartment", "Entire house", "En…
$ room_type <chr> "Private room", "Entire home/apt", "Entire home/…
$ accommodates <dbl> 2, 6, 2, 3, 5, 4, 4, 4, 1, 2, 1, 1, 3, 5, 7, 4, …
$ bathrooms <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ bathrooms_text <chr> "1 shared bath", "1.5 baths", "1 bath", "1 bath"…
$ bedrooms <dbl> 1, 4, 1, 1, 3, 2, 1, 2, NA, 1, 1, 1, 1, 4, 4, 2,…
$ beds <dbl> 1, 4, 1, 3, 4, 2, 1, 3, 1, 1, 1, 1, 2, 5, 4, 2, …
$ amenities <chr> "[\"Cable TV\", \"Paid parking on premises\", \"…
$ price <chr> "$365.00", "$2,400.00", "$729.00", "$750.00", "$…
$ minimum_nights <dbl> 2, 3, 7, 2, 3, 100, 6, 5, 35, 3, 2, 5, 3, 6, 2, …
$ maximum_nights <dbl> 15, 30, 14, 10, 365, 1125, 60, 21, 90, 15, 1125,…
$ minimum_minimum_nights <dbl> 2, 3, 3, 2, 3, 100, 6, 5, 35, 3, 2, 5, 3, 6, 2, …
$ maximum_minimum_nights <dbl> 2, 3, 5, 2, 3, 100, 6, 5, 35, 3, 2, 5, 3, 6, 2, …
$ minimum_maximum_nights <dbl> 15, 30, 14, 10, 1125, 1125, 60, 21, 90, 15, 1125…
$ maximum_maximum_nights <dbl> 15, 30, 14, 10, 1125, 1125, 60, 21, 90, 15, 1125…
$ minimum_nights_avg_ntm <dbl> 2.0, 3.0, 4.1, 2.0, 3.0, 100.0, 6.0, 5.0, 35.0, …
$ maximum_nights_avg_ntm <dbl> 15, 30, 14, 10, 1125, 1125, 60, 21, 90, 15, 1125…
$ calendar_updated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ has_availability <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, …
$ availability_30 <dbl> 0, 26, 0, 0, 2, 0, 5, 23, 7, 29, 9, 0, 17, 0, 7,…
$ availability_60 <dbl> 0, 42, 0, 0, 2, 0, 9, 53, 17, 59, 32, 0, 45, 0, …
$ availability_90 <dbl> 0, 72, 0, 0, 2, 0, 15, 83, 44, 89, 62, 0, 75, 0,…
$ availability_365 <dbl> 0, 347, 46, 0, 91, 217, 249, 358, 90, 364, 62, 0…
$ calendar_last_scraped <date> 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20,…
$ number_of_reviews <dbl> 168, 50, 22, 17, 73, 7, 71, 11, 75, 67, 92, 47, …
$ number_of_reviews_ltm <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, …
$ number_of_reviews_l30d <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
$ first_review <date> 2013-01-02, 2016-02-06, 2016-08-24, 2016-06-19,…
$ last_review <date> 2018-11-23, 2019-12-14, 2019-07-22, 2012-06-10,…
$ review_scores_rating <dbl> 4.78, 4.90, 4.91, 4.87, 4.89, 4.57, 4.74, 5.00, …
$ review_scores_accuracy <dbl> 4.78, 4.91, 4.85, 4.80, 4.96, 4.75, 4.75, 4.78, …
$ review_scores_cleanliness <dbl> 4.78, 4.96, 4.77, 4.87, 4.93, 4.50, 4.67, 5.00, …
$ review_scores_checkin <dbl> 4.87, 4.91, 5.00, 4.85, 4.86, 5.00, 4.91, 5.00, …
$ review_scores_communication <dbl> 4.90, 4.83, 5.00, 4.80, 4.90, 5.00, 4.91, 5.00, …
$ review_scores_location <dbl> 4.72, 4.96, 4.85, 4.85, 4.87, 4.50, 4.80, 4.89, …
$ review_scores_value <dbl> 4.71, 4.82, 4.77, 4.46, 4.70, 4.50, 4.67, 4.78, …
$ license <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ instant_bookable <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, …
$ calculated_host_listings_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, …
$ calculated_host_listings_count_entire_homes <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, …
$ calculated_host_listings_count_private_rooms <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, …
$ calculated_host_listings_count_shared_rooms <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ reviews_per_month <dbl> 1.61, 0.75, 0.37, 0.27, 0.66, 0.06, 0.94, 0.18, …
# To retain the original listings data
data <- listings
data %>% skim()
── Data Summary ────────────────────────
Values
Name Piped data
Number of rows 9949
Number of columns 74
_______________________
Column type frequency:
character 23
Date 5
logical 9
numeric 37
________________________
Group variables None
── Variable type: character ───────────────────────────────────────────────────────────────────────────
skim_variable n_missing complete_rate min max empty n_unique whitespace
1 listing_url 0 1 33 37 0 9949 0
2 name 1 1.00 1 211 0 9670 0
3 description 352 0.965 2 1000 0 9468 0
4 neighborhood_overview 4338 0.564 4 1000 0 5366 0
5 picture_url 0 1 61 126 0 9828 0
6 host_url 0 1 39 43 0 8677 0
7 host_name 4 1.00 1 28 0 2893 0
8 host_location 21 0.998 2 119 0 400 0
9 host_about 4366 0.561 1 6639 0 4636 12
10 host_response_time 4 1.00 3 18 0 5 0
11 host_response_rate 4 1.00 2 4 0 61 0
12 host_acceptance_rate 4 1.00 2 4 0 97 0
13 host_thumbnail_url 4 1.00 55 106 0 8627 0
14 host_picture_url 4 1.00 57 109 0 8627 0
15 host_neighbourhood 4029 0.595 2 20 0 33 0
16 host_verifications 0 1 2 158 0 254 0
17 neighbourhood 4338 0.564 7 51 0 192 0
18 neighbourhood_cleansed 0 1 5 25 0 11 0
19 property_type 0 1 4 34 0 48 0
20 room_type 0 1 10 15 0 4 0
21 bathrooms_text 17 0.998 6 17 0 21 0
22 amenities 0 1 2 1424 0 9567 0
23 price 0 1 5 11 0 1451 0
── Variable type: Date ────────────────────────────────────────────────────────────────────────────────
skim_variable n_missing complete_rate min max median n_unique
1 last_scraped 0 1 2021-07-20 2021-07-20 2021-07-20 1
2 host_since 4 1.00 2009-05-12 2021-07-18 2015-06-14 3046
3 calendar_last_scraped 0 1 2021-07-20 2021-07-20 2021-07-20 1
4 first_review 1899 0.809 2011-07-09 2021-07-19 2018-08-22 2130
5 last_review 1899 0.809 2011-07-21 2021-07-19 2019-09-07 1603
── Variable type: logical ─────────────────────────────────────────────────────────────────────────────
skim_variable n_missing complete_rate mean count
1 host_is_superhost 4 1.00 0.139 "FAL: 8560, TRU: 1385"
2 host_has_profile_pic 4 1.00 0.995 "TRU: 9897, FAL: 48"
3 host_identity_verified 4 1.00 0.775 "TRU: 7712, FAL: 2233"
4 neighbourhood_group_cleansed 9949 0 NaN ": "
5 bathrooms 9949 0 NaN ": "
6 calendar_updated 9949 0 NaN ": "
7 has_availability 0 1 0.983 "TRU: 9782, FAL: 167"
8 license 9949 0 NaN ": "
9 instant_bookable 0 1 0.222 "FAL: 7745, TRU: 2204"
── Variable type: numeric ─────────────────────────────────────────────────────────────────────────────
skim_variable n_missing complete_rate mean sd p0
1 id 0 1 2.62e+ 7 15951708. 6.98e+ 3
2 scrape_id 0 1 2.02e+13 0 2.02e+13
3 host_id 0 1 8.20e+ 7 98867345. 1.68e+ 4
4 host_listings_count 4 1.00 1.08e+ 1 53.1 0
5 host_total_listings_count 4 1.00 1.08e+ 1 53.1 0
6 latitude 0 1 5.57e+ 1 0.0191 5.56e+ 1
7 longitude 0 1 1.26e+ 1 0.0317 1.25e+ 1
8 accommodates 0 1 3.48e+ 0 1.79 0
9 bedrooms 236 0.976 1.70e+ 0 1.38 1 e+ 0
10 beds 73 0.993 2.09e+ 0 1.52 0
11 minimum_nights 0 1 4.31e+ 0 17.6 1 e+ 0
12 maximum_nights 0 1 5.72e+ 2 540. 1 e+ 0
13 minimum_minimum_nights 0 1 4.29e+ 0 17.2 1 e+ 0
14 maximum_minimum_nights 0 1 4.72e+ 0 18.3 1 e+ 0
15 minimum_maximum_nights 0 1 6.51e+ 2 538. 1 e+ 0
16 maximum_maximum_nights 0 1 6.62e+ 2 536. 1 e+ 0
17 minimum_nights_avg_ntm 0 1 4.53e+ 0 17.9 1 e+ 0
18 maximum_nights_avg_ntm 0 1 6.58e+ 2 536. 1 e+ 0
19 availability_30 0 1 8.88e+ 0 10.7 0
20 availability_60 0 1 1.87e+ 1 22.0 0
21 availability_90 0 1 3.06e+ 1 34.0 0
22 availability_365 0 1 1.14e+ 2 128. 0
23 number_of_reviews 0 1 1.90e+ 1 35.0 0
24 number_of_reviews_ltm 0 1 1.40e+ 0 3.98 0
25 number_of_reviews_l30d 0 1 2.59e- 1 0.792 0
26 review_scores_rating 1899 0.809 4.72e+ 0 0.613 0
27 review_scores_accuracy 1998 0.799 4.83e+ 0 0.300 0
28 review_scores_cleanliness 1998 0.799 4.69e+ 0 0.426 0
29 review_scores_checkin 1998 0.799 4.88e+ 0 0.268 0
30 review_scores_communication 1998 0.799 4.90e+ 0 0.261 1 e+ 0
31 review_scores_location 1999 0.799 4.82e+ 0 0.267 1 e+ 0
32 review_scores_value 1999 0.799 4.70e+ 0 0.348 1 e+ 0
33 calculated_host_listings_count 0 1 5.99e+ 0 26.3 1 e+ 0
34 calculated_host_listings_count_entire_homes 0 1 5.67e+ 0 26.3 0
35 calculated_host_listings_count_private_rooms 0 1 3.00e- 1 0.956 0
36 calculated_host_listings_count_shared_rooms 0 1 7.54e- 3 0.149 0
37 reviews_per_month 1899 0.809 6.82e- 1 1.12 1 e- 2
p25 p50 p75 p100 hist
1 1.24e+ 7 2.67e+ 7 4.05e+ 7 5.11e 7 ▇▆▆▆▇
2 2.02e+13 2.02e+13 2.02e+13 2.02e13 ▁▁▇▁▁
3 1.03e+ 7 3.55e+ 7 1.29e+ 8 4.14e 8 ▇▂▁▁▁
4 1 e+ 0 1 e+ 0 2 e+ 0 3.46e 2 ▇▁▁▁▁
5 1 e+ 0 1 e+ 0 2 e+ 0 3.46e 2 ▇▁▁▁▁
6 5.57e+ 1 5.57e+ 1 5.57e+ 1 5.57e 1 ▁▃▇▆▁
7 1.25e+ 1 1.26e+ 1 1.26e+ 1 1.26e 1 ▁▂▇▆▂
8 2 e+ 0 3 e+ 0 4 e+ 0 1.6 e 1 ▇▆▁▁▁
9 1 e+ 0 1 e+ 0 2 e+ 0 1.01e 2 ▇▁▁▁▁
10 1 e+ 0 2 e+ 0 3 e+ 0 2.5 e 1 ▇▁▁▁▁
11 2 e+ 0 3 e+ 0 4 e+ 0 1.11e 3 ▇▁▁▁▁
12 2 e+ 1 3.65e+ 2 1.12e+ 3 4 e 3 ▇▇▁▁▁
13 2 e+ 0 3 e+ 0 4 e+ 0 1.11e 3 ▇▁▁▁▁
14 2 e+ 0 3 e+ 0 4 e+ 0 1.11e 3 ▇▁▁▁▁
15 2.5 e+ 1 1.12e+ 3 1.12e+ 3 4 e 3 ▆▇▁▁▁
16 2.8 e+ 1 1.12e+ 3 1.12e+ 3 4 e 3 ▆▇▁▁▁
17 2 e+ 0 3 e+ 0 4 e+ 0 1.11e 3 ▇▁▁▁▁
18 2.8 e+ 1 1.12e+ 3 1.12e+ 3 4 e 3 ▆▇▁▁▁
19 0 4 e+ 0 1.6 e+ 1 3 e 1 ▇▂▁▁▂
20 0 8 e+ 0 3.7 e+ 1 6 e 1 ▇▂▁▁▂
21 0 1.3 e+ 1 6.3 e+ 1 9 e 1 ▇▁▁▁▃
22 3 e+ 0 5.8 e+ 1 2 e+ 2 3.65e 2 ▇▂▂▁▂
23 1 e+ 0 7 e+ 0 2.2 e+ 1 6.45e 2 ▇▁▁▁▁
24 0 0 1 e+ 0 1.16e 2 ▇▁▁▁▁
25 0 0 0 2.2 e 1 ▇▁▁▁▁
26 4.67e+ 0 4.86e+ 0 5 e+ 0 5 e 0 ▁▁▁▁▇
27 4.79e+ 0 4.92e+ 0 5 e+ 0 5 e 0 ▁▁▁▁▇
28 4.56e+ 0 4.81e+ 0 5 e+ 0 5 e 0 ▁▁▁▁▇
29 4.86e+ 0 4.97e+ 0 5 e+ 0 5 e 0 ▁▁▁▁▇
30 4.89e+ 0 5 e+ 0 5 e+ 0 5 e 0 ▁▁▁▁▇
31 4.75e+ 0 4.89e+ 0 5 e+ 0 5 e 0 ▁▁▁▁▇
32 4.6 e+ 0 4.76e+ 0 4.92e+ 0 5 e 0 ▁▁▁▁▇
33 1 e+ 0 1 e+ 0 1 e+ 0 1.86e 2 ▇▁▁▁▁
34 1 e+ 0 1 e+ 0 1 e+ 0 1.86e 2 ▇▁▁▁▁
35 0 0 0 1 e 1 ▇▁▁▁▁
36 0 0 0 5 e 0 ▇▁▁▁▁
37 1.7 e- 1 3.9 e- 1 8 e- 1 3.66e 1 ▇▁▁▁▁
data %<>%
mutate(price = price %>% parse_number())
# Empty characters
data %<>%
mutate(across(where(is_character), ~ifelse(.x == "", NA, .x)))
data %<>%
rename(y = price) %>%
select(y, review_scores_rating, neighbourhood_cleansed, accommodates, room_type, number_of_reviews, host_is_superhost, host_identity_verified, bedrooms)
data %>% ggplot(aes(x = y)) +
geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
data %<>%
drop_na(y) %>%
filter(percent_rank(y) <0.95)
data %>% count(room_type, sort = TRUE)
data %<>%
filter(!(room_type %in% c('Shared room', 'Hotel room')))
library(VIM)
data %>%
aggr(numbers = TRUE, prop = c(TRUE, FALSE))
# We here for convenience justy decided to drop all NAs and not deal with them in a more sophisticated way
data %<>%
drop_na()
library(tidymodels)
Registered S3 method overwritten by 'tune':
method from
required_pkgs.model_spec parsnip
── Attaching packages ───────────────────────────────────────────────────────────── tidymodels 0.1.3 ──
✓ broom 0.7.9 ✓ rsample 0.1.0
✓ dials 0.0.10 ✓ tune 0.1.6
✓ infer 1.0.0 ✓ workflows 0.2.3
✓ modeldata 0.1.1 ✓ workflowsets 0.1.0
✓ parsnip 0.1.7 ✓ yardstick 0.0.8
✓ recipes 0.1.16
── Conflicts ──────────────────────────────────────────────────────────────── tidymodels_conflicts() ──
x scales::discard() masks purrr::discard()
x magrittr::extract() masks tidyr::extract()
x dplyr::filter() masks stats::filter()
x recipes::fixed() masks stringr::fixed()
x dplyr::lag() masks stats::lag()
x recipes::prepare() masks VIM::prepare()
x magrittr::set_names() masks purrr::set_names()
x yardstick::spec() masks readr::spec()
x recipes::step() masks stats::step()
• Use tidymodels_prefer() to resolve common conflicts.
data_split <- initial_split(data, prop = 0.80, strata = y)
data_train <- data_split %>% training()
data_test <- data_split %>% testing()
data_recipe <- data_train %>%
recipe(y ~.) %>%
step_normalize(all_numeric(), -all_outcomes()) %>%
step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)
model_null <- null_model(mode = 'regression')
model_lm <- linear_reg(mode = 'regression') %>%
set_engine('lm')
model_xg <- boost_tree(mode = 'regression',
trees = 100,
mtry = tune(),
min_n = tune(),
tree_depth = tune(),
learn_rate = tune()
) %>%
set_engine("xgboost")
workflow_general <- workflow() %>%
add_recipe(data_recipe)
workflow_null <- workflow_general %>%
add_model(model_null)
workflow_lm <- workflow_general %>%
add_model(model_lm)
workflow_xg <- workflow_general %>%
add_model(model_xg)
data_resample <- data_train %>%
vfold_cv(strata = y,
v = 5,
repeats = 3)
tune_xg <-
tune_grid(
workflow_xg,
resamples = data_resample,
grid = 10
)
i Creating pre-processing data to finalize unknown parameter: mtry
tune_xg %>% autoplot()
best_param_xg <- tune_xg %>% select_best(metric = 'rmse')
best_param_xg
workflow_final_xg <- workflow_xg %>%
finalize_workflow(parameters = best_param_xg)
fit_null <- workflow_null %>%
fit(data_train)
Warning: Engine set to `parsnip`.
fit_lm <- workflow_lm %>%
fit(data_train)
fit_xg <- workflow_final_xg %>%
fit(data_train)
pred_collected <- tibble(
truth = data_train %>% pull(y),
base = fit_null %>% predict(new_data = data_train) %>% pull(.pred),
lm = fit_lm %>% predict(new_data = data_train) %>% pull(.pred),
xg = fit_xg %>% predict(new_data = data_train) %>% pull(.pred),
) %>%
pivot_longer(cols = -truth,
names_to = 'model',
values_to = '.pred')
Warning in predict.lm(object = object$fit, newdata = new_data, type = "response") :
prediction from a rank-deficient fit may be misleading
pred_collected %>%
group_by(model) %>%
rmse(truth = truth, estimate = .pred) %>%
select(model, .estimate) %>%
rename(RMSE = .estimate) %>%
arrange(RMSE)
pred_collected %>%
ggplot(aes(x = truth, y = .pred, color = model)) +
geom_abline(lty = 2, color = "gray80", size = 1.5) +
geom_point(alpha = 0.5) +
labs(
x = "Truth",
y = "Predicted price",
color = "Type of model"
)
Warning: Removed 1551 rows containing missing values (geom_point).
Reminder: This was just fitted on the train data. To do the last fit using all train data and evaluate on thetest data, do the following
fit_last
# Resampling results
# Manual resampling