library(tidyverse)
library(magrittr)
library(skimr)

Load data

# Fetch the Copenhagen listings snapshot (2021-07-20 scrape) from Inside Airbnb;
# read_csv() reads the gzipped CSV directly and guesses the column types.
data <- 'http://data.insideairbnb.com/denmark/hovedstaden/copenhagen/2021-07-20/data/listings.csv.gz' %>%
  read_csv()
Rows: 9949 Columns: 74
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr  (23): listing_url, name, description, neighborhood_overview, picture_url, host_url, host_name, host_location, host_about, host_response_time, host_response_rate, host_acceptance_rate, h...
dbl  (37): id, scrape_id, host_id, host_listings_count, host_total_listings_count, latitude, longitude, accommodates, bedrooms, beds, minimum_nights, maximum_nights, minimum_minimum_nights, ...
lgl   (9): host_is_superhost, host_has_profile_pic, host_identity_verified, neighbourhood_group_cleansed, bathrooms, calendar_updated, has_availability, license, instant_bookable
date  (5): last_scraped, host_since, calendar_last_scraped, first_review, last_review

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Compact preview of every column: its type and the first few values.
glimpse(data)
Rows: 9,949
Columns: 74
$ id                                           <dbl> 6983, 26057, 29118, 31094, 32379, 32841, 33680, 37159, 55465, 60048, 65902, 69440, 78381, 112292, 118649, 118971, 119322, 130130, 135777, 1…
$ listing_url                                  <chr> "https://www.airbnb.com/rooms/6983", "https://www.airbnb.com/rooms/26057", "https://www.airbnb.com/rooms/29118", "https://www.airbnb.com/ro…
$ scrape_id                                    <dbl> 2.021072e+13, 2.021072e+13, 2.021072e+13, 2.021072e+13, 2.021072e+13, 2.021072e+13, 2.021072e+13, 2.021072e+13, 2.021072e+13, 2.021072e+13,…
$ last_scraped                                 <date> 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-0…
$ name                                         <chr> "Copenhagen 'N Livin'", "Lovely house - most attractive area", "Best Location in Cool Istedgade", "Very central and cozy, new kitchen", "12…
$ description                                  <chr> "Lovely apartment located in the hip Nørrebro area, close to bars, cafés and restaurants.<br />The room itself is rather small, but sleeps …
$ neighborhood_overview                        <chr> "Nice bars and cozy cafes just minutes away, yet the street itself is quiet and you won't be bothered by loud music.", "The neighborhood is…
$ picture_url                                  <chr> "https://a0.muscache.com/pictures/42044170/f63c4d99_original.jpg", "https://a0.muscache.com/pictures/miso/Hosting-26057/original/e9c69708-0…
$ host_id                                      <dbl> 16774, 109777, 125230, 129976, 140105, 142143, 145671, 160390, 261977, 288615, 322300, 194944, 420914, 573172, 599145, 600859, 602647, 6411…
$ host_url                                     <chr> "https://www.airbnb.com/users/show/16774", "https://www.airbnb.com/users/show/109777", "https://www.airbnb.com/users/show/125230", "https:/…
$ host_name                                    <chr> "Simon", "Kari", "Nana", "Ebbe", "Lise", "Anders & Maria", "Mette", "Jeanette", "Morten", "Oliver", "Sjelle", "Anne", "Cathrine", "Christof…
$ host_since                                   <date> 2009-05-12, 2010-04-17, 2010-05-15, 2010-05-22, 2010-06-07, 2010-06-10, 2010-06-16, 2010-07-07, 2010-10-14, 2010-11-16, 2010-12-19, 2010-0…
$ host_location                                <chr> "Copenhagen, Capital Region of Denmark, Denmark", "København, Denmark", "Copenhagen, Denmark", "Copenhagen, Capital Region of Denmark, Denm…
$ host_about                                   <chr> "I'm currently working as an environmental consultant for a large engineering consultancy in Copenhagen.\r\nWhen I'm not at work, I spend t…
$ host_response_time                           <chr> "within a day", "N/A", "within a few hours", "within a few hours", "within a few hours", "N/A", "within an hour", "N/A", "within a day", "w…
$ host_response_rate                           <chr> "100%", "N/A", "100%", "100%", "100%", "N/A", "100%", "N/A", "100%", "100%", "100%", "N/A", "100%", "100%", "N/A", "75%", "100%", "80%", "N…
$ host_acceptance_rate                         <chr> "N/A", "N/A", "100%", "0%", "88%", "N/A", "N/A", "N/A", "50%", "N/A", "N/A", "100%", "100%", "N/A", "N/A", "N/A", "0%", "36%", "N/A", "N/A"…
$ host_is_superhost                            <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FA…
$ host_thumbnail_url                           <chr> "https://a0.muscache.com/im/users/16774/profile_pic/1401276934/original.jpg?aki_policy=profile_small", "https://a0.muscache.com/im/users/10…
$ host_picture_url                             <chr> "https://a0.muscache.com/im/users/16774/profile_pic/1401276934/original.jpg?aki_policy=profile_x_medium", "https://a0.muscache.com/im/users…
$ host_neighbourhood                           <chr> "Nørrebro", "Indre By", "Vesterbro", "Vesterbro", "Vesterbro", "Østerbro", "Vesterbro", "Indre By", "Nørrebro", "Vesterbro", "Nørrebro", "F…
$ host_listings_count                          <dbl> 1, 1, 1, 1, 3, 1, 0, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 5, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 3, 1, 1, 0, 6…
$ host_total_listings_count                    <dbl> 1, 1, 1, 1, 3, 1, 0, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 5, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 3, 1, 1, 0, 6…
$ host_verifications                           <chr> "['email', 'phone', 'reviews']", "['email', 'phone', 'reviews', 'jumio', 'offline_government_id', 'government_id']", "['email', 'phone', 'r…
$ host_has_profile_pic                         <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T…
$ host_identity_verified                       <lgl> FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TR…
$ neighbourhood                                <chr> "Copenhagen, Hovedstaden, Denmark", "Copenhagen, Hovedstaden, Denmark", NA, "Copenhagen, Capital Region of Denmark, Denmark", "Copenhagen, …
$ neighbourhood_cleansed                       <chr> "Nrrebro", "Indre By", "Vesterbro-Kongens Enghave", "Vesterbro-Kongens Enghave", "Vesterbro-Kongens Enghave", "sterbro", "Vesterbro-Kongens…
$ neighbourhood_group_cleansed                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ latitude                                     <dbl> 55.68641, 55.69196, 55.67023, 55.66539, 55.67297, 55.71176, 55.66631, 55.68547, 55.68127, 55.66720, 55.70710, 55.68493, 55.68384, 55.68068,…
$ longitude                                    <dbl> 12.54741, 12.57637, 12.55504, 12.55639, 12.55327, 12.57091, 12.54555, 12.56543, 12.55389, 12.54668, 12.55386, 12.53434, 12.56687, 12.54581,…
$ property_type                                <chr> "Private room in apartment", "Entire house", "Entire apartment", "Entire apartment", "Entire apartment", "Entire apartment", "Entire apartm…
$ room_type                                    <chr> "Private room", "Entire home/apt", "Entire home/apt", "Entire home/apt", "Entire home/apt", "Entire home/apt", "Entire home/apt", "Entire h…
$ accommodates                                 <dbl> 2, 6, 2, 3, 5, 4, 4, 4, 1, 2, 1, 1, 3, 5, 7, 4, 6, 6, 3, 2, 6, 2, 2, 4, 3, 2, 2, 2, 2, 2, 4, 1, 4, 2, 6, 4, 2, 2, 2, 4, 5, 2, 1, 5, 9, 2, 2…
$ bathrooms                                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ bathrooms_text                               <chr> "1 shared bath", "1.5 baths", "1 bath", "1 bath", "2 baths", "1 bath", "1 bath", "1 bath", "1 bath", "1 bath", "1 shared bath", "1 shared b…
$ bedrooms                                     <dbl> 1, 4, 1, 1, 3, 2, 1, 2, NA, 1, 1, 1, 1, 4, 4, 2, 2, 2, 1, 1, 3, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 3, 2, 1, 1, 3, 4, 2, …
$ beds                                         <dbl> 1, 4, 1, 3, 4, 2, 1, 3, 1, 1, 1, 1, 2, 5, 4, 2, 6, 3, 1, 1, 5, 1, 1, 4, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 6, 2, 1, 1, 1, 4, 2, 2, 1, 3, 4, 2, 2…
$ amenities                                    <chr> "[\"Cable TV\", \"Paid parking on premises\", \"Iron\", \"Room-darkening shades\", \"Hair dryer\", \"Stove\", \"Hot water\", \"Dishwasher\"…
$ price                                        <chr> "$365.00", "$2,400.00", "$729.00", "$750.00", "$1,153.00", "$617.00", "$1,000.00", "$2,396.00", "$535.00", "$851.00", "$400.00", "$322.00",…
$ minimum_nights                               <dbl> 2, 3, 7, 2, 3, 100, 6, 5, 35, 3, 2, 5, 3, 6, 2, 4, 3, 2, 5, 2, 2, 4, 5, 3, 3, 6, 5, 7, 2, 5, 3, 1, 3, 5, 7, 3, 2, 2, 4, 2, 5, 3, 1, 2, 5, 3…
$ maximum_nights                               <dbl> 15, 30, 14, 10, 365, 1125, 60, 21, 90, 15, 1125, 21, 60, 40, 1125, 1125, 21, 31, 365, 730, 30, 30, 31, 1120, 28, 21, 730, 14, 300, 21, 21, …
$ minimum_minimum_nights                       <dbl> 2, 3, 3, 2, 3, 100, 6, 5, 35, 3, 2, 5, 3, 6, 2, 4, 3, 2, 5, 2, 2, 4, 5, 3, 3, 6, 5, 7, 1, 5, 3, 1, 3, 5, 7, 3, 2, 2, 4, 2, 5, 3, 1, 2, 1, 3…
$ maximum_minimum_nights                       <dbl> 2, 3, 5, 2, 3, 100, 6, 5, 35, 3, 2, 5, 3, 6, 2, 4, 3, 3, 5, 2, 2, 4, 5, 3, 3, 6, 5, 7, 2, 5, 3, 1, 3, 5, 7, 3, 2, 2, 4, 2, 5, 3, 1, 2, 5, 3…
$ minimum_maximum_nights                       <dbl> 15, 30, 14, 10, 1125, 1125, 60, 21, 90, 15, 1125, 21, 60, 40, 1125, 1125, 21, 31, 365, 730, 30, 30, 31, 1120, 28, 21, 730, 14, 300, 21, 21,…
$ maximum_maximum_nights                       <dbl> 15, 30, 14, 10, 1125, 1125, 60, 21, 90, 15, 1125, 21, 60, 40, 1125, 1125, 21, 31, 365, 730, 30, 30, 31, 1120, 28, 21, 730, 14, 300, 21, 21,…
$ minimum_nights_avg_ntm                       <dbl> 2.0, 3.0, 4.1, 2.0, 3.0, 100.0, 6.0, 5.0, 35.0, 3.0, 2.0, 5.0, 3.0, 6.0, 2.0, 4.0, 3.0, 2.0, 5.0, 2.0, 2.0, 4.0, 5.0, 3.0, 3.0, 6.0, 5.0, 7…
$ maximum_nights_avg_ntm                       <dbl> 15, 30, 14, 10, 1125, 1125, 60, 21, 90, 15, 1125, 21, 60, 40, 1125, 1125, 21, 31, 365, 730, 30, 30, 31, 1120, 28, 21, 730, 14, 300, 21, 21,…
$ calendar_updated                             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ has_availability                             <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T…
$ availability_30                              <dbl> 0, 26, 0, 0, 2, 0, 5, 23, 7, 29, 9, 0, 17, 0, 7, 29, 0, 4, 7, 0, 7, 12, 0, 0, 1, 9, 11, 1, 5, 0, 8, 0, 7, 19, 0, 3, 18, 18, 12, 4, 0, 0, 4,…
$ availability_60                              <dbl> 0, 42, 0, 0, 2, 0, 9, 53, 17, 59, 32, 0, 45, 0, 7, 59, 0, 4, 7, 0, 17, 34, 19, 0, 1, 21, 32, 18, 32, 0, 15, 17, 32, 41, 0, 3, 48, 36, 13, 2…
$ availability_90                              <dbl> 0, 72, 0, 0, 2, 0, 15, 83, 44, 89, 62, 0, 75, 0, 10, 89, 0, 4, 7, 0, 35, 64, 49, 0, 1, 44, 62, 48, 62, 0, 33, 47, 58, 71, 0, 3, 78, 66, 13,…
$ availability_365                             <dbl> 0, 347, 46, 0, 91, 217, 249, 358, 90, 364, 62, 0, 350, 70, 32, 364, 261, 39, 7, 200, 145, 339, 49, 0, 262, 312, 337, 316, 152, 14, 123, 322…
$ calendar_last_scraped                        <date> 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-07-20, 2021-0…
$ number_of_reviews                            <dbl> 168, 50, 22, 17, 73, 7, 71, 11, 75, 67, 92, 47, 102, 30, 42, 24, 14, 47, 6, 95, 84, 10, 43, 36, 46, 100, 55, 1, 381, 71, 29, 187, 131, 92, …
$ number_of_reviews_ltm                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 3, 0, 0, 17, 0, 1, 3, 6, 21, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 1, 0…
$ number_of_reviews_l30d                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
$ first_review                                 <date> 2013-01-02, 2016-02-06, 2016-08-24, 2016-06-19, 2012-06-12, 2011-07-09, 2015-05-19, 2016-06-04, 2014-05-14, 2013-03-04, 2012-05-21, 2017-1…
$ last_review                                  <date> 2018-11-23, 2019-12-14, 2019-07-22, 2012-06-10, 2019-08-17, 2012-05-08, 2018-05-15, 2017-05-01, 2017-11-17, 2019-05-26, 2019-06-09, 2019-0…
$ review_scores_rating                         <dbl> 4.78, 4.90, 4.91, 4.87, 4.89, 4.57, 4.74, 5.00, 4.72, 4.73, 4.70, 4.83, 4.85, 4.80, 4.71, 4.67, 4.75, 4.61, 5.00, 4.98, 4.89, 5.00, 4.65, 4…
$ review_scores_accuracy                       <dbl> 4.78, 4.91, 4.85, 4.80, 4.96, 4.75, 4.75, 4.78, 4.69, 4.67, 4.61, 4.95, 4.89, 4.67, 4.76, 4.75, 4.69, 4.76, 5.00, 4.96, 4.88, 5.00, 4.56, 4…
$ review_scores_cleanliness                    <dbl> 4.78, 4.96, 4.77, 4.87, 4.93, 4.50, 4.67, 5.00, 4.51, 4.53, 4.59, 4.93, 4.87, 4.63, 4.88, 4.25, 4.62, 4.37, 5.00, 4.96, 4.75, 4.63, 4.21, 4…
$ review_scores_checkin                        <dbl> 4.87, 4.91, 5.00, 4.85, 4.86, 5.00, 4.91, 5.00, 4.83, 4.71, 4.76, 4.91, 4.94, 4.78, 4.81, 4.79, 5.00, 4.83, 5.00, 4.98, 4.91, 5.00, 4.74, 4…
$ review_scores_communication                  <dbl> 4.90, 4.83, 5.00, 4.80, 4.90, 5.00, 4.91, 5.00, 4.92, 4.78, 4.72, 4.88, 4.97, 4.78, 4.95, 4.88, 4.92, 4.71, 5.00, 4.97, 4.89, 5.00, 4.84, 4…
$ review_scores_location                       <dbl> 4.72, 4.96, 4.85, 4.85, 4.87, 4.50, 4.80, 4.89, 4.76, 4.66, 4.42, 4.74, 4.97, 4.81, 4.93, 4.96, 4.85, 4.78, 5.00, 4.80, 4.94, 5.00, 4.72, 4…
$ review_scores_value                          <dbl> 4.71, 4.82, 4.77, 4.46, 4.70, 4.50, 4.67, 4.78, 4.62, 4.47, 4.47, 4.79, 4.72, 4.67, 4.60, 4.63, 4.38, 4.71, 5.00, 4.89, 4.76, 4.88, 4.49, 4…
$ license                                      <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ instant_bookable                             <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
$ calculated_host_listings_count               <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 3, 5, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 3, 1, 1, 1, 6…
$ calculated_host_listings_count_entire_homes  <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0…
$ calculated_host_listings_count_private_rooms <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 3, 5, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3…
$ calculated_host_listings_count_shared_rooms  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3…
$ reviews_per_month                            <dbl> 1.61, 0.75, 0.37, 0.27, 0.66, 0.06, 0.94, 0.18, 0.86, 0.66, 0.82, 1.03, 1.36, 0.27, 0.55, 0.60, 0.24, 0.97, 0.05, 0.86, 0.76, 0.57, 2.28, 0…
## Additional data (not used so far)
#calendar <- read_csv('http://data.insideairbnb.com/denmark/hovedstaden/copenhagen/2021-07-20/data/calendar.csv.gz')
#calendar %>% glimpse()

# reviews <- read_csv('http://data.insideairbnb.com/denmark/hovedstaden/copenhagen/2021-07-20/data/reviews.csv.gz')
# reviews %>% glimpse()
# Keep an untouched copy of the raw listings: `data` is transformed in place
# by the steps below, while `listings` stays exactly as loaded.
# (The assignment used to read `data <- listings`, which failed because the
# file was loaded into `data` and `listings` did not exist yet.)
listings <- data
Error: object 'listings' not found

EDA

# Per-column summary grouped by variable type (missing counts, completeness,
# ranges/quantiles and inline histograms for numeric columns).
data %>% skim()
── Data Summary ────────────────────────
                           Values    
Name                       Piped data
Number of rows             9949      
Number of columns          74        
_______________________              
Column type frequency:               
  character                23        
  Date                     5         
  logical                  9         
  numeric                  37        
________________________             
Group variables            None      

── Variable type: character ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   skim_variable          n_missing complete_rate   min   max empty n_unique whitespace
 1 listing_url                    0         1        33    37     0     9949          0
 2 name                           1         1.00      1   211     0     9670          0
 3 description                  352         0.965     2  1000     0     9468          0
 4 neighborhood_overview       4338         0.564     4  1000     0     5366          0
 5 picture_url                    0         1        61   126     0     9828          0
 6 host_url                       0         1        39    43     0     8677          0
 7 host_name                      4         1.00      1    28     0     2893          0
 8 host_location                 21         0.998     2   119     0      400          0
 9 host_about                  4366         0.561     1  6639     0     4636         12
10 host_response_time             4         1.00      3    18     0        5          0
11 host_response_rate             4         1.00      2     4     0       61          0
12 host_acceptance_rate           4         1.00      2     4     0       97          0
13 host_thumbnail_url             4         1.00     55   106     0     8627          0
14 host_picture_url               4         1.00     57   109     0     8627          0
15 host_neighbourhood          4029         0.595     2    20     0       33          0
16 host_verifications             0         1         2   158     0      254          0
17 neighbourhood               4338         0.564     7    51     0      192          0
18 neighbourhood_cleansed         0         1         5    25     0       11          0
19 property_type                  0         1         4    34     0       48          0
20 room_type                      0         1        10    15     0        4          0
21 bathrooms_text                17         0.998     6    17     0       21          0
22 amenities                      0         1         2  1424     0     9567          0
23 price                          0         1         5    11     0     1451          0

── Variable type: Date ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  skim_variable         n_missing complete_rate min        max        median     n_unique
1 last_scraped                  0         1     2021-07-20 2021-07-20 2021-07-20        1
2 host_since                    4         1.00  2009-05-12 2021-07-18 2015-06-14     3046
3 calendar_last_scraped         0         1     2021-07-20 2021-07-20 2021-07-20        1
4 first_review               1899         0.809 2011-07-09 2021-07-19 2018-08-22     2130
5 last_review                1899         0.809 2011-07-21 2021-07-19 2019-09-07     1603

── Variable type: logical ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  skim_variable                n_missing complete_rate    mean count                 
1 host_is_superhost                    4          1.00   0.139 "FAL: 8560, TRU: 1385"
2 host_has_profile_pic                 4          1.00   0.995 "TRU: 9897, FAL: 48"  
3 host_identity_verified               4          1.00   0.775 "TRU: 7712, FAL: 2233"
4 neighbourhood_group_cleansed      9949          0    NaN     ": "                  
5 bathrooms                         9949          0    NaN     ": "                  
6 calendar_updated                  9949          0    NaN     ": "                  
7 has_availability                     0          1      0.983 "TRU: 9782, FAL: 167" 
8 license                           9949          0    NaN     ": "                  
9 instant_bookable                     0          1      0.222 "FAL: 7745, TRU: 2204"

── Variable type: numeric ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   skim_variable                                n_missing complete_rate     mean            sd       p0      p25      p50      p75    p100 hist 
 1 id                                                   0         1     2.62e+ 7 15951708.     6.98e+ 3 1.24e+ 7 2.67e+ 7 4.05e+ 7 5.11e 7 ▇▆▆▆▇
 2 scrape_id                                            0         1     2.02e+13        0      2.02e+13 2.02e+13 2.02e+13 2.02e+13 2.02e13 ▁▁▇▁▁
 3 host_id                                              0         1     8.20e+ 7 98867345.     1.68e+ 4 1.03e+ 7 3.55e+ 7 1.29e+ 8 4.14e 8 ▇▂▁▁▁
 4 host_listings_count                                  4         1.00  1.08e+ 1       53.1    0        1   e+ 0 1   e+ 0 2   e+ 0 3.46e 2 ▇▁▁▁▁
 5 host_total_listings_count                            4         1.00  1.08e+ 1       53.1    0        1   e+ 0 1   e+ 0 2   e+ 0 3.46e 2 ▇▁▁▁▁
 6 latitude                                             0         1     5.57e+ 1        0.0191 5.56e+ 1 5.57e+ 1 5.57e+ 1 5.57e+ 1 5.57e 1 ▁▃▇▆▁
 7 longitude                                            0         1     1.26e+ 1        0.0317 1.25e+ 1 1.25e+ 1 1.26e+ 1 1.26e+ 1 1.26e 1 ▁▂▇▆▂
 8 accommodates                                         0         1     3.48e+ 0        1.79   0        2   e+ 0 3   e+ 0 4   e+ 0 1.6 e 1 ▇▆▁▁▁
 9 bedrooms                                           236         0.976 1.70e+ 0        1.38   1   e+ 0 1   e+ 0 1   e+ 0 2   e+ 0 1.01e 2 ▇▁▁▁▁
10 beds                                                73         0.993 2.09e+ 0        1.52   0        1   e+ 0 2   e+ 0 3   e+ 0 2.5 e 1 ▇▁▁▁▁
11 minimum_nights                                       0         1     4.31e+ 0       17.6    1   e+ 0 2   e+ 0 3   e+ 0 4   e+ 0 1.11e 3 ▇▁▁▁▁
12 maximum_nights                                       0         1     5.72e+ 2      540.     1   e+ 0 2   e+ 1 3.65e+ 2 1.12e+ 3 4   e 3 ▇▇▁▁▁
13 minimum_minimum_nights                               0         1     4.29e+ 0       17.2    1   e+ 0 2   e+ 0 3   e+ 0 4   e+ 0 1.11e 3 ▇▁▁▁▁
14 maximum_minimum_nights                               0         1     4.72e+ 0       18.3    1   e+ 0 2   e+ 0 3   e+ 0 4   e+ 0 1.11e 3 ▇▁▁▁▁
15 minimum_maximum_nights                               0         1     6.51e+ 2      538.     1   e+ 0 2.5 e+ 1 1.12e+ 3 1.12e+ 3 4   e 3 ▆▇▁▁▁
16 maximum_maximum_nights                               0         1     6.62e+ 2      536.     1   e+ 0 2.8 e+ 1 1.12e+ 3 1.12e+ 3 4   e 3 ▆▇▁▁▁
17 minimum_nights_avg_ntm                               0         1     4.53e+ 0       17.9    1   e+ 0 2   e+ 0 3   e+ 0 4   e+ 0 1.11e 3 ▇▁▁▁▁
18 maximum_nights_avg_ntm                               0         1     6.58e+ 2      536.     1   e+ 0 2.8 e+ 1 1.12e+ 3 1.12e+ 3 4   e 3 ▆▇▁▁▁
19 availability_30                                      0         1     8.88e+ 0       10.7    0        0        4   e+ 0 1.6 e+ 1 3   e 1 ▇▂▁▁▂
20 availability_60                                      0         1     1.87e+ 1       22.0    0        0        8   e+ 0 3.7 e+ 1 6   e 1 ▇▂▁▁▂
21 availability_90                                      0         1     3.06e+ 1       34.0    0        0        1.3 e+ 1 6.3 e+ 1 9   e 1 ▇▁▁▁▃
22 availability_365                                     0         1     1.14e+ 2      128.     0        3   e+ 0 5.8 e+ 1 2   e+ 2 3.65e 2 ▇▂▂▁▂
23 number_of_reviews                                    0         1     1.90e+ 1       35.0    0        1   e+ 0 7   e+ 0 2.2 e+ 1 6.45e 2 ▇▁▁▁▁
24 number_of_reviews_ltm                                0         1     1.40e+ 0        3.98   0        0        0        1   e+ 0 1.16e 2 ▇▁▁▁▁
25 number_of_reviews_l30d                               0         1     2.59e- 1        0.792  0        0        0        0        2.2 e 1 ▇▁▁▁▁
26 review_scores_rating                              1899         0.809 4.72e+ 0        0.613  0        4.67e+ 0 4.86e+ 0 5   e+ 0 5   e 0 ▁▁▁▁▇
27 review_scores_accuracy                            1998         0.799 4.83e+ 0        0.300  0        4.79e+ 0 4.92e+ 0 5   e+ 0 5   e 0 ▁▁▁▁▇
28 review_scores_cleanliness                         1998         0.799 4.69e+ 0        0.426  0        4.56e+ 0 4.81e+ 0 5   e+ 0 5   e 0 ▁▁▁▁▇
29 review_scores_checkin                             1998         0.799 4.88e+ 0        0.268  0        4.86e+ 0 4.97e+ 0 5   e+ 0 5   e 0 ▁▁▁▁▇
30 review_scores_communication                       1998         0.799 4.90e+ 0        0.261  1   e+ 0 4.89e+ 0 5   e+ 0 5   e+ 0 5   e 0 ▁▁▁▁▇
31 review_scores_location                            1999         0.799 4.82e+ 0        0.267  1   e+ 0 4.75e+ 0 4.89e+ 0 5   e+ 0 5   e 0 ▁▁▁▁▇
32 review_scores_value                               1999         0.799 4.70e+ 0        0.348  1   e+ 0 4.6 e+ 0 4.76e+ 0 4.92e+ 0 5   e 0 ▁▁▁▁▇
33 calculated_host_listings_count                       0         1     5.99e+ 0       26.3    1   e+ 0 1   e+ 0 1   e+ 0 1   e+ 0 1.86e 2 ▇▁▁▁▁
34 calculated_host_listings_count_entire_homes          0         1     5.67e+ 0       26.3    0        1   e+ 0 1   e+ 0 1   e+ 0 1.86e 2 ▇▁▁▁▁
35 calculated_host_listings_count_private_rooms         0         1     3.00e- 1        0.956  0        0        0        0        1   e 1 ▇▁▁▁▁
36 calculated_host_listings_count_shared_rooms          0         1     7.54e- 3        0.149  0        0        0        0        5   e 0 ▇▁▁▁▁
37 reviews_per_month                                 1899         0.809 6.82e- 1        1.12   1   e- 2 1.7 e- 1 3.9 e- 1 8   e- 1 3.66e 1 ▇▁▁▁▁

Variable transformation

# `price` is read as text such as "$2,400.00"; parse_number() strips the
# currency symbol and grouping commas and yields a numeric column.
data <- data %>%
  mutate(price = parse_number(price))

Variable selection

# Keep only the modelling columns; the outcome `price` is renamed to `y` and
# placed first.
data <- data %>%
  select(
    y = price,
    review_scores_rating,
    neighbourhood_cleansed,
    accommodates,
    room_type,
    number_of_reviews,
    host_is_superhost,
    host_identity_verified,
    bedrooms
  )

Filter observations

# Distribution of the outcome `y` (price) before any observations are filtered.
ggplot(data, aes(x = y)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Remove rows without a price, then discard the most expensive listings
# (those whose percentile rank of `y` is 0.95 or higher).
data <- data %>%
  filter(!is.na(y)) %>%
  filter(percent_rank(y) < 0.95)

Check categories

# Frequency of each room type, most common first.
count(data, room_type, sort = TRUE)

# Exclude the "Shared room" and "Hotel room" categories from the analysis.
data <- data %>%
  filter(!room_type %in% c('Shared room', 'Hotel room'))

Missing Data

# Recode empty strings in character columns as missing values. na_if() keeps
# every column character-typed, whereas ifelse(.x == "", NA, .x) returns a
# logical vector when a column contains no non-empty values.
data <- data %>%
  mutate(across(where(is.character), ~ na_if(.x, "")))

library(VIM)

# Visualise missingness with VIM::aggr(): missing values per variable and the
# combinations in which variables are missing together.
data %>%
  aggr(numbers = TRUE, prop = c(TRUE, FALSE))

# For convenience we simply drop every row that still contains an NA instead
# of handling missing values in a more sophisticated way.
data <- drop_na(data)

Prediction

library(tidymodels)

split the data

# Hold out 25% of the rows for testing, stratifying the split on the outcome
# `y` so that both partitions cover a similar range of prices.
# (The argument was previously misspelled `stata`, so the stratification
# request never reached initial_split()'s `strata` parameter.)
data_split <- data %>% initial_split(prop = 0.75, strata = y)
data_train <- data_split %>% training()
data_test <- data_split %>% testing()
# Preprocessing shared by both models, defined on the training data:
#   1. centre and scale the numeric predictors (the outcome is left untouched);
#   2. dummy-encode the nominal predictors.
# Dummy variables use the default reference-level coding (k - 1 columns per
# factor). Full one-hot encoding (one_hot = TRUE) is collinear with the
# intercept of the linear model and led to the "rank-deficient fit" warning
# when predicting with it.
data_recipe <- data_train %>%
  recipe(y ~ .) %>%
  step_normalize(all_numeric(), -all_outcomes()) %>%
  step_dummy(all_nominal(), -all_outcomes())
  # %>% prep() # NOTE: Do not prepare a recipe upfront when using in a hyperparameter tuning workflow

Models & Workflows

# Baseline model specification: linear regression fitted with the "lm" engine.
model_lm <- set_engine(
  linear_reg(mode = 'regression'),
  engine = 'lm'
)
# Boosted-tree specification (xgboost engine) with a fixed ensemble of 100
# trees; the four arguments set to tune() are placeholders that the
# hyperparameter search fills in later.
model_xg <- set_engine(
  boost_tree(
    mode = 'regression',
    trees = 100,
    mtry = tune(),
    min_n = tune(),
    tree_depth = tune(),
    learn_rate = tune()
  ),
  engine = "xgboost"
)
# Common workflow skeleton that carries only the preprocessing recipe.
workflow_general <- add_recipe(workflow(), data_recipe)

# One workflow per model, each built on the shared skeleton.
workflow_lm <- add_model(workflow_general, model_lm)
workflow_xg <- add_model(workflow_general, model_xg)

HYPERPARAMETER TUNING

# Resampling scheme for tuning: 3-fold cross-validation on the training set,
# repeated twice, with folds stratified on the outcome `y`.
data_resample <- vfold_cv(
  data_train,
  v = 3,
  repeats = 2,
  strata = y
)
# Evaluate a grid of 10 hyperparameter candidates for the xgboost workflow
# on every resample.
tune_xg <- workflow_xg %>%
  tune_grid(
    resamples = data_resample,
    grid = 10
  )
i Creating pre-processing data to finalize unknown parameter: mtry
# Plot the resampled performance metrics against the tuned hyperparameters.
autoplot(tune_xg)

# Pick the candidate with the best (lowest) resampled RMSE and display it.
best_param_xg <- select_best(tune_xg, metric = 'rmse')
best_param_xg
# Replace the tune() placeholders in the xgboost workflow with the selected
# hyperparameter values.
workflow_final_xg <- finalize_workflow(workflow_xg, parameters = best_param_xg)
# Fit both workflows (recipe + model) on the full training set.
fit_lm <- fit(workflow_lm, data = data_train)
fit_xg <- fit(workflow_final_xg, data = data_train)
# Collect predictions from every model in long format (one row per observation
# and model). Evaluation uses the held-out test set: scoring the models on the
# rows they were fitted to overstates their accuracy, and `data_test` was
# otherwise never used.
#   base: naive benchmark that always predicts the mean training price
#   lm:   linear regression workflow
#   xg:   tuned xgboost workflow
pred_collected <- tibble(
  truth = data_test %>% pull(y),
  base = data_train %>% pull(y) %>% mean(),
  lm = fit_lm %>% predict(new_data = data_test) %>% pull(.pred),
  xg = fit_xg %>% predict(new_data = data_test) %>% pull(.pred)
) %>%
  pivot_longer(
    cols = -truth,
    names_to = 'model',
    values_to = '.pred'
  )
Warning in predict.lm(object = object$fit, newdata = new_data, type = "response") :
  prediction from a rank-deficient fit may be misleading
# RMSE of each model's predictions in `pred_collected`, lowest error first.
pred_collected %>%
  group_by(model) %>%
  rmse(truth = truth, estimate = .pred) %>%
  select(model, RMSE = .estimate) %>%
  arrange(RMSE)
# Predicted vs. observed price for each model; the dashed grey diagonal marks
# where prediction and truth coincide.
ggplot(pred_collected, aes(x = truth, y = .pred, colour = model)) +
  geom_abline(linetype = 2, colour = "gray80", size = 1.5) +
  geom_point(alpha = 0.5) +
  labs(x = "Truth", y = "Predicted price", colour = "Type of model")

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KG1hZ3JpdHRyKQpsaWJyYXJ5KHNraW1yKQpgYGAKCiMgTG9hZCBkYXRhCgpgYGB7cn0KbGlzdGluZ3MgPC0gcmVhZF9jc3YoJ2h0dHA6Ly9kYXRhLmluc2lkZWFpcmJuYi5jb20vZGVubWFyay9ob3ZlZHN0YWRlbi9jb3BlbmhhZ2VuLzIwMjEtMDctMjAvZGF0YS9saXN0aW5ncy5jc3YuZ3onKQpgYGAKCmBgYHtyfQpsaXN0aW5ncyAlPiUgZ2xpbXBzZSgpCmBgYAoKCgpgYGB7cn0KIyMgQWRpdHRpb25hbCBkYXRhIChub3QgdXNlZCBzbyBmYXIpCiNjYWxlbmRhciA8LSByZWFkX2NzdignaHR0cDovL2RhdGEuaW5zaWRlYWlyYm5iLmNvbS9kZW5tYXJrL2hvdmVkc3RhZGVuL2NvcGVuaGFnZW4vMjAyMS0wNy0yMC9kYXRhL2NhbGVuZGFyLmNzdi5neicpCiNjYWxlbmRhciAlPiUgZ2xpbXBzZSgpCgojIHJldmlld3MgPC0gcmVhZF9jc3YoJ2h0dHA6Ly9kYXRhLmluc2lkZWFpcmJuYi5jb20vZGVubWFyay9ob3ZlZHN0YWRlbi9jb3BlbmhhZ2VuLzIwMjEtMDctMjAvZGF0YS9yZXZpZXdzLmNzdi5neicpCiMgcmV2aWV3cyAlPiUgZ2xpbXBzZSgpCmBgYAoKYGBge3J9CiMgVG8gcmV0YWluIHRoZSBvcmlnaW5hbCBsaXN0aW5ncyBkYXRhCmRhdGEgPC0gbGlzdGluZ3MgCmBgYAoKCiMgRURBCgpgYGB7cn0KZGF0YSAlPiUgc2tpbSgpCmBgYAoKIyBWYXJpYWJsZSB0cmFuc2Zvcm1hdGlvb24KCmBgYHtyfQpkYXRhICU8PiUgCiAgbXV0YXRlKHByaWNlID0gcHJpY2UgJT4lIHBhcnNlX251bWJlcigpKQpgYGAKCiMgVmFyaWFibGUgc2VsZWN0aW9uCgpgYGB7cn0KZGF0YSAlPD4lIAogIHJlbmFtZSh5ID0gcHJpY2UpICU+JQogIHNlbGVjdCh5LCByZXZpZXdfc2NvcmVzX3JhdGluZywgbmVpZ2hib3VyaG9vZF9jbGVhbnNlZCwgYWNjb21tb2RhdGVzLCByb29tX3R5cGUsIG51bWJlcl9vZl9yZXZpZXdzLCBob3N0X2lzX3N1cGVyaG9zdCwgaG9zdF9pZGVudGl0eV92ZXJpZmllZCwgYmVkcm9vbXMpIApgYGAKCiMgRmlsdGVyIG9ic2VydmF0aW9ucwoKYGBge3J9CmRhdGEgJT4lIGdncGxvdChhZXMoeCA9IHkpKSArCiAgZ2VvbV9oaXN0b2dyYW0oKQpgYGAKCgpgYGB7cn0KZGF0YSAlPD4lIAogIGRyb3BfbmEoeSkgJT4lCiAgZmlsdGVyKHBlcmNlbnRfcmFuayh5KSA8MC45NSkKYGBgCgojIENoZXJjayBjYXRlZ29yaWVzCgpgYGB7cn0KZGF0YSAlPiUgY291bnQocm9vbV90eXBlLCBzb3J0ID0gVFJVRSkKYGBgCgpgYGB7cn0KZGF0YSAlPD4lCiAgZmlsdGVyKCEocm9vbV90eXBlICVpbiUgYygnU2hhcmVkIHJvb20nLCAnSG90ZWwgcm9vbScpKSkKYGBgCgojIE1pc3NpbmcgRGF0YQoKYGBge3J9CmRhdGEgJTw+JQogICAgbXV0YXRlKGFjcm9zcyh3aGVyZShpc19jaGFyYWN0ZXIpLCB+aWZlbHNlKC54ID09ICIiLCBOQSwgLngpKSkKYGBgCgpgYGB7cn0KbGlicmFyeShWSU0pCmRhdGEgJT4lCiAgYWdncihu
dW1iZXJzID0gVFJVRSwgcHJvcCA9IGMoVFJVRSwgRkFMU0UpKQpgYGAKCmBgYHtyfQojIFdlIGhlcmUgZm9yIGNvbnZlbmllbmNlIGp1c3R5IGRlY2lkZWQgdG8gZHJvcCBhbGwgTkFzIGFuZCBub3QgZGVhbCB3aXRoIHRoZW0gaW4gYSBtb3JlIHNvcGhpc3RpY2F0ZWQgd2F5CmRhdGEgJTw+JQogIGRyb3BfbmEoKQpgYGAKCiMgUHJlZGljdGlvbgoKYGBge3J9CmxpYnJhcnkodGlkeW1vZGVscykKYGBgCgojIyBzcGxpdCB0aGUgZGF0YQoKYGBge3J9CmRhdGFfc3BsaXQgPC0gZGF0YSAlPiUgaW5pdGlhbF9zcGxpdChwcm9wID0gMC43NSwgc3RhdGEgPSB5KQpgYGAKCmBgYHtyfQpkYXRhX3RyYWluIDwtIGRhdGFfc3BsaXQgJT4lIHRyYWluaW5nKCkKZGF0YV90ZXN0IDwtIGRhdGFfc3BsaXQgJT4lIHRlc3RpbmcoKQpgYGAKCmBgYHtyfQpkYXRhX3JlY2lwZSA8LSBkYXRhX3RyYWluICU+JQogIHJlY2lwZSh5IH4uKSAlPiUKICBzdGVwX2NlbnRlcihhbGxfbnVtZXJpYygpLCAtYWxsX291dGNvbWVzKCkpICU+JQogIHN0ZXBfc2NhbGUoYWxsX251bWVyaWMoKSwgLWFsbF9vdXRjb21lcygpKSAlPiUKICBzdGVwX2R1bW15KGFsbF9ub21pbmFsKCksIC1hbGxfb3V0Y29tZXMoKSwgb25lX2hvdCA9IFRSVUUpIAogICMgJT4lIHByZXAoKSAjIE5PVEU6IERvIG5vdCBwcmVwYXJlIGEgcmVjaXBlIHVwZnJvbnQgd2hlbiB1c2luZyBpbiBhIGh5cGVycGFyYW1ldGVyIHR1bmluZyB3b3JrZmxvdwpgYGAKCiMgTW9kZWxzICYgV29ya2Zsb3dzCgpgYGB7cn0KbW9kZWxfbG0gPC0gbGluZWFyX3JlZyhtb2RlID0gJ3JlZ3Jlc3Npb24nKSAlPiUKICBzZXRfZW5naW5lKCdsbScpCmBgYAoKCmBgYHtyfQptb2RlbF94ZyA8LSBib29zdF90cmVlKG1vZGUgPSAncmVncmVzc2lvbicsIAogICAgICAgICAgICAgICAgICAgICAgIHRyZWVzID0gMTAwLAogICAgICAgICAgICAgICAgICAgICAgIG10cnkgPSB0dW5lKCksIAogICAgICAgICAgICAgICAgICAgICAgIG1pbl9uID0gdHVuZSgpLCAKICAgICAgICAgICAgICAgICAgICAgICB0cmVlX2RlcHRoID0gdHVuZSgpLCAKICAgICAgICAgICAgICAgICAgICAgICBsZWFybl9yYXRlID0gdHVuZSgpCiAgICAgICAgICAgICAgICAgICAgICAgKSAlPiUKICBzZXRfZW5naW5lKCJ4Z2Jvb3N0IikgCmBgYAoKYGBge3J9CndvcmtmbG93X2dlbmVyYWwgPC0gd29ya2Zsb3coKSAlPiUKICBhZGRfcmVjaXBlKGRhdGFfcmVjaXBlKSAKCndvcmtmbG93X2xtIDwtIHdvcmtmbG93X2dlbmVyYWwgJT4lCiAgYWRkX21vZGVsKG1vZGVsX2xtKQoKd29ya2Zsb3dfeGcgPC0gd29ya2Zsb3dfZ2VuZXJhbCAlPiUKICBhZGRfbW9kZWwobW9kZWxfeGcpCmBgYAoKIyMgSFlQRVJQQVJBTUVURVIgVFVOSU5HCgpgYGB7cn0KZGF0YV9yZXNhbXBsZSA8LSBkYXRhX3RyYWluICU+JSAKICB2Zm9sZF9jdihzdHJhdGEgPSB5LAogICAgICAgICAgIHYgPSAzLAogICAgICAgICAgIHJlcGVhdHMgPSAyKQpgYGAKCgpgYGB7cn0KdHVuZV94ZyA8LQog
IHR1bmVfZ3JpZCgKICAgIHdvcmtmbG93X3hnLAogICAgcmVzYW1wbGVzID0gZGF0YV9yZXNhbXBsZSwKICAgIGdyaWQgPSAxMAogICkKYGBgCgpgYGB7cn0KdHVuZV94ZyAlPiUgYXV0b3Bsb3QoKQpgYGAKYGBge3J9CmJlc3RfcGFyYW1feGcgPC0gdHVuZV94ZyAlPiUgc2VsZWN0X2Jlc3QobWV0cmljID0gJ3Jtc2UnKQpiZXN0X3BhcmFtX3hnCmBgYAoKYGBge3J9CndvcmtmbG93X2ZpbmFsX3hnIDwtIHdvcmtmbG93X3hnICU+JQogIGZpbmFsaXplX3dvcmtmbG93KHBhcmFtZXRlcnMgPSBiZXN0X3BhcmFtX3hnKQpgYGAKCmBgYHtyfQpmaXRfbG0gPC0gd29ya2Zsb3dfbG0gJT4lCiAgZml0KGRhdGFfdHJhaW4pCgpmaXRfeGcgPC0gd29ya2Zsb3dfZmluYWxfeGcgJT4lCiAgZml0KGRhdGFfdHJhaW4pCmBgYAoKCmBgYHtyfQpwcmVkX2NvbGxlY3RlZCA8LSB0aWJibGUoCiAgdHJ1dGggPSBkYXRhX3RyYWluICU+JSBwdWxsKHkpLAogIGJhc2UgPSBtZWFuKHRydXRoKSwKICBsbSA9IGZpdF9sbSAlPiUgcHJlZGljdChuZXdfZGF0YSA9IGRhdGFfdHJhaW4pICU+JSBwdWxsKC5wcmVkKSwKICB4ZyA9IGZpdF94ZyAlPiUgcHJlZGljdChuZXdfZGF0YSA9IGRhdGFfdHJhaW4pICU+JSBwdWxsKC5wcmVkKSwKICApICU+JSAKICBwaXZvdF9sb25nZXIoY29scyA9IC10cnV0aCwKICAgICAgICAgICAgICAgbmFtZXNfdG8gPSAnbW9kZWwnLAogICAgICAgICAgICAgICB2YWx1ZXNfdG8gPSAnLnByZWQnKQpgYGAKCmBgYHtyfQpwcmVkX2NvbGxlY3RlZCAlPiUKICBncm91cF9ieShtb2RlbCkgJT4lCiAgcm1zZSh0cnV0aCA9IHRydXRoLCBlc3RpbWF0ZSA9IC5wcmVkKSAlPiUKICBzZWxlY3QobW9kZWwsIC5lc3RpbWF0ZSkgJT4lCiAgcmVuYW1lKFJNU0UgPSAuZXN0aW1hdGUpICU+JQogIGFycmFuZ2UoUk1TRSkKYGBgCgpgYGB7cn0KcHJlZF9jb2xsZWN0ZWQgJT4lCiAgZ2dwbG90KGFlcyh4ID0gdHJ1dGgsIHkgPSAucHJlZCwgY29sb3IgPSBtb2RlbCkpICsKICBnZW9tX2FibGluZShsdHkgPSAyLCBjb2xvciA9ICJncmF5ODAiLCBzaXplID0gMS41KSArCiAgZ2VvbV9wb2ludChhbHBoYSA9IDAuNSkgKwogIGxhYnMoCiAgICB4ID0gIlRydXRoIiwKICAgIHkgPSAiUHJlZGljdGVkIHByaWNlIiwKICAgIGNvbG9yID0gIlR5cGUgb2YgbW9kZWwiCiAgKQpgYGAKCg==