### Load standardpackages
library(tidyverse) # Collection of all the good stuff like dplyr, ggplot2 ect.
library(magrittr) # For extra-piping operators (eg. %<>%)
library(tidytext)

Download the data

# Download the gzipped politician-tweets dump (pol_tweets.gz) into a temporary file
library(jsonlite)
tmp <- tempfile()
# mode = "wb": the archive is binary, so write it byte-for-byte on every platform
download.file("https://github.com/SDS-AAU/SDS-master/raw/master/M2/data/pol_tweets.gz", tmp, mode = "wb")
trying URL 'https://github.com/SDS-AAU/SDS-master/raw/master/M2/data/pol_tweets.gz'
Content type 'application/octet-stream' length 7342085 bytes (7.0 MB)
==================================================
downloaded 7.0 MB
# gzfile()'s second argument is the connection's open mode, not a label, so
# pass only the path and let stream_in() open and close the connection itself
tweets_raw <- stream_in(gzfile(tmp))

 Found 1 records...
 Imported 1 records. Simplifying...
tweets_raw %>% glimpse()
Rows: 1
Columns: 2
$ text   <df[,50000]> <data.frame[1 x 50000]>
$ labels <df[,50000]> <data.frame[1 x 50000]>
# The import is a single record holding two nested frames with one column per
# tweet; turn it into one row per tweet (column name = tweet ID)
tweets <- tibble(
  ID = colnames(tweets_raw[[1]]),
  text = as.character(tweets_raw[[1]]),
  labels = as.logical(tweets_raw[[2]])
)
#rm(tweets_raw)
glimpse(tweets)
Rows: 50,000
Columns: 3
$ ID     <chr> "340675", "289492", "371088", "82212", "476047", "220741", "379074", "633731", "103805", "401277", "493433", "578814", "570425", "307632", "634542", "276734…
$ text   <chr> "RT @GreenBeretFound Today we remember Sgt. 1st Class Ryan J. Savard killed in action on this day eight years ago. SFC Savard was assigned to U.S. Army Spec…
$ labels <lgl> FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TR…
# Drop retweets, i.e. tweets whose text starts with "RT"
tweets <- tweets %>%
  filter(!str_detect(text, '^RT'))
head(tweets)

Tidying

# One row per token, using the Twitter-aware tokenizer.
# NOTE(review): token = "tweets" may no longer be supported by newer tidytext
# releases -- confirm against the installed version.
tweets_tidy <- unnest_tokens(tweets, word, text, token = "tweets")
head(tweets_tidy, 50)
# Raw token frequencies, most frequent first
count(tweets_tidy, word, sort = TRUE)

Preprocessing

# Token-level cleaning
#  (disabled) mutate(word = word %>% str_remove_all('[^[:alnum:]]')) ## remove all special characters
tweets_tidy <- tweets_tidy %>%
  filter(
    !str_detect(word, '@'), # drop tokens containing "@" (mentions)
    !str_detect(word, '^amp|^http|^t\\.co'), # Twitter-specific residue: &amp;, URLs, t.co links
    str_length(word) > 2 # keep tokens with at least 3 characters
  ) %>%
  group_by(word) %>%
  filter(n() > 100) %>% # keep words occurring more than 100 times
  ungroup() %>%
  anti_join(stop_words, by = 'word') # remove stopwords

TFIDF

TFIDF weighting

# top words
tweets_tidy %>%
  count(word, sort = TRUE) %>%
  head(20)
# TFIDF weights
# bind_tf_idf() expects one row per document-term pair. add_count() would keep
# one row per token occurrence, which inflates both the document totals and the
# document frequencies, so collapse to (ID, labels, word) counts first.
# `labels` is carried along because it is constant within a tweet and needed later.
tweets_tidy <- tweets_tidy %>%
  count(ID, labels, word) %>%
  bind_tf_idf(term = word,
              document = ID,
              n = n)
# TFIDF topwords
tweets_tidy %>%
  count(word, wt = tf_idf, sort = TRUE) %>%
  head(20)

Inspecting

Words by party affiliation

# Sum tf-idf per (label, word), then keep the 100 highest-scoring words of each label
labels_words <- tweets_tidy %>%
  count(labels, word, wt = tf_idf, sort = TRUE, name = "tf_idf") %>%
  group_by(labels) %>%
  slice(seq_len(100)) %>%
  ungroup()
# One facet per label; reorder_within()/scale_x_reordered() sort the bars inside each facet
ggplot(
  mutate(labels_words, word = reorder_within(word, by = tf_idf, within = labels)),
  aes(x = word, y = tf_idf, fill = labels)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~labels, ncol = 2, scales = "free") +
  scale_x_reordered() +
  coord_flip() +
  labs(x = NULL, y = "tf-idf")

Distance

tweets_tidy %>% head()

Predictive model

library(tidymodels)

Simple manual baseline

# Word -> label lookup: each word is assigned to the single label in which it
# has the highest summed tf-idf
words_classifier <- labels_words %>%
  arrange(desc(tf_idf)) %>%
  distinct(word, .keep_all = TRUE) %>%
  select(-tf_idf)
# Join against the de-duplicated lookup. Joining labels_words instead would match
# a word that is in both labels' top lists twice, once per label, so it would vote
# for both parties. labels.x = true label of the tweet, labels.y = label voted by the word.
tweet_null_model <- tweets_tidy %>%
  inner_join(words_classifier, by = 'word')
# Majority vote of the matched words per tweet (mean of the logical votes, rounded)
null_res <- tweet_null_model %>%
  group_by(ID) %>%
  summarise(truth = mean(labels.x, na.rm = TRUE) %>% round(0),
         pred = mean(labels.y, na.rm = TRUE) %>% round(0))
# Confusion table: rows = truth, columns = prediction
# (counts printed by runs made before the lookup fix will differ)
table(null_res$truth, null_res$pred)
   
        0     1
  0  8842  2609
  1 11327  9235

Preprocessing

# Modelling table built from the raw (untokenized) tweets: factor outcome y plus the text
data <- tweets %>%
  transmute(y = as.factor(labels), text)

Training & Test split

# Fix the RNG state so the stratified 75/25 split (and everything fitted on it)
# is reproducible across runs
set.seed(1337)
data_split <- initial_split(data, prop = 0.75, strata = y)

data_train <- training(data_split)
data_test <- testing(data_split)

Preprocessing pipeline

library(textrecipes) # Adittional recipes for working with text data
# Recipe mirroring the manual preprocessing above, estimated on the training set.
# NOTE(review): token = "tweets" may no longer be supported by newer textrecipes /
# tokenizers releases -- confirm against the installed versions.
data_recipe <- recipe(y ~ ., data = data_train) %>%
  themis::step_downsample(y) %>% # balance the classes by downsampling (optional)
  step_filter(!str_detect(text, '^RT')) %>% # drop retweets up front
  step_filter(text != "") %>% # drop empty tweets
  step_tokenize(text, token = "tweets") %>% # tokenize
  step_tokenfilter(text, min_times = 75) %>% # drop rare tokens
  step_stopwords(text, keep = FALSE) %>% # drop stopwords
  step_tfidf(text) # TFIDF weighting
  # step_pca(all_predictors()) could follow here for dimensionality reduction (optional)
# NOTE: only prep the recipe when it is not used inside a workflow
data_recipe <- prep(data_recipe)
data_recipe
Data Recipe

Inputs:

Training data contained 26241 data points and no missing data.

Operations:

Down-sampling based on y [trained]
Row filtering [trained]
Row filtering [trained]
Tokenization for text [trained]
Text filtering for text [trained]
Stop word removal for text [trained]
Term frequency-inverse document frequency with text [trained]

Since we will not do hyperparameter tuning, we directly bake/juice the recipe

# Processed training data as stored in the prepped recipe
data_train_prep <- juice(data_recipe)
# Apply the trained recipe to the held-out test data
data_test_prep <- bake(data_recipe, new_data = data_test)

Defining the models

# Logistic regression specification for the binary party label.
# NOTE(review): penalty / mixture describe an elastic net, but the engine is
# plain 'glm'; presumably these two arguments have no effect with this engine --
# confirm, and switch to a penalized engine (with a re-tuned penalty) if
# regularization is actually intended.
model_en <- logistic_reg(mode = 'classification', 
                         mixture = 0.5, 
                         penalty = 0.5) %>%
  set_engine('glm', family = binomial) 

Define the workflow

We will skip the workflow step this time, since we do not evaluate different models against each other.

fit the model

# Fit on the preprocessed training data
fit_en <- fit(model_en, formula = y ~ ., data = data_train_prep)
# Collect truth, predicted class and predicted P(TRUE).
# NOTE: these are in-sample predictions (made on data_train_prep); data_test_prep
# is not used here, so the figures below are not out-of-sample performance.
pred_collected <- tibble(
  truth = pull(data_train_prep, y),
  pred = pull(predict(fit_en, new_data = data_train_prep), .pred_class),
  pred_prob = pull(predict(fit_en, new_data = data_train_prep, type = "prob"), .pred_TRUE)
)
conf_mat(pred_collected, truth, pred)
          Truth
Prediction FALSE TRUE
     FALSE  6243 4276
     TRUE   3356 5323
# Summary metrics derived from the confusion matrix
summary(conf_mat(pred_collected, truth, pred))

Well… soso

Using the model for new prediction

Simple test

# Score a hand-written tweet: run it through the trained recipe, then predict its class
pred_own <- tibble(text = 'USA USA WE NEED A WALL TO MAKE AMERICA GREAT AGAIN AND KEEP THE MEXICANS AND ALL REALLY BAD COUNTRIES OUT! AMNERICA FIRST')
predict(fit_en, new_data = bake(data_recipe, new_data = pred_own))

New data

  • We could also use the model to predict on new data, such as the recently scraped Twitter discussion of the presidential debate.
# Download the gzipped presidential-debate tweets (pres_debate_2020.gz) into a temporary file
library(jsonlite)
tmp <- tempfile()
# mode = "wb": the archive is binary, so write it byte-for-byte on every platform
download.file("https://github.com/SDS-AAU/SDS-master/raw/master/M2/data/pres_debate_2020.gz", tmp, mode = "wb")
trying URL 'https://github.com/SDS-AAU/SDS-master/raw/master/M2/data/pres_debate_2020.gz'
Content type 'application/octet-stream' length 2095527 bytes (2.0 MB)
==================================================
downloaded 2.0 MB
# gzfile()'s second argument is the connection's open mode, not a label, so
# pass only the path and let stream_in() open and close the connection itself
tweets_raw_new <- stream_in(gzfile(tmp))

 Found 1 records...
 Imported 1 records. Simplifying...
tweets_raw_new %>% glimpse()
Rows: 1
Columns: 33
$ id              <df[,8811]> <data.frame[1 x 8811]>
$ conversation_id <df[,8811]> <data.frame[1 x 8811]>
$ created_at      <df[,8811]> <data.frame[1 x 8811]>
$ date            <df[,8811]> <data.frame[1 x 8811]>
$ timezone        <df[,8811]> <data.frame[1 x 8811]>
$ place           <df[,8811]> <data.frame[1 x 8811]>
$ tweet           <df[,8811]> <data.frame[1 x 8811]>
$ language        <df[,8811]> <data.frame[1 x 8811]>
$ hashtags        <df[,8811]> <data.frame[1 x 8811]>
$ cashtags        <df[,8811]> <data.frame[1 x 8811]>
$ user_id         <df[,8811]> <data.frame[1 x 8811]>
$ user_id_str     <df[,8811]> <data.frame[1 x 8811]>
$ username        <df[,8811]> <data.frame[1 x 8811]>
$ name            <df[,8811]> <data.frame[1 x 8811]>
$ day             <df[,8811]> <data.frame[1 x 8811]>
$ hour            <df[,8811]> <data.frame[1 x 8811]>
$ link            <df[,8811]> <data.frame[1 x 8811]>
$ urls            <df[,8811]> <data.frame[1 x 8811]>
$ photos          <df[,8811]> <data.frame[1 x 8811]>
$ video           <df[,8811]> <data.frame[1 x 8811]>
$ thumbnail       <df[,8811]> <data.frame[1 x 8811]>
$ nlikes          <df[,8811]> <data.frame[1 x 8811]>
$ nreplies        <df[,8811]> <data.frame[1 x 8811]>
$ nretweets       <df[,8811]> <data.frame[1 x 8811]>
$ quote_url       <df[,8811]> <data.frame[1 x 8811]>
$ search          <df[,8811]> <data.frame[1 x 8811]>
$ near            <df[,8811]> <data.frame[1 x 8811]>
$ geo             <df[,8811]> <data.frame[1 x 8811]>
$ source          <df[,8811]> <data.frame[1 x 8811]>
$ reply_to        <df[,8811]> <data.frame[1 x 8811]>
$ translate       <df[,8811]> <data.frame[1 x 8811]>
$ trans_src       <df[,8811]> <data.frame[1 x 8811]>
$ trans_dest      <df[,8811]> <data.frame[1 x 8811]>
# Take the single row of the nested id / tweet frames and transpose it into
# one value per tweet
tweets_new <- tibble(
  ID = as.character(t(tweets_raw_new$id[1, ])),
  text = as.character(t(tweets_raw_new$tweet[1, ]))
)
#rm(tweets_raw_new)
glimpse(tweets_new)
Rows: 8,811
Columns: 2
$ ID   <chr> "1318944772183281664", "1318938583122743296", "1318932554897031169", "1318928783169245185", "1318927150247018498", "1318926624327454721", "1318926578647207939…
$ text <chr> "Still time to register: Students can join the @UVADemocracy Student Advisory Council for a socially-distanced Presidential Debate Watch Party on Thursday, Oc…
# Apply the trained recipe to the new tweets
data_new <- bake(data_recipe, new_data = tweets_new)
glimpse(data_new)
Rows: 8,811
Columns: 43
$ `tfidf_text_#COVID19`  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_act         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_american    <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_americans   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000000…
$ tfidf_text_amp         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 1.9806353, 0.0000000, 0.000000…
$ tfidf_text_bill        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_businesses  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_can         <dbl> 1.4189092, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 1.4189092, 0.0000000, 0.0000000, 1.418909…
$ tfidf_text_care        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 1.670964, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_congress    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_coronavirus <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_country     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_covid19     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_day         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 2.244936, 0.000000, …
$ tfidf_text_get         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_great       <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000000…
$ tfidf_text_health      <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 1.676625, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_help        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_house       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_im          <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_just        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_make        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 1.289476, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_must        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_need        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_new         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_now         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_one         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_pandemic    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_people      <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_president   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 1.4500356, 0.0000000, 0.0000000, 0.000000…
$ tfidf_text_qt          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_senate      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_small       <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_state       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_support     <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000000…
$ tfidf_text_thank       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_time        <dbl> 1.4925070, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000000…
$ tfidf_text_today       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_trump       <dbl> 0.0000000, 2.1272525, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.7090842, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000000…
$ tfidf_text_us          <dbl> 0.000000, 0.000000, 3.432999, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 3.432999, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_vote        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ tfidf_text_work        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, …
$ tfidf_text_workers     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
# Classify the debate tweets and attach the predicted label
pred_new <- predict(fit_en, new_data = data_new)
tweets_new <- tweets_new %>%
  mutate(pred = pull(pred_new, .pred_class))
count(tweets_new, pred)
# One row per token, Twitter-aware tokenizer
tweets_tidy_new <- unnest_tokens(tweets_new, word, text, token = "tweets")
# preprocessing
#  (disabled) mutate(word = word %>% str_remove_all('[^[:alnum:]]')) ## remove all special characters
tweets_tidy_new <- tweets_tidy_new %>%
  filter(
    !str_detect(word, '@|#presidential'), # drop mentions and the #presidential... search hashtags
    !str_detect(word, '^amp|^http|^t\\.co'), # Twitter-specific residue: &amp;, URLs, t.co links
    str_length(word) > 2 # keep tokens with at least 3 characters
  ) %>%
  group_by(word) %>%
  filter(n() > 100) %>% # keep words occurring more than 100 times
  ungroup() %>%
  anti_join(stop_words, by = 'word') # remove stopwords
# TFIDF weights
# bind_tf_idf() expects one row per document-term pair. add_count() would keep
# one row per token occurrence, which inflates both the document totals and the
# document frequencies, so collapse to (ID, pred, word) counts first.
# `pred` is carried along because it is constant within a tweet and needed later.
tweets_tidy_new <- tweets_tidy_new %>%
  count(ID, pred, word) %>%
  bind_tf_idf(term = word,
              document = ID,
              n = n)
# Top 20 words per predicted label, ranked by summed tf-idf
labels_words_new <- tweets_tidy_new %>%
  count(pred, word, wt = tf_idf, sort = TRUE, name = "tf_idf") %>%
  group_by(pred) %>%
  slice(seq_len(20)) %>%
  ungroup()
# Same ranking restricted to tokens containing "#" (hashtags)
hashtags_words_new <- tweets_tidy_new %>%
  filter(str_detect(word, '#')) %>%
  count(pred, word, wt = tf_idf, sort = TRUE, name = "tf_idf") %>%
  group_by(pred) %>%
  slice(seq_len(20)) %>%
  ungroup()
# Top words per predicted label, bars ordered within each facet
ggplot(
  mutate(labels_words_new, word = reorder_within(word, by = tf_idf, within = pred)),
  aes(x = word, y = tf_idf, fill = pred)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~pred, ncol = 2, scales = "free") +
  scale_x_reordered() +
  coord_flip() +
  labs(x = NULL, y = "tf-idf")

# Top hashtags per predicted label, same layout
ggplot(
  mutate(hashtags_words_new, word = reorder_within(word, by = tf_idf, within = pred)),
  aes(x = word, y = tf_idf, fill = pred)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~pred, ncol = 2, scales = "free") +
  scale_x_reordered() +
  coord_flip() +
  labs(x = NULL, y = "tf-idf")

To be continued by you :)

X

X

X

X

X

Topic models (LDA) on new data

# for LDA analysis
library(topicmodels)

Preparing the Data

# topicmodels::LDA() needs a document-term matrix: rows = tweets (ID),
# columns = words, cell values = the per-tweet word count n
tweets_dtm <- cast_dtm(tweets_tidy_new, document = ID, term = word, value = n)

Let's take a look:

tweets_dtm
<<DocumentTermMatrix (documents: 7567, terms: 95)>>
Non-/sparse entries: 22637/696228
Sparsity           : 97%
Maximal term length: 27
Weighting          : term frequency (tf)
  • We see again that the matrix is still rather sparse, which is an artefact of text data generally, but even more so when using Twitter data.
  • Let's see if we can reduce that somewhat by deleting less frequently used terms.
library(tm)
# Preview the matrix after dropping very sparse terms; the result is only
# printed, not assigned, so tweets_dtm itself is left unchanged
removeSparseTerms(tweets_dtm, sparse = .99)
<<DocumentTermMatrix (documents: 7567, terms: 95)>>
Non-/sparse entries: 22637/696228
Sparsity           : 97%
Maximal term length: 27
Weighting          : term frequency (tf)
  • Now we can perform a LDA, using the more accurate Gibbs sampling as method.
# Six-topic LDA fitted by Gibbs sampling; fixed seed for reproducibility
tweets_lda <- LDA(
  tweets_dtm,
  k = 6,
  method = "Gibbs",
  control = list(seed = 1337)
)

\(\beta\): Word-Topic Association

  • \(\beta\) is an output of the LDA model, indicating the probability that a word occurs in a certain topic.
  • Therefore, looking at the top-probability words of a topic often gives us a good intuition regarding its properties.
# tidy() has a method for LDA fits: extract the per-topic word probabilities
# and keep the 10 most probable terms of each topic
lda_beta <- tweets_lda %>%
  tidy(matrix = "beta") %>%
  arrange(topic, desc(beta)) %>%
  group_by(topic) %>%
  slice(seq_len(10)) %>%
  ungroup()
head(lda_beta)
# reorder_within() + scale_x_reordered() order the bars separately inside each facet
lda_beta %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  arrange(desc(beta)) %>%
  ggplot(aes(term, beta, fill = as.factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, ncol = 2, scales = "free") +
  scale_x_reordered() +
  coord_flip() +
  labs(title = "Top 10 terms in each LDA topic",
       x = NULL, y = expression(beta))

\(\gamma\): Document-Topic Association

  • In LDA, documents are represented as a mix of topics. This association of a document to a topic is captured by \(\gamma\)
# Per-document topic proportions (one row per document-topic pair)
lda_gamma <- tidy(tweets_lda, matrix = "gamma")
head(lda_gamma)
# Histogram of gamma over all topics, counts on a log scale
ggplot(lda_gamma, aes(gamma)) +
  geom_histogram() +
  scale_y_log10() +
  labs(title = "Distribution of probabilities for all topics",
       y = "Number of documents", x = expression(gamma))

# Attach the predicted label of each tweet to its topic proportions
lda_gamma <- left_join(
  lda_gamma,
  select(tweets_new, ID, pred),
  by = c('document' = 'ID')
)
# Total topic weight per predicted label
lda_gamma %>%
  group_by(pred, topic) %>%
  summarise(gamma = sum(gamma)) %>%
  arrange(pred, gamma)
# Histogram of gamma per topic, counts on a log scale
ggplot(lda_gamma, aes(gamma, fill = as.factor(topic))) +
  geom_histogram(show.legend = FALSE) +
  scale_y_log10() +
  facet_wrap(~ topic, ncol = 2) +
  labs(title = "Distribution of probability for each topic",
       y = "Number of documents", x = expression(gamma))

# Dominant topic(s) of every document: keep the row(s) with the highest gamma (ties kept)
top_topics <- tidy(tweets_lda, matrix = "gamma") %>%
  group_by(document) %>%
  filter(min_rank(desc(gamma)) <= 1) %>%
  ungroup()
# Number of documents per dominant topic
count(top_topics, topic)
#' Build the LDAvis JSON payload for a fitted topic model
#'
#' @param fitted A fitted topic model accepted by `topicmodels::posterior()`.
#' @param doc_dtm The document-term matrix the model was fitted on; it is tidied
#'   to derive per-document lengths and per-term frequencies.
#' @param method Scaling used for the inter-topic map: `"PCA"` (`LDAvis::jsPCA`)
#'   or `"TSNE"` (t-SNE on the left singular vectors of the distance matrix).
#' @param doc_in Currently unused (document restriction is not implemented);
#'   kept so existing calls keep working.
#' @param topic_in Optional vector of topics (indices or names) to restrict the
#'   visualisation to; `NULL` keeps all topics.
#' @return The JSON string produced by `LDAvis::createJSON()`.
topicmodels_json_ldavis <- function(fitted, doc_dtm, method = "PCA", doc_in = NULL, topic_in = NULL) {
  # Validate first: an unknown method used to surface later as "object 'mds' not found"
  method <- match.arg(method, choices = c("PCA", "TSNE"))

  # Fail loudly if a dependency is missing (require() would only return FALSE)
  needed <- c("topicmodels", "dplyr", "tidytext", "LDAvis", if (method == "TSNE") "tsne")
  available <- vapply(needed, requireNamespace, logical(1), quietly = TRUE)
  if (!all(available)) {
    stop("Missing required package(s): ", paste(needed[!available], collapse = ", "), call. = FALSE)
  }

  # Posterior distributions, computed once
  post <- topicmodels::posterior(fitted)
  phi <- as.matrix(post$terms) # topic-term distribution
  theta <- as.matrix(post$topics) # document-topic distribution

  # Optional restriction to a subset of topics
  if (!is.null(topic_in)) {
    # drop = FALSE keeps matrices even when a single topic is selected
    phi <- phi[topic_in, , drop = FALSE]
    theta <- theta[, topic_in, drop = FALSE]
    # createJSON() requires every row of theta to sum to 1, so renormalise
    theta <- theta / rowSums(theta)
  }

  text_tidy <- tidytext::tidy(doc_dtm)
  vocab <- colnames(phi)
  # Token count per document, aligned with the rows of theta
  doc_length <- dplyr::left_join(
    dplyr::tibble(document = rownames(theta)),
    dplyr::count(text_tidy, document, wt = count),
    by = "document"
  )
  # Corpus frequency per term, aligned with vocab
  tf <- dplyr::left_join(
    dplyr::tibble(term = vocab),
    dplyr::count(text_tidy, term, wt = count),
    by = "term"
  )

  mds <- switch(method,
    PCA = LDAvis::jsPCA,
    TSNE = function(x) tsne::tsne(svd(x)$u)
  )

  # Convert to json
  LDAvis::createJSON(
    phi = phi, theta = theta, vocab = vocab,
    doc.length = dplyr::pull(doc_length, n),
    term.frequency = dplyr::pull(tf, n),
    reorder.topics = FALSE, mds.method = mds,
    plot.opts = list(xlab = "Dim.1", ylab = "Dim.2")
  )
}
library(LDAvis)
# Build the visualisation payload with the t-SNE topic layout
json_lda <- topicmodels_json_ldavis(
  fitted = tweets_lda,
  doc_dtm = tweets_dtm,
  method = "TSNE"
)
# Open the interactive LDAvis view
serVis(json_lda)
# json_lda %>% serVis(out.dir = 'LDAviz')

Endnotes

Packages & Ecosystem

Further NLP packages ecosystem

References

  • Julia Silge and David Robinson (2020). Text Mining with R: A Tidy Approach, O’Reilly. Online available here
  • Emil Hvidfeldt and Julia Silge (2020). Supervised Machine Learning for Text Analysis in R, online available here

Further sources

Datacamp

Other online

  • Julia Silge’s Blog: Full of great examples of predictive modeling, NLP, and the combination of both, using tidy ecosystems

Session Info

sessionInfo()
LS0tCnRpdGxlOiAnTkxQIHdvcmtzaG9wIC0gRXhwbG9yaW5nIFByZXNpZGVudGlhbCBEZWJhdGUgb24gdHdpdHRlcicKYXV0aG9yOiAiRGFuaWVsIFMuIEhhaW4gKGRzaEBidXNpbmVzcy5hYXUuZGspIgpkYXRlOiAiVXBkYXRlZCBgciBmb3JtYXQoU3lzLnRpbWUoKSwgJyVCICVkLCAlWScpYCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKICAgIGRmX3ByaW50OiBwYWdlZAogICAgdG9jOiB0cnVlCiAgICB0b2NfZGVwdGg6IDIKICAgIHRvY19mbG9hdDoKICAgICAgY29sbGFwc2VkOiBmYWxzZQogICAgdGhlbWU6IGZsYXRseQotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQojIyMgR2VuZXJpYyBwcmVhbWJsZQpybShsaXN0PWxzKCkpClN5cy5zZXRlbnYoTEFORyA9ICJlbiIpICMgRm9yIGVuZ2xpc2ggbGFuZ3VhZ2UKb3B0aW9ucyhzY2lwZW4gPSA1KSAjIFRvIGRlYWN0aXZhdGUgYW5ub3lpbmcgc2NpZW50aWZpYyBudW1iZXIgbm90YXRpb24KCiMjIyBLbml0ciBvcHRpb25zCmxpYnJhcnkoa25pdHIpICMgRm9yIGRpc3BsYXkgb2YgdGhlIG1hcmtkb3duCmtuaXRyOjpvcHRzX2NodW5rJHNldCh3YXJuaW5nPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICBtZXNzYWdlPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICBjb21tZW50PUZBTFNFLCAKICAgICAgICAgICAgICAgICAgICAgZmlnLmFsaWduPSJjZW50ZXIiCiAgICAgICAgICAgICAgICAgICAgICkKYGBgCgpgYGB7cn0KIyMjIExvYWQgc3RhbmRhcmRwYWNrYWdlcwpsaWJyYXJ5KHRpZHl2ZXJzZSkgIyBDb2xsZWN0aW9uIG9mIGFsbCB0aGUgZ29vZCBzdHVmZiBsaWtlIGRwbHlyLCBnZ3Bsb3QyIGVjdC4KbGlicmFyeShtYWdyaXR0cikgIyBGb3IgZXh0cmEtcGlwaW5nIG9wZXJhdG9ycyAoZWcuICU8PiUpCmBgYAoKYGBge3J9CmxpYnJhcnkodGlkeXRleHQpCmBgYAoKCiMgRG93bmxvYWQgdGhlIGRhdGEKCmBgYHtyfQojIGRvd25sb2FkIGFuZCBvcGVuIHNvbWUgVHJ1bXAgdHdlZXRzIGZyb20gdHJ1bXBfdHdlZXRfZGF0YV9hcmNoaXZlCmxpYnJhcnkoanNvbmxpdGUpCnRtcCA8LSB0ZW1wZmlsZSgpCmRvd25sb2FkLmZpbGUoImh0dHBzOi8vZ2l0aHViLmNvbS9TRFMtQUFVL1NEUy1tYXN0ZXIvcmF3L21hc3Rlci9NMi9kYXRhL3BvbF90d2VldHMuZ3oiLCB0bXApCgp0d2VldHNfcmF3IDwtIHN0cmVhbV9pbihnemZpbGUodG1wLCAicG9sX3R3ZWV0cyIpKQpgYGAKCmBgYHtyfQp0d2VldHNfcmF3ICU+JSBnbGltcHNlKCkKYGBgCgpgYGB7cn0KdHdlZXRzIDwtIHRpYmJsZShJRCA9IGNvbG5hbWVzKHR3ZWV0c19yYXdbWzFdXSksIAogICAgICAgICAgICAgICAgIHRleHQgPSB0d2VldHNfcmF3W1sxXV0gJT4lIGFzLmNoYXJhY3RlcigpLCAKICAgICAgICAgICAgICAgICBsYWJlbHMgPSB0d2VldHNfcmF3W1syXV0gJT4lIGFzLmxvZ2ljYWwoKSkKI3JtKHR3ZWV0c19yYXcpCmBgYAoKYGBge3J9CnR3ZWV0cyAlPiUgZ2xpbXBz
ZSgpCmBgYAoKYGBge3J9CnR3ZWV0cyAlPD4lCiAgZmlsdGVyKCEodGV4dCAlPiUgc3RyX2RldGVjdCgnXlJUJykpKSAjIEZpbHRlciByZXR3ZWV0cwpgYGAKCmBgYHtyfQp0d2VldHMgJT4lIGhlYWQoKQpgYGAKCiMgVGlkeWluZwoKYGBge3J9CnR3ZWV0c190aWR5IDwtIHR3ZWV0cyAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHRleHQsIHRva2VuID0gInR3ZWV0cyIpIApgYGAKCmBgYHtyfQp0d2VldHNfdGlkeSAlPiUgaGVhZCg1MCkKYGBgCgoKYGBge3J9CnR3ZWV0c190aWR5ICU+JSBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkKYGBgCgoKIyBQcmVwcm9jZXNzaW5nCgpgYGB7cn0KIyBwcmVwcm9jZXNzaW5nCnR3ZWV0c190aWR5ICU8PiUKICBmaWx0ZXIoISh3b3JkICU+JSBzdHJfZGV0ZWN0KCdAJykpKSAlPiUgIyByZW1vdmUgaGFzaHRhZ3MgYW5kIG1lbnRpb25zCiAgZmlsdGVyKCEod29yZCAlPiUgc3RyX2RldGVjdCgnXmFtcHxeaHR0cHxedFxcLmNvJykpKSAlPiUgIyBUd2l0dGVyIHNwZWNpZmljIHN0dWZmCiMgIG11dGF0ZSh3b3JkID0gd29yZCAlPiUgc3RyX3JlbW92ZV9hbGwoJ1teWzphbG51bTpdXScpKSAlPiUgIyMgcmVtb3ZlIGFsbCBzcGVjaWFsIGNoYXJhY3RlcnMKICBmaWx0ZXIoc3RyX2xlbmd0aCh3b3JkKSA+IDIgKSAlPiUgIyBSZW1vdmUgd29yZHMgd2l0aCBsZXNzIHRoYW4gIDMgY2hhcmFjdGVycwogIGdyb3VwX2J5KHdvcmQpICU+JQogIGZpbHRlcihuKCkgPiAxMDApICU+JSAjIHJlbW92ZSB3b3JkcyBvY2N1cmluZyBsZXNzIHRoYW4gMTAwIHRpbWVzCiAgdW5ncm91cCgpICU+JQogIGFudGlfam9pbihzdG9wX3dvcmRzLCBieSA9ICd3b3JkJykgIyByZW1vdmUgc3RvcHdvcmRzCmBgYAoKIyBURklERgoKVEZJREYgd2VpZ2h0aW5nCgpgYGB7cn0KIyB0b3Agd29yZHMKdHdlZXRzX3RpZHkgJT4lCiAgY291bnQod29yZCwgc29ydCA9IFRSVUUpICU+JQogIGhlYWQoMjApCmBgYAoKYGBge3J9CiMgVEZJREYgd2VpZ2h0cwp0d2VldHNfdGlkeSAlPD4lCiAgYWRkX2NvdW50KElELCB3b3JkKSAlPiUKICBiaW5kX3RmX2lkZih0ZXJtID0gd29yZCwKICAgICAgICAgICAgICBkb2N1bWVudCA9IElELAogICAgICAgICAgICAgIG4gPSBuKQpgYGAKCgpgYGB7cn0KIyBURklERiB0b3B3b3Jkcwp0d2VldHNfdGlkeSAlPiUKICBjb3VudCh3b3JkLCB3dCA9IHRmX2lkZiwgc29ydCA9IFRSVUUpICU+JQogIGhlYWQoMjApCmBgYAoKIyBJbnNwZWN0aW5nCgojIyBXb3JkcyBieSBwYXJ0eSBhZmZpbGlhdGlvbgoKYGBge3J9CmxhYmVsc193b3JkcyA8LSB0d2VldHNfdGlkeSAlPiUKICBncm91cF9ieShsYWJlbHMpICU+JQogIGNvdW50KHdvcmQsIHd0ID0gdGZfaWRmLCBzb3J0ID0gVFJVRSwgbmFtZSA9ICJ0Zl9pZGYiKSAlPiUKICBzbGljZSgxOjEwMCkgJT4lCiAgdW5ncm91cCgpIApgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTB9CmxhYmVsc193b3JkcyAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXJfd2l0aGluKHdvcmQs
IGJ5ID0gdGZfaWRmLCB3aXRoaW4gPSBsYWJlbHMpKSAlPiUKICBnZ3Bsb3QoYWVzKHggPSB3b3JkLCB5ID0gdGZfaWRmLCBmaWxsID0gbGFiZWxzKSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gInRmLWlkZiIpICsKICBmYWNldF93cmFwKH5sYWJlbHMsIG5jb2wgPSAyLCBzY2FsZXMgPSAiZnJlZSIpICsKICBjb29yZF9mbGlwKCkgKwogIHNjYWxlX3hfcmVvcmRlcmVkKCkKYGBgCgojIyBEaXN0YW5jZQoKYGBge3J9CnR3ZWV0c190aWR5ICU+JSBoZWFkKCkKYGBgCgpgYGB7cn0KCmBgYAoKCiMgUHJlZGljdGl2ZSBtb2RlbAoKYGBge3J9CmxpYnJhcnkodGlkeW1vZGVscykKYGBgCgojIyBTaW1wbGUgbWFudWFsIGJhc2VsaW5lCgpgYGB7cn0Kd29yZHNfY2xhc3NpZmllciA8LSBsYWJlbHNfd29yZHMgJT4lCiAgYXJyYW5nZShkZXNjKHRmX2lkZikpICU+JQogIGRpc3RpbmN0KHdvcmQsIC5rZWVwX2FsbCA9IFRSVUUpICU+JQogIHNlbGVjdCgtdGZfaWRmKQpgYGAKCmBgYHtyfQp0d2VldF9udWxsX21vZGVsIDwtIHR3ZWV0c190aWR5ICU+JQogIGlubmVyX2pvaW4obGFiZWxzX3dvcmRzLCBieSA9ICd3b3JkJykKYGBgCgpgYGB7cn0KbnVsbF9yZXMgPC0gdHdlZXRfbnVsbF9tb2RlbCAlPiUKICBncm91cF9ieShJRCkgJT4lCiAgc3VtbWFyaXNlKHRydXRoID0gbWVhbihsYWJlbHMueCwgbmEucm0gPSBUUlVFKSAlPiUgcm91bmQoMCksCiAgICAgICAgIHByZWQgPSBtZWFuKGxhYmVscy55LCBuYS5ybSA9IFRSVUUpICU+JSByb3VuZCgwKSkKYGBgCgpgYGB7cn0KdGFibGUobnVsbF9yZXMkdHJ1dGgsIG51bGxfcmVzJHByZWQpCmBgYAoKCiMjIFByZXByb2Nlc3NpbmcKCmBgYHtyfQojIE5vdGljZSwgd2UgdXNlIHRoZSBpbml0aWFsIHVudG9rZW5pemVkIHR3ZWV0cwpkYXRhIDwtIHR3ZWV0cyAlPiUKICBzZWxlY3QobGFiZWxzLCB0ZXh0KSAlPiUKICByZW5hbWUoeSA9IGxhYmVscykgJT4lCiAgbXV0YXRlKHkgPSB5ICAlPiUgYXMuZmFjdG9yKCkpIApgYGAKCgojIyBUcmFpbmluZyAmIFRlc3Qgc3BsaXQKCmBgYHtyfQpkYXRhX3NwbGl0IDwtIGluaXRpYWxfc3BsaXQoZGF0YSwgcHJvcCA9IDAuNzUsIHN0cmF0YSA9IHkpCgpkYXRhX3RyYWluIDwtIGRhdGFfc3BsaXQgICU+JSAgdHJhaW5pbmcoKQpkYXRhX3Rlc3QgPC0gZGF0YV9zcGxpdCAlPiUgdGVzdGluZygpCmBgYAoKIyMgUHJlcHJvY2Vzc2luZyBwaXBlbGluZQoKYGBge3J9CmxpYnJhcnkodGV4dHJlY2lwZXMpICMgQWRpdHRpb25hbCByZWNpcGVzIGZvciB3b3JraW5nIHdpdGggdGV4dCBkYXRhCmBgYAoKYGBge3J9CiMgVGhpcyByZWNpcGUgcHJldHR5IG11Y2ggcmVjb25zdHJ1Y3RzIGFsbCBwcmVwcm9jZXNzaW5nIHdlIGRpZCBzbyBmYXIKZGF0YV9yZWNpcGUgPC0gZGF0YV90cmFpbiAlPiUKICByZWNpcGUoeSB+LikgJT4lCiAgdGhlbWlzOjpzdGVwX2Rvd25zYW1wbGUoeSkgJT4lICMgRm9yIGRvd25zYW1wbGluZyBj
bGFzcyBpbWJhbGFuY2VzIChvcHRpbWFsKQogIHN0ZXBfZmlsdGVyKCEodGV4dCAlPiUgc3RyX2RldGVjdCgnXlJUJykpKSAlPiUgIyBVcGZyb250IGZpbHRlcmluZyByZXR3ZWV0cwogIHN0ZXBfZmlsdGVyKHRleHQgIT0gIiIpICU+JQogIHN0ZXBfdG9rZW5pemUodGV4dCwgdG9rZW4gPSAidHdlZXRzIikgJT4lICMgdG9rZW5pemUKICBzdGVwX3Rva2VuZmlsdGVyKHRleHQsIG1pbl90aW1lcyA9IDc1KSAlPiUgICMgRmlsdGVyIG91dCByYXJlIHdvcmRzCiAgc3RlcF9zdG9wd29yZHModGV4dCwga2VlcCA9IEZBTFNFKSAlPiUgIyBGaWx0ZXIgc3RvcHdvcmRzCiAgc3RlcF90ZmlkZih0ZXh0KSAlPiUgIyBURklERiB3ZWlnaHRpbmcKICAjc3RlcF9wY2EoYWxsX3ByZWRpY3RvcnMoKSkgJT4lICMgRGltZW5zaW9uYWxpdHkgcmVkdWN0aW9uIHZpYSBQQ0EgKG9wdGlvbmFsKQogIHByZXAoKSAjIE5PVEU6IE9ubHkgcHJlcCB0aGUgcmVjaXBlIHdoZW4gbm90IHVzaW5nIGluIGEgd29ya2Zsb3cKYGBgCgoKYGBge3J9CmRhdGFfcmVjaXBlCmBgYAoKU2luY2Ugd2Ugd2lsbCBub3QgZG8gaHlwZXJwYXJhbWV0ZXIgdHVuaW5nLCB3ZSBkaXJlY3RseSBiYWtlL2p1aWNlIHRoZSByZWNpcGUKCmBgYHtyfQpkYXRhX3RyYWluX3ByZXAgPC0gZGF0YV9yZWNpcGUgJT4lIGp1aWNlKCkKZGF0YV90ZXN0X3ByZXAgPC0gZGF0YV9yZWNpcGUgJT4lIGJha2UoZGF0YV90ZXN0KQpgYGAKCgojIyBEZWZpbmluZyB0aGUgbW9kZWxzCgpgYGB7cn0KbW9kZWxfZW4gPC0gbG9naXN0aWNfcmVnKG1vZGUgPSAnY2xhc3NpZmljYXRpb24nLCAKICAgICAgICAgICAgICAgICAgICAgICAgIG1peHR1cmUgPSAwLjUsIAogICAgICAgICAgICAgICAgICAgICAgICAgcGVuYWx0eSA9IDAuNSkgJT4lCiAgc2V0X2VuZ2luZSgnZ2xtJywgZmFtaWx5ID0gYmlub21pYWwpIApgYGAKCgojIyBEZWZpbmUgdGhlIHdvcmtmbG93CgpXZSB3aWxsIHNraXAgdGhlIHdvcmtmbG93IHN0ZXAgdGhpcyB0aW1lLCBzaW5jZSB3ZSBkbyBub3QgZXZhbHVhdGUgZGlmZmVyZW50IG1vZGVscyBhZ2FpbnN0IGVhY2ggb3RoZXJzLgoKIyMgZml0IHRoZSBtb2RlbAoKYGBge3J9CmZpdF9lbiA8LSBtb2RlbF9lbiAlPiUgZml0KGZvcm11bGEgPSB5IH4uLCBkYXRhID0gZGF0YV90cmFpbl9wcmVwKQpgYGAKCgpgYGB7cn0KcHJlZF9jb2xsZWN0ZWQgPC0gdGliYmxlKAogIHRydXRoID0gZGF0YV90cmFpbl9wcmVwICU+JSBwdWxsKHkpLAogIHByZWQgPSBmaXRfZW4gJT4lIHByZWRpY3QobmV3X2RhdGEgPSBkYXRhX3RyYWluX3ByZXApICU+JSBwdWxsKC5wcmVkX2NsYXNzKSwKICBwcmVkX3Byb2IgPSBmaXRfZW4gJT4lIHByZWRpY3QobmV3X2RhdGEgPSBkYXRhX3RyYWluX3ByZXAsIHR5cGUgPSAicHJvYiIpICU+JSBwdWxsKC5wcmVkX1RSVUUpLAogICkgCmBgYAoKYGBge3J9CnByZWRfY29sbGVjdGVkICU+JSBjb25mX21hdCh0cnV0aCwgcHJlZCkKYGBgCgpgYGB7cn0KcHJlZF9jb2xsZWN0ZWQgJT4l
IGNvbmZfbWF0KHRydXRoLCBwcmVkKSAlPiUgc3VtbWFyeSgpCmBgYAoKV2VsbC4uLiBzb3NvCgojIFVzaW5nIHRoZSBtb2RlbCBmb3IgbmV3IHByZWRpY3Rpb24KCiMjIFNpbXBsZSB0ZXN0CgpgYGB7cn0KIyBIb3cgd291bGQgdGhlIG1vZGVsIHByZWRpY3QgZ2l2ZW4gc29tZSB0d2VldCB0ZXh0CnByZWRfb3duID0gdGliYmxlKHRleHQgPSAnVVNBIFVTQSBXRSBORUVEIEEgV0FMTCBUTyBNQUtFIEFNRVJJQ0EgR1JFQVQgQUdBSU4gQU5EIEtFRVAgVEhFIE1FWElDQU5TIEFORCBBTEwgUkVBTExZIEJBRCBDT1VOVFJJRVMgT1VUISBBTU5FUklDQSBGSVJTVCcpCmBgYAoKCmBgYHtyfQpmaXRfZW4gJT4lIHByZWRpY3QobmV3X2RhdGEgPSBkYXRhX3JlY2lwZSAlPiUgYmFrZShwcmVkX293bikpCmBgYAoKCiMjIE5ldyBkYXRhCgoqIFdlIGNvdWxkIGFsc28gdXNlIHRoZSBtb2RlbCB0byBwcmVkaWN0IG9uIG5ldyBkYXRhLCBzdWNoIGFzIHRoZSBqdXN0IHNjcmFwZWQgZGlzY3Vzc2lvbiBvbiB0aGUgcHJlc2lkZW50aWFsIGRlYmF0ZS4KCmBgYHtyfQojIGRvd25sb2FkIGFuZCBvcGVuIHNvbWUgVHJ1bXAgdHdlZXRzIGZyb20gdHJ1bXBfdHdlZXRfZGF0YV9hcmNoaXZlCmxpYnJhcnkoanNvbmxpdGUpCnRtcCA8LSB0ZW1wZmlsZSgpCmRvd25sb2FkLmZpbGUoImh0dHBzOi8vZ2l0aHViLmNvbS9TRFMtQUFVL1NEUy1tYXN0ZXIvcmF3L21hc3Rlci9NMi9kYXRhL3ByZXNfZGViYXRlXzIwMjAuZ3oiLCB0bXApCgp0d2VldHNfcmF3X25ldyA8LSBzdHJlYW1faW4oZ3pmaWxlKHRtcCwgInByZXNfZGViYXRlXzIwMjAiKSkKYGBgCgpgYGB7cn0KdHdlZXRzX3Jhd19uZXcgJT4lIGdsaW1wc2UoKQpgYGAKCmBgYHtyfQp0d2VldHNfbmV3IDwtIHRpYmJsZShJRCA9IHR3ZWV0c19yYXdfbmV3JGlkWzEsXSAlPiUgdCgpICU+JSBhcy5jaGFyYWN0ZXIoKSwgCiAgICAgICAgICAgICAgICAgICAgIHRleHQgPSB0d2VldHNfcmF3X25ldyR0d2VldFsxLF0gJT4lIHQoKSAlPiUgYXMuY2hhcmFjdGVyKCkpCiNybSh0d2VldHNfcmF3X25ldykKYGBgCgpgYGB7cn0KdHdlZXRzX25ldyAlPiUgZ2xpbXBzZSgpCmBgYAoKYGBge3J9CmRhdGFfbmV3IDwtIGRhdGFfcmVjaXBlICU+JSBiYWtlKHR3ZWV0c19uZXcpCmBgYAoKYGBge3J9CmRhdGFfbmV3ICU+JSBnbGltcHNlKCkKYGBgCgoKYGBge3J9CnByZWRfbmV3IDwtIGZpdF9lbiAlPiUgcHJlZGljdChuZXdfZGF0YSA9IGRhdGFfbmV3KQpgYGAKCmBgYHtyfQp0d2VldHNfbmV3ICU8PiUKICBtdXRhdGUocHJlZCA9IHByZWRfbmV3ICU+JSBwdWxsKC5wcmVkX2NsYXNzKSkKYGBgCgpgYGB7cn0KdHdlZXRzX25ldyAlPiUgY291bnQocHJlZCkKYGBgCgpgYGB7cn0KdHdlZXRzX3RpZHlfbmV3IDwtIHR3ZWV0c19uZXcgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0ZXh0LCB0b2tlbiA9ICJ0d2VldHMiKSAKYGBgCgpgYGB7cn0KIyBwcmVwcm9jZXNzaW5nCnR3ZWV0c190aWR5X25ldyAlPD4lCiAgZmlsdGVyKCEod29y
ZCAlPiUgc3RyX2RldGVjdCgnQHwjcHJlc2lkZW50aWFsJykpKSAlPiUgIyByZW1vdmUgaGFzaHRhZ3MgYW5kIG1lbnRpb25zCiAgZmlsdGVyKCEod29yZCAlPiUgc3RyX2RldGVjdCgnXmFtcHxeaHR0cHxedFxcLmNvJykpKSAlPiUgIyBUd2l0dGVyIHNwZWNpZmljIHN0dWZmCiMgIG11dGF0ZSh3b3JkID0gd29yZCAlPiUgc3RyX3JlbW92ZV9hbGwoJ1teWzphbG51bTpdXScpKSAlPiUgIyMgcmVtb3ZlIGFsbCBzcGVjaWFsIGNoYXJhY3RlcnMKICBmaWx0ZXIoc3RyX2xlbmd0aCh3b3JkKSA+IDIgKSAlPiUgIyBSZW1vdmUgd29yZHMgd2l0aCBsZXNzIHRoYW4gIDMgY2hhcmFjdGVycwogIGdyb3VwX2J5KHdvcmQpICU+JQogIGZpbHRlcihuKCkgPiAxMDApICU+JSAjIHJlbW92ZSB3b3JkcyBvY2N1cmluZyBsZXNzIHRoYW4gMTAwIHRpbWVzCiAgdW5ncm91cCgpICU+JQogIGFudGlfam9pbihzdG9wX3dvcmRzLCBieSA9ICd3b3JkJykgIyByZW1vdmUgc3RvcHdvcmRzCmBgYAoKCmBgYHtyfQojIFRGSURGIHdlaWdodHMKdHdlZXRzX3RpZHlfbmV3ICU8PiUKICBhZGRfY291bnQoSUQsIHdvcmQpICU+JQogIGJpbmRfdGZfaWRmKHRlcm0gPSB3b3JkLAogICAgICAgICAgICAgIGRvY3VtZW50ID0gSUQsCiAgICAgICAgICAgICAgbiA9IG4pCmBgYAoKYGBge3J9CmxhYmVsc193b3Jkc19uZXcgPC0gdHdlZXRzX3RpZHlfbmV3ICU+JQogIGdyb3VwX2J5KHByZWQpICU+JQogIGNvdW50KHdvcmQsIHd0ID0gdGZfaWRmLCBzb3J0ID0gVFJVRSwgbmFtZSA9ICJ0Zl9pZGYiKSAlPiUKICBzbGljZSgxOjIwKSAlPiUKICB1bmdyb3VwKCkgCmBgYAoKYGBge3J9Cmhhc2h0YWdzX3dvcmRzX25ldyA8LSB0d2VldHNfdGlkeV9uZXcgJT4lCiAgZmlsdGVyKHdvcmQgJT4lIHN0cl9kZXRlY3QoJyMnKSkgJT4lCiAgZ3JvdXBfYnkocHJlZCkgJT4lCiAgY291bnQod29yZCwgd3QgPSB0Zl9pZGYsIHNvcnQgPSBUUlVFLCBuYW1lID0gInRmX2lkZiIpICU+JQogIHNsaWNlKDE6MjApICU+JQogIHVuZ3JvdXAoKSAKYGBgCgpgYGB7ciwgZmlnLndpZHRoPTEwfQpsYWJlbHNfd29yZHNfbmV3ICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcl93aXRoaW4od29yZCwgYnkgPSB0Zl9pZGYsIHdpdGhpbiA9IHByZWQpKSAlPiUKICBnZ3Bsb3QoYWVzKHggPSB3b3JkLCB5ID0gdGZfaWRmLCBmaWxsID0gcHJlZCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgbGFicyh4ID0gTlVMTCwgeSA9ICJ0Zi1pZGYiKSArCiAgZmFjZXRfd3JhcCh+cHJlZCwgbmNvbCA9IDIsIHNjYWxlcyA9ICJmcmVlIikgKwogIGNvb3JkX2ZsaXAoKSArCiAgc2NhbGVfeF9yZW9yZGVyZWQoKQpgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTB9Cmhhc2h0YWdzX3dvcmRzX25ldyAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXJfd2l0aGluKHdvcmQsIGJ5ID0gdGZfaWRmLCB3aXRoaW4gPSBwcmVkKSkgJT4lCiAgZ2dwbG90KGFlcyh4ID0gd29yZCwgeSA9IHRmX2lkZiwgZmlsbCA9
IHByZWQpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGxhYnMoeCA9IE5VTEwsIHkgPSAidGYtaWRmIikgKwogIGZhY2V0X3dyYXAofnByZWQsIG5jb2wgPSAyLCBzY2FsZXMgPSAiZnJlZSIpICsKICBjb29yZF9mbGlwKCkgKwogIHNjYWxlX3hfcmVvcmRlcmVkKCkKYGBgCgpUbyBiZSBjb250aW51ZWQgYnkgeW91IDopCgpYCgpYCgpYCgpYCgpYCgoKIyBUb3BpYyBtb2RlbHMgKExEQSkgb24gbmV3IGRhdGEKCmBgYHtyfQojIGZvciBMREEgYW5hbHlzaXMKbGlicmFyeSh0b3BpY21vZGVscykKYGBgCgojIyMgUHJlcGFyaW5nIHRoZSBEYXRhCgpgYGB7cn0KIyBMREEgdmlhIHRoZSB0b3BpY21vZGVsIHBhY2thZ2UgcmVxdWlyZXMgYSBkb2N1bWVudC10ZXJtLW1hdHJpeCAoZHRtKQp0d2VldHNfZHRtIDwtIHR3ZWV0c190aWR5X25ldyAlPiUKICBjYXN0X2R0bShkb2N1bWVudCA9IElELCB0ZXJtID0gd29yZCwgdmFsdWUgPSBuKQpgYGAKCkxldHMgdGFrZSBhIGxvb2s6CgpgYGB7cn0KdHdlZXRzX2R0bQpgYGAKCiogV2Ugc2VlIGFnYWluIGhhdCB0aGUgbWF0cml4IGlzIHN0aWxsIHJhdGhlciBzcGFyc2UsIHdoaWNoIGlzIGFuIGFydGVmYWN0IG9mIHRleHQgZGF0YSBnZW5lcmFsbHksIGJ1dCBldmVuIG1vcmUgc28gd2hlbiB1c2luZyB0d2l0dGVyIGRhdGEuIAoqIExldHMgdHJ5IHRvIHNlZSBpZiB3ZSBjb3VsZCByZWR1Y2UgdGhhdCBzb21ld2hhdCBieSBkZWxldGluZyBsZXNzIG9mdGVuIHVzZWQgdGVybXMuCgpgYGB7cn0KbGlicmFyeSh0bSkKdHdlZXRzX2R0bSAlPiUgcmVtb3ZlU3BhcnNlVGVybXMoc3BhcnNlID0gLjk5KQpgYGAKCiogTm93IHdlIGNhbiBwZXJmb3JtIGEgTERBLCB1c2luZyB0aGUgbW9yZSBhY2N1cmF0ZSBHaWJicyBzYW1wbGluZyBhcyBgbWV0aG9kYC4KCmBgYHtyfQp0d2VldHNfbGRhIDwtIHR3ZWV0c19kdG0gJT4lIAogIExEQShrID0gNiwgbWV0aG9kID0gIkdpYmJzIiwKICAgICAgY29udHJvbCA9IGxpc3Qoc2VlZCA9IDEzMzcpKQpgYGAKCiMjIyAkXGJldGEkOiBXb3JkLVRvcGljIEFzc29jaWF0aW9uCgoqICRcYmV0YSQgaXMgYW4gb3V0cHV0IG9mIHRoZSBMREEgbW9kZWwsIGluZGljYXRpbmcgdGhlIHByb3BhYmlsaXR5IHRoYXQgYSB3b3JkIG9jY3VycyBpbiBhIGNlcnRhaW4gdG9waWMuCiogVGhlcmVmb3JlLCBsb2tpbmcgYXQgdGhlIHRvcCBwcm9iYWJpbGl0eSB3b3JkcyBvZiBhIHRvcGljIG9mdGVuIGdpdmVzIHVzIGEgZ29vZCBpbnR1aXRpb24gcmVnYXJkaW5nIGl0cyBwcm9wZXJ0aWVzLgoKYGBge3J9CiMgTERBIG91dHB1dCBpcyBkZWZpbmVkIGZvciB0aWR5KCksIHNvIHdlIGNhbiBlYXNpbHkgZXh0cmFjdCBpdApsZGFfYmV0YSA8LSB0d2VldHNfbGRhICU+JSAKICB0aWR5KG1hdHJpeCA9ICJiZXRhIikgJT4lCiAgZ3JvdXBfYnkodG9waWMpICU+JQogIGFycmFuZ2UodG9waWMsIGRlc2MoYmV0YSkpICU+JQogIHNsaWNlKDE6MTApICU+JQogIHVuZ3JvdXAoKSAKYGBg
CgpgYGB7cn0KbGRhX2JldGEgJT4lIGhlYWQoKQpgYGAKCmBgYHtyfQojIE5vdGljZSB0aGUgInJlb3JkZXJfd2l0aGluKCkiCmxkYV9iZXRhICU+JQogIG11dGF0ZSh0ZXJtID0gcmVvcmRlcl93aXRoaW4odGVybSwgYmV0YSwgdG9waWMpKSAlPiUKICBncm91cF9ieSh0b3BpYywgdGVybSkgJT4lICAgIAogIGFycmFuZ2UoZGVzYyhiZXRhKSkgJT4lICAKICB1bmdyb3VwKCkgJT4lCiAgZ2dwbG90KGFlcyh0ZXJtLCBiZXRhLCBmaWxsID0gYXMuZmFjdG9yKHRvcGljKSkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgY29vcmRfZmxpcCgpICsKICBzY2FsZV94X3Jlb3JkZXJlZCgpICsKICBsYWJzKHRpdGxlID0gIlRvcCAxMCB0ZXJtcyBpbiBlYWNoIExEQSB0b3BpYyIsCiAgICAgICB4ID0gTlVMTCwgeSA9IGV4cHJlc3Npb24oYmV0YSkpICsKICBmYWNldF93cmFwKH4gdG9waWMsIG5jb2wgPSAyLCBzY2FsZXMgPSAiZnJlZSIpCmBgYAoKIyMjICRcZ2FtbWEkOiBEb2N1bWVudC1Ub3BpYyBBc3NvY2lhdGlvbgoKKiBJbiBMREEsIGRvY3VtZW50cyBhcmUgcmVwcmVzZW50ZWQgYXMgYSBtaXggb2YgdG9waWNzLiBUaGlzIGFzc29jaWF0aW9uIG9mIGEgZG9jdW1lbnQgdG8gYSB0b3BpYyBpcyBjYXB0dXJlZCBieSAkXGdhbW1hJAoKYGBge3J9CmxkYV9nYW1tYSA8LSB0d2VldHNfbGRhICU+JSAKICB0aWR5KG1hdHJpeCA9ICJnYW1tYSIpCmBgYAoKYGBge3J9CmxkYV9nYW1tYSAlPiUgaGVhZCgpCmBgYAoKYGBge3J9CmxkYV9nYW1tYSAlPiUKICBnZ3Bsb3QoYWVzKGdhbW1hKSkgKwogIGdlb21faGlzdG9ncmFtKCkgKwogIHNjYWxlX3lfbG9nMTAoKSArCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidXRpb24gb2YgcHJvYmFiaWxpdGllcyBmb3IgYWxsIHRvcGljcyIsCiAgICAgICB5ID0gIk51bWJlciBvZiBkb2N1bWVudHMiLCB4ID0gZXhwcmVzc2lvbihnYW1tYSkpCmBgYApgYGB7cn0KbGRhX2dhbW1hICU8PiUKICBsZWZ0X2pvaW4odHdlZXRzX25ldyAlPiUgc2VsZWN0KElELCBwcmVkKSwgYnkgPSBjKCdkb2N1bWVudCcgPSAnSUQnKSkKYGBgCgpgYGB7cn0KbGRhX2dhbW1hICU+JQogIGdyb3VwX2J5KHByZWQsIHRvcGljKSAlPiUKICBzdW1tYXJpc2UoZ2FtbWEgPSBzdW0oZ2FtbWEpKSAlPiUKICBhcnJhbmdlKHByZWQsIGdhbW1hKQpgYGAKCgoKCmBgYHtyfQpsZGFfZ2FtbWEgJT4lCiAgZ2dwbG90KGFlcyhnYW1tYSwgZmlsbCA9IGFzLmZhY3Rvcih0b3BpYykpKSArCiAgZ2VvbV9oaXN0b2dyYW0oc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAofiB0b3BpYywgbmNvbCA9IDIpICsKICBzY2FsZV95X2xvZzEwKCkgKwogIGxhYnModGl0bGUgPSAiRGlzdHJpYnV0aW9uIG9mIHByb2JhYmlsaXR5IGZvciBlYWNoIHRvcGljIiwKICAgICAgIHkgPSAiTnVtYmVyIG9mIGRvY3VtZW50cyIsIHggPSBleHByZXNzaW9uKGdhbW1hKSkKYGBgCgpgYGB7cn0KdG9wX3RvcGljcyA8LSB0d2VldHNfbGRhICU+JSAK
ICB0aWR5KG1hdHJpeCA9ICJnYW1tYSIpICAlPiUKICBncm91cF9ieShkb2N1bWVudCkgJT4lCiAgdG9wX24oMSwgd3QgPSBnYW1tYSkgJT4lCiAgdW5ncm91cCgpCmBgYAoKYGBge3J9CnRvcF90b3BpY3MgJT4lCiAgY291bnQodG9waWMpCmBgYAoKCmBgYHtyfQp0b3BpY21vZGVsc19qc29uX2xkYXZpcyA8LSBmdW5jdGlvbihmaXR0ZWQsIGRvY19kdG0sIG1ldGhvZCA9ICJQQ0EiLCBkb2NfaW4gPSBOVUxMLCB0b3BpY19pbiA9IE5VTEwpewogIHJlcXVpcmUodG9waWNtb2RlbHMpOyByZXF1aXJlKGRwbHlyKTsgcmVxdWlyZShMREF2aXMpCiAgCiAgIyBGaW5kIHJlcXVpcmVkIHF1YW50aXRpZXMKICBwaGkgPC0gcG9zdGVyaW9yKGZpdHRlZCkkdGVybXMgJT4lIGFzLm1hdHJpeCgpICMgVG9waWMtdGVybSBkaXN0cmlidXRpb24KICB0aGV0YSA8LSBwb3N0ZXJpb3IoZml0dGVkKSR0b3BpY3MgJT4lIGFzLm1hdHJpeCgpICMgRG9jdW1lbnQtdG9waWMgbWF0cml4CiAgCiAgIyAjIFJlc3RyaWN0IChub3Qgd29ya2luZyBhdG0pCiAgIyBpZighaXNfbnVsbChJRF9pbikpe3RoZXRhIDwtIHRoZXRhW3Jvd25hbWVzKHRoZXRhKSAlaW4lICBkb2NfaW4sXTsgZG9jX2ZtICAlPD4lIGRmbV9zdWJzZXQoZGltbmFtZXMoZG9jX2ZtKSRkb2NzICVpbiUgZG9jX2luKX0KICAKICAjIFJlc3RyaWN0CiAgaWYoIWlzX251bGwodG9waWNfaW4pKXsKICAgIHBoaSA8LSBwaGlbdG9waWNfaW4sIF0KICAgIHRoZXRhIDwtIHRoZXRhWyAsIHRvcGljX2luXQogIH0KICB0ZXh0X3RpZHkgPC0gZG9jX2R0bSAlPiUgdGlkeSgpCiAgdm9jYWIgPC0gY29sbmFtZXMocGhpKQogIGRvY19sZW5ndGggPC0gdGliYmxlKGRvY3VtZW50ID0gcm93bmFtZXModGhldGEpKSAlPiUgbGVmdF9qb2luKHRleHRfdGlkeSAlPiUgY291bnQoZG9jdW1lbnQsIHd0ID0gY291bnQpLCBieSA9ICdkb2N1bWVudCcpCiAgdGYgPC0gdGliYmxlKHRlcm0gPSB2b2NhYikgJT4lIGxlZnRfam9pbih0ZXh0X3RpZHkgJT4lIGNvdW50KHRlcm0sIHd0ID0gY291bnQpLCBieSA9ICJ0ZXJtIikgCiAgCiAgaWYobWV0aG9kID09ICJQQ0EiKXttZHMgPC0ganNQQ0F9CiAgaWYobWV0aG9kID09ICJUU05FIil7bGlicmFyeSh0c25lKTsgbWRzIDwtIGZ1bmN0aW9uKHgpe3RzbmUoc3ZkKHgpJHUpfSB9CiAgCiAgIyBDb252ZXJ0IHRvIGpzb24KICBqc29uX2xkYSA8LSBMREF2aXM6OmNyZWF0ZUpTT04ocGhpID0gcGhpLCB0aGV0YSA9IHRoZXRhLCB2b2NhYiA9IHZvY2FiLCBkb2MubGVuZ3RoID0gZG9jX2xlbmd0aCAlPiUgcHVsbChuKSwgdGVybS5mcmVxdWVuY3kgPSB0ZiAlPiUgcHVsbChuKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVvcmRlci50b3BpY3MgPSBGQUxTRSwgbWRzLm1ldGhvZCA9IG1kcyxwbG90Lm9wdHMgPSBsaXN0KHhsYWIgPSAiRGltLjEiLCB5bGFiID0gIkRpbS4yIikpIAogIHJldHVybihqc29uX2xkYSkKfQpgYGAKCgpgYGB7cn0KbGlicmFyeShMREF2aXMpCmpz
b25fbGRhIDwtIHRvcGljbW9kZWxzX2pzb25fbGRhdmlzKGZpdHRlZCA9IHR3ZWV0c19sZGEsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkb2NfZHRtID0gdHdlZXRzX2R0bSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJUU05FIikKanNvbl9sZGEgJT4lIHNlclZpcygpCiMganNvbl9sZGEgJT4lIHNlclZpcyhvdXQuZGlyID0gJ0xEQXZpeicpCmBgYAoKCiMgRW5kbm90ZXMKCiMjIyBQYWNrYWdlcyAmIEVjb3N5c3RlbQoKKiBbYHRpZHl0ZXh0YF0oaHR0cHM6Ly9naXRodWIuY29tL2p1bGlhc2lsZ2UvdGlkeXRleHQpCiogW2B0ZXh0cmVjaXBlc2BdKGh0dHBzOi8vdGV4dHJlY2lwZXMudGlkeW1vZGVscy5vcmcvKQoqIFtgdG9waWNtb2RlbHNgXShodHRwczovL2NyYW4uci1wcm9qZWN0Lm9yZy93ZWIvcGFja2FnZXMvdG9waWNtb2RlbHMvdmlnbmV0dGVzL3RvcGljbW9kZWxzLnBkZikKCkZ1cnRoZXIgTkxQIHBhY2thZ2VzIGVjb3N5c3RlbQoKKiBgdG1gIFtoZXJlXShodHRwczovL2NyYW4uci1wcm9qZWN0Lm9yZy93ZWIvcGFja2FnZXMvdG0vKQoqIGBxdWFudGVkYWAgW2hlcmVdKGh0dHBzOi8vcXVhbnRlZGEuaW8vKSwgYW5kIG1hbnkgbWFueSBncmVhdCB0dXRvcmlhbHMgW2hlcmVdKGh0dHBzOi8vdHV0b3JpYWxzLnF1YW50ZWRhLmlvLykKCgojIyMgUmVmZXJlbmNlcyAKCiogSnVsaWEgU2lsZ2UgYW5kIERhdmlkIFJvYmluc29uICgyMDIwKS4gVGV4dCBNaW5pbmcgd2l0aCBSOiBBIFRpZHkgQXBwcm9hY2gsIE/igJlSZWlsbHkuIE9ubGluZSBhdmFpbGFibGUgW2hlcmVdKGh0dHBzOi8vd3d3LnRpZHl0ZXh0bWluaW5nLmNvbS8pCiAgICogW0NoYXB0ZXIgNl0oaHR0cHM6Ly93d3cudGlkeXRleHRtaW5pbmcuY29tL3RvcGljbW9kZWxpbmcuaHRtbCk6IEludHJvZHVjdGlvbiB0b3BpYyBtb2RlbHMKKiBFbWlsIEh2aWRmZWxkdCBhbmQgSnVsaWEgU2lsZ2UgKDIwMjApLiBTdXBlcnZpc2VkIE1hY2hpbmUgTGVhcm5pbmcgZm9yIFRleHQgQW5hbHlzaXMgaW4gUiwgb25saW5lIGF2YWlsYWJsZSBbaGVyZV0oaHR0cHM6Ly9zbWx0YXIuY29tLykKICAgKiBbQ2hhcHRlciA3XShodHRwczovL3NtbHRhci5jb20vbWxjbGFzc2lmaWNhdGlvbi5odG1sKTogQ2xhc3NpZmljYXRpb24KCiMjIyBGdXJ0aGVyIHNvdXJjZXMKCkRhdGFjYW1wCgoqICBbVG9waWMgTW9kZWxpbmcgaW4gUl0oaHR0cHM6Ly9sZWFybi5kYXRhY2FtcC5jb20vY291cnNlcy90b3BpYy1tb2RlbGluZy1pbi1yKSAKCk90aGVyIG9ubGluZQoKKiBbSnVsaWEgU2lsZ2UncyBCbG9nXShodHRwczovL2p1bGlhc2lsZ2UuY29tLyk6IEZ1bGwgb2YgZ3JlYXQgZXhhbXBsZXMgb2YgcHJlZGljdGl2ZSBtb2RlbGluZywgTkxQLCBhbmQgdGhlIGNvbWJpbmF0aW9uIGZvIGJvdGgsIHVzaW5nIHRpZHkgZWNvc3lzdGVtcwoKIyMjIFNlc3Npb24gSW5mbwoKYGBge3J9CnNlc3Npb25JbmZvKCkKYGBgCgo=