### Load standardpackages
library(tidyverse) # Collection of all the good stuff like dplyr, ggplot2 ect.
library(magrittr) # For extra-piping operators (eg. %<>%)
library(tidytext)

Download the data

# Fetch the gzipped pol_tweets archive into a session-scoped temporary file
library(jsonlite)
tmp <- tempfile()
download.file(
  url = "https://github.com/SDS-AAU/SDS-master/raw/master/M2/data/pol_tweets.gz",
  destfile = tmp
)
trying URL 'https://github.com/SDS-AAU/SDS-master/raw/master/M2/data/pol_tweets.gz'
Content type 'application/octet-stream' length 7342085 bytes (7.0 MB)
==================================================
downloaded 7.0 MB
# Read the downloaded gzip file with jsonlite's stream_in().
# NOTE(review): the second positional argument of gzfile() is `open` (the
# connection mode), not a file name -- confirm "pol_tweets" is intended here.
tweets_raw <- stream_in(gzfile(tmp, "pol_tweets"))

 Found 1 records...
 Imported 1 records. Simplifying...
tweets_raw %>% glimpse()
Rows: 1
Columns: 2
$ text   <df[,50000]> <data.frame[1 x 50000]>
$ labels <df[,50000]> <data.frame[1 x 50000]>
# Reshape the one-row import (one column per tweet) into one row per tweet
tweets <- tibble(
  ID     = colnames(tweets_raw[[1]]),
  text   = as.character(tweets_raw[[1]]),
  labels = as.logical(tweets_raw[[2]])
)
#rm(tweets_raw)
head(tweets)
# Drop retweets, i.e. texts that start with "RT"
tweets <- tweets %>%
  filter(!str_detect(text, '^RT'))
head(tweets)

Tidying

# One row per token, produced with the "tweets" tokenizer
tweets_tidy <- unnest_tokens(tweets, output = word, input = text, token = "tweets")
Using `to_lower = TRUE` with `token = 'tweets'` may not preserve URLs.
tweets_tidy %>% head(50)
tweets_tidy %>% count(word, sort = TRUE)

Preprocessing

# Clean the token table: mentions, Twitter artefacts, short tokens,
# infrequent words and stopwords are removed, in that order
tweets_tidy <- tweets_tidy %>%
  filter(
    !str_detect(word, '@'), # remove mentions
    !str_detect(word, '^amp|^http|^t\\.co'), # Twitter specific stuff
#  mutate(word = word %>% str_remove_all('[^[:alnum:]]')) %>% ## remove all special characters
    str_length(word) > 2 # keep tokens with at least 3 characters
  ) %>%
  group_by(word) %>%
  filter(n() > 100) %>% # keep words occurring more than 100 times
  ungroup() %>%
  anti_join(stop_words, by = 'word') # remove stopwords

TFIDF weighting

# top words (raw token counts, computed before the table is de-duplicated)
tweets_tidy %>%
  count(word, sort = TRUE) %>%
  head(20)
# TFIDF weights
# The `tf_idf` column used below (and in the later sections) has to be
# computed first: count each word per tweet, keep one row per (ID, word)
# pair and let tidytext derive tf, idf and tf_idf. This mirrors the
# treatment of the new tweets further down in this script.
tweets_tidy %<>%
  add_count(ID, word) %>%
  distinct(ID, word, .keep_all = TRUE) %>%
  bind_tf_idf(term = word,
              document = ID,
              n = n)
# TFIDF topwords
tweets_tidy %>%
  count(word, wt = tf_idf, sort = TRUE) %>%
  head(20)

Inspecting

Words by party affiliation

# Top 100 words per label, ranked by summed tf-idf
labels_words <- tweets_tidy %>%
  group_by(labels) %>%
  count(word, wt = tf_idf, sort = TRUE, name = "tf_idf") %>%
  slice_head(n = 100) %>%
  ungroup()
# One facet per label; reorder_within() sorts the bars inside each facet
ggplot(
  data = mutate(labels_words, word = reorder_within(word, by = tf_idf, within = labels)),
  mapping = aes(x = word, y = tf_idf, fill = labels)
) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  facet_wrap(~labels, ncol = 2, scales = "free") +
  coord_flip() +
  scale_x_reordered()

Sentiments?

# Attach the Bing sentiment to every token that appears in the lexicon.
# The join key is stated explicitly instead of relying on the automatic
# "Joining, by = ..." detection.
sentiment_tweet <- tweets_tidy %>%
  inner_join(get_sentiments("bing"), by = "word")

… To be continued by you

Towards prediction?

# Replace missing values with 0 in every column of tweets_dtm, then remove
# the object from the workspace again.
# NOTE(review): nothing visible above this point creates `tweets_dtm`, so the
# step that built it appears to be missing from this script -- confirm.
tweets_dtm %<>% mutate(across(everything(), .fns = ~replace_na(.,0))) 
rm(tweets_dtm)

Simple manual baseline

# Match every token of a tweet against the per-label top-word list. Because
# both tables carry a `labels` column, the join yields `labels.x` (label of
# the tweet) and `labels.y` (label whose top-word list contains the token).
tweet_null_model <- tweets_tidy %>%
  inner_join(labels_words, by = 'word')
# `null_res` was referenced below without being defined in this script.
# NOTE(review): the aggregation here is a straightforward per-tweet majority
# vote over the matched top words -- confirm it matches the intended baseline.
null_res <- tweet_null_model %>%
  group_by(ID) %>%
  summarise(
    truth = as.integer(first(labels.x)),
    pred = as.integer(round(mean(labels.y, na.rm = TRUE)))
  )
table(null_res$pred, null_res$truth)
   
        0     1
  0  8859 11501
  1  2588  9048

Predictive model

library(tidymodels)

Preprocessing

# Notice, we use the initial untokenized tweets
# Modelling table: outcome `y` (factor version of the label) plus raw text
data <- tweets %>%
  select(y = labels, text) %>%
  mutate(y = as.factor(y))

Training & Test split

# Fix the RNG so that the stratified 75/25 split is reproducible between
# runs (same seed as the one used for the LDA later in this script).
set.seed(1337)
data_split <- initial_split(data, prop = 0.75, strata = y)

data_train <- training(data_split)
data_test <- testing(data_split)
# Class balance of the training partition
data_train %>% count(y)

Preprocessing pipeline

library(textrecipes) # Additional recipe steps for working with text data
# Rebuilds the manual preprocessing from above as one prepped recipe
data_recipe <- recipe(data_train, y ~.) %>%
  themis::step_upsample(y) %>% # up-sample to balance the classes (optional)
  step_filter(!str_detect(text, '^RT')) %>% # drop retweets upfront
  step_filter(text != "") %>% # drop empty texts
  # textrecipes steps
  step_tokenize(text, token = "tweets") %>% # tokenize
  step_tokenfilter(text, min_times = 75) %>% # filter out rare words
  step_stopwords(text, keep = FALSE) %>% # filter stopwords
  step_tfidf(text) %>% # TFIDF weighting
  #step_pca(all_predictors()) %>% # Dimensionality reduction via PCA (optional)
  prep()
data_recipe
Data Recipe

Inputs:

Training data contained 26239 data points and no missing data.

Operations:

Up-sampling based on y [trained]
Row filtering [trained]
Row filtering [trained]
Tokenization for text [trained]
Text filtering for text [trained]
Stop word removal for text [trained]
Term frequency-inverse document frequency with text [trained]

Since we will not do hyperparameter tuning, we directly bake/juice the recipe

# Processed training data straight from the prepped recipe; the test data is
# pushed through the same recipe
data_train_prep <- juice(data_recipe)
data_test_prep <- bake(data_recipe, data_test)

Defining the models

# Baseline specification
model_null <- null_model(mode = 'classification')
# Logistic regression specification fitted through the 'glm' engine.
# NOTE(review): the fitted call printed later in this script is a plain
# stats::glm(), so `penalty` and `mixture` presumably have no effect with
# this engine -- confirm whether a penalised engine was intended.
model_en <- set_engine(
  logistic_reg(mode = 'classification', penalty = 0.5, mixture = 0.5),
  'glm',
  family = binomial
)

Define the workflow

We will skip the workflow step this time, since we do not evaluate different models against each other.

fit the model

# Fit both specifications on the preprocessed training data
fit_null <- fit(model_null, formula = y ~., data = data_train_prep)
fit_en <- fit(model_en, formula = y ~., data = data_train_prep)
# Collect truth, hard class prediction and P(TRUE) for the test set
pred_collected <- tibble(
  truth = pull(data_test_prep, y),
  pred = pull(predict(fit_en, new_data = data_test_prep), .pred_class),
  pred_prob = pull(predict(fit_en, new_data = data_test_prep, type = "prob"), .pred_TRUE)
)
autoplot(conf_mat(pred_collected, truth, pred), type = 'heatmap')

summary(conf_mat(pred_collected, truth, pred))

Well… soso

Using the model for new prediction

Simple test

# Score a single hand-written tweet: bake it with the recipe, then predict
pred_own <- tibble(text = 'trump is really bad. we need more green energy  to save the enviroment and fuuture of our children')
predict(fit_en, new_data = bake(data_recipe, pred_own))

Prediction on new tweets

New data

  • We could also use the model to predict on new data, such as the just scraped discussion on the presidential debate.
# Fetch the gzipped pres_debate_2020 archive (reuses the temp file from above)
download.file(
  url = "https://github.com/SDS-AAU/SDS-master/raw/master/M2/data/pres_debate_2020.gz",
  destfile = tmp
)
trying URL 'https://github.com/SDS-AAU/SDS-master/raw/master/M2/data/pres_debate_2020.gz'
Content type 'application/octet-stream' length 2095527 bytes (2.0 MB)
==================================================
downloaded 2.0 MB
# Read the downloaded gzip file with jsonlite's stream_in().
# NOTE(review): the second positional argument of gzfile() is `open` (the
# connection mode), not a file name -- confirm "pres_debate_2020" is intended.
tweets_raw_new <- stream_in(gzfile(tmp, "pres_debate_2020"))

 Found 1 records...
 Imported 1 records. Simplifying...
tweets_raw_new %>% glimpse()
Rows: 1
Columns: 33
$ id              <df[,8811]> <data.frame[1 x 8811]>
$ conversation_id <df[,8811]> <data.frame[1 x 8811]>
$ created_at      <df[,8811]> <data.frame[1 x 8811]>
$ date            <df[,8811]> <data.frame[1 x 8811]>
$ timezone        <df[,8811]> <data.frame[1 x 8811]>
$ place           <df[,8811]> <data.frame[1 x 8811]>
$ tweet           <df[,8811]> <data.frame[1 x 8811]>
$ language        <df[,8811]> <data.frame[1 x 8811]>
$ hashtags        <df[,8811]> <data.frame[1 x 8811]>
$ cashtags        <df[,8811]> <data.frame[1 x 8811]>
$ user_id         <df[,8811]> <data.frame[1 x 8811]>
$ user_id_str     <df[,8811]> <data.frame[1 x 8811]>
$ username        <df[,8811]> <data.frame[1 x 8811]>
$ name            <df[,8811]> <data.frame[1 x 8811]>
$ day             <df[,8811]> <data.frame[1 x 8811]>
$ hour            <df[,8811]> <data.frame[1 x 8811]>
$ link            <df[,8811]> <data.frame[1 x 8811]>
$ urls            <df[,8811]> <data.frame[1 x 8811]>
$ photos          <df[,8811]> <data.frame[1 x 8811]>
$ video           <df[,8811]> <data.frame[1 x 8811]>
$ thumbnail       <df[,8811]> <data.frame[1 x 8811]>
$ nlikes          <df[,8811]> <data.frame[1 x 8811]>
$ nreplies        <df[,8811]> <data.frame[1 x 8811]>
$ nretweets       <df[,8811]> <data.frame[1 x 8811]>
$ quote_url       <df[,8811]> <data.frame[1 x 8811]>
$ search          <df[,8811]> <data.frame[1 x 8811]>
$ near            <df[,8811]> <data.frame[1 x 8811]>
$ geo             <df[,8811]> <data.frame[1 x 8811]>
$ source          <df[,8811]> <data.frame[1 x 8811]>
$ reply_to        <df[,8811]> <data.frame[1 x 8811]>
$ translate       <df[,8811]> <data.frame[1 x 8811]>
$ trans_src       <df[,8811]> <data.frame[1 x 8811]>
$ trans_dest      <df[,8811]> <data.frame[1 x 8811]>
# Turn the first row of the `id` and `tweet` frames into one row per tweet
tweets_new <- tibble(
  ID   = as.character(t(tweets_raw_new$id[1,])),
  text = as.character(t(tweets_raw_new$tweet[1,]))
)
#rm(tweets_raw_new)
tweets_new %>% glimpse()
Rows: 8,811
Columns: 2
$ ID   <chr> "1318944772183281664", "1318938583122743296", "1318932554897031169", "1318928783169245185", "…
$ text <chr> "Still time to register: Students can join the @UVADemocracy Student Advisory Council for a s…

Doing a prediction

# Push the new tweets through the already prepped recipe
data_new <- bake(data_recipe, tweets_new)
glimpse(data_new)
Rows: 8,811
Columns: 42
$ `tfidf_text_#COVID19`  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_act         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_american    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_americans   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000…
$ tfidf_text_amp         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000…
$ tfidf_text_bill        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_can         <dbl> 1.4189092, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000…
$ tfidf_text_care        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 1.670…
$ tfidf_text_congress    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_coronavirus <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_country     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_covid19     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_day         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
$ tfidf_text_families    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_get         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
$ tfidf_text_great       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_health      <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 1.676…
$ tfidf_text_help        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_house       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_im          <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
$ tfidf_text_just        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_make        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
$ tfidf_text_must        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_need        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_new         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_now         <dbl> 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.0…
$ tfidf_text_one         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
$ tfidf_text_pandemic    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_people      <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
$ tfidf_text_president   <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
$ tfidf_text_qt          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_senate      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_state       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_support     <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000…
$ tfidf_text_thank       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_time        <dbl> 1.4925070, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000…
$ tfidf_text_today       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_trump       <dbl> 0.0000000, 2.1272525, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000…
$ tfidf_text_us          <dbl> 0.000000, 0.000000, 3.432999, 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
$ tfidf_text_vote        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tfidf_text_work        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
$ tfidf_text_workers     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …

Exploring the new data & predictions

# Predict class and class probabilities for the baked new tweets. These two
# objects were used below without being created anywhere in the script.
pred_new <- fit_en %>% predict(new_data = data_new)
pred_prob_new <- fit_en %>% predict(new_data = data_new, type = "prob")
data_new %<>%
  bind_cols(pred_new) %>%
  rename(pred = .pred_class) %>%
  bind_cols(pred_prob_new) %>%
  rename(pred_prob = .pred_TRUE) 
# Carry the predictions over to the raw tweets. bake() returned as many rows
# as tweets_new has (8,811 in both glimpses above); this assumes the row
# order is unchanged as well.
tweets_new %<>%
  mutate(pred = data_new$pred,
         pred_prob = data_new$pred_prob)
# Tokenise the new tweets the same way as the training tweets
tweets_tidy_new <- tweets_new %>%
  unnest_tokens(word, text, token = "tweets")
# preprocessing
tweets_tidy_new %<>%
  filter(!(word %>% str_detect('@|#presidential'))) %>% # remove mentions and #presidential* hashtags
  filter(!(word %>% str_detect('^amp|^http|^t\\.co'))) %>% # Twitter specific stuff
#  mutate(word = word %>% str_remove_all('[^[:alnum:]]')) %>% ## remove all special characters
  filter(str_length(word) > 2 ) %>% # Remove words with less than  3 characters
  group_by(word) %>%
  filter(n() > 100) %>% # keep only words occurring more than 100 times
  ungroup() %>%
  anti_join(stop_words, by = 'word') # remove stopwords
# TFIDF weights
tweets_tidy_new %<>%
  add_count(ID, word) %>%
  distinct(ID, word, .keep_all = TRUE) %>%
  bind_tf_idf(term = word,
              document = ID,
              n = n)
# Top 20 words per predicted class, ranked by summed tf-idf
labels_words_new <- tweets_tidy_new %>%
  group_by(pred) %>%
  count(word, wt = tf_idf, sort = TRUE, name = "tf_idf") %>%
  slice_head(n = 20) %>%
  ungroup()
# One facet per predicted class, bars ordered inside each facet
ggplot(
  data = mutate(labels_words_new, word = reorder_within(word, by = tf_idf, within = pred)),
  mapping = aes(x = word, y = tf_idf, fill = pred)
) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  facet_wrap(~pred, ncol = 2, scales = "free") +
  coord_flip() +
  scale_x_reordered()

Topic models (LDA) on new data

# for LDA analysis
library(topicmodels)

Preparing the Data

# topicmodels' LDA() works on a document-term matrix (dtm), so cast the tidy
# token table into one: documents = tweet IDs, terms = words, cells = n
tweets_dtm <- cast_dtm(tweets_tidy_new, document = ID, term = word, value = n)

Lets take a look:

tweets_dtm
<<DocumentTermMatrix (documents: 7567, terms: 95)>>
Non-/sparse entries: 22637/696228
Sparsity           : 97%
Maximal term length: 27
Weighting          : term frequency (tf)
  • We see again that the matrix is still rather sparse, which is an artefact of text data generally, but even more so when using Twitter data.
  • Let's try to see if we could reduce that somewhat by deleting less often used terms.
library(tm)
tweets_dtm %>% removeSparseTerms(sparse = .99)
<<DocumentTermMatrix (documents: 7567, terms: 95)>>
Non-/sparse entries: 22637/696228
Sparsity           : 97%
Maximal term length: 27
Weighting          : term frequency (tf)
  • Now we can perform an LDA, using the more accurate Gibbs sampling as the method.
# Six-topic LDA fitted by Gibbs sampling with a fixed seed
tweets_lda <- LDA(
  tweets_dtm,
  k = 6,
  method = "Gibbs",
  control = list(seed = 1337)
)

\(\beta\): Word-Topic Association

  • \(\beta\) is an output of the LDA model, indicating the probability that a word occurs in a certain topic.
  • Therefore, looking at the top-probability words of a topic often gives us a good intuition regarding its properties.
# tidy() has a method for the LDA fit; keep the ten highest-beta terms of
# every topic
lda_beta <- tidy(tweets_lda, matrix = "beta") %>%
  group_by(topic) %>%
  arrange(topic, desc(beta)) %>%
  slice_head(n = 10) %>%
  ungroup()
head(lda_beta)
# reorder_within() orders the terms separately inside each topic facet
lda_beta %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  arrange(desc(beta)) %>%
  ggplot(aes(term, beta, fill = as.factor(topic))) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  scale_x_reordered() +
  labs(
    title = "Top 10 terms in each LDA topic",
    x = NULL,
    y = expression(beta)
  ) +
  facet_wrap(~ topic, ncol = 2, scales = "free")

\(\gamma\): Document-Topic Association

  • In LDA, documents are represented as a mix of topics. This association of a document to a topic is captured by \(\gamma\)
# Document-topic matrix (gamma) of the LDA fit in tidy form
lda_gamma <- tidy(tweets_lda, matrix = "gamma")
head(lda_gamma)
# Histogram of all gamma values, counts on a log10 axis
ggplot(lda_gamma, aes(gamma)) +
  geom_histogram() +
  scale_y_log10() +
  labs(
    title = "Distribution of probabilities for all topics",
    y = "Number of documents",
    x = expression(gamma)
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Attach the predicted class of each tweet to its topic shares
lda_gamma <- left_join(
  lda_gamma,
  select(tweets_new, ID, pred),
  by = c('document' = 'ID')
)
# Total gamma per predicted class and topic, ascending within class
arrange(
  summarise(group_by(lda_gamma, pred, topic), gamma = sum(gamma)),
  pred, gamma
)
`summarise()` has grouped output by 'pred'. You can override using the `.groups` argument.
# Same histogram as before, split into one panel per topic
ggplot(lda_gamma, aes(gamma, fill = as.factor(topic))) +
  geom_histogram(show.legend = FALSE) +
  facet_wrap(~ topic, ncol = 2) +
  scale_y_log10() +
  labs(
    title = "Distribution of probability for each topic",
    y = "Number of documents",
    x = expression(gamma)
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Build the JSON payload that LDAvis needs from a fitted topicmodels model.
#
# Arguments:
#   fitted   fitted topic model, passed to topicmodels::posterior()
#   doc_dtm  document-term matrix; it is tidied to obtain document lengths
#            and term frequencies
#   method   scaling for the inter-topic map: "PCA" (LDAvis::jsPCA) or
#            "TSNE" (requires the tsne package)
#   doc_in   currently unused; kept so existing calls keep working
#   topic_in optional index vector restricting the topics that are passed on
# Returns the value of LDAvis::createJSON().
# Stops with an error if a required package is missing or `method` is not
# one of the two supported values.
topicmodels_json_ldavis <- function(fitted, doc_dtm, method = "PCA", doc_in = NULL, topic_in = NULL) {
  # Check dependencies without attaching them to the search path
  for (pkg in c("topicmodels", "dplyr", "LDAvis")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required.", call. = FALSE)
    }
  }
  # Reject unsupported methods up front instead of failing later on an
  # undefined `mds`
  method <- match.arg(method, choices = c("PCA", "TSNE"))

  # Find required quantities
  post <- topicmodels::posterior(fitted)
  phi <- as.matrix(post$terms) # Topic-term distribution
  theta <- as.matrix(post$topics) # Document-topic matrix

  # Restrict
  if (!is.null(topic_in)) {
    # drop = FALSE keeps matrices (and their dimnames) for a single topic
    phi <- phi[topic_in, , drop = FALSE]
    theta <- theta[, topic_in, drop = FALSE]
    # createJSON() expects every row of theta to sum to 1, which no longer
    # holds after dropping topics, so renormalise the remaining shares
    theta <- theta / rowSums(theta)
  }

  # The tidy() method for document-term matrices comes from tidytext, which
  # the script attaches at the top
  text_tidy <- tidy(doc_dtm)
  vocab <- colnames(phi)
  # Left joins keep the row order of theta / vocab
  doc_length <- dplyr::left_join(
    dplyr::tibble(document = rownames(theta)),
    dplyr::count(text_tidy, document, wt = count),
    by = "document"
  )
  tf <- dplyr::left_join(
    dplyr::tibble(term = vocab),
    dplyr::count(text_tidy, term, wt = count),
    by = "term"
  )

  mds <- switch(method,
    PCA = LDAvis::jsPCA,
    TSNE = {
      if (!requireNamespace("tsne", quietly = TRUE)) {
        stop("Package 'tsne' is required for method = \"TSNE\".", call. = FALSE)
      }
      function(x) tsne::tsne(svd(x)$u)
    }
  )

  # Convert to json
  LDAvis::createJSON(
    phi = phi,
    theta = theta,
    vocab = vocab,
    doc.length = dplyr::pull(doc_length, n),
    term.frequency = dplyr::pull(tf, n),
    reorder.topics = FALSE,
    mds.method = mds,
    plot.opts = list(xlab = "Dim.1", ylab = "Dim.2")
  )
}
library(LDAvis)
# Build the LDAvis payload with the t-SNE layout and open the viewer
json_lda <- topicmodels_json_ldavis(
  fitted = tweets_lda,
  doc_dtm = tweets_dtm,
  method = "TSNE"
)
serVis(json_lda)
# json_lda %>% serVis(out.dir = 'LDAviz')

Model explainability

Global

# Variable-importance plot for the fitted logistic regression
vip::vip(fit_en)

Local

library(lime)
explanation <- .load_image_example()
plot_image_explanation(explanation)

# Build a lime explainer from the baked new tweets and the fitted model.
# NOTE(review): the feature_type listing printed below includes `pred`,
# `.pred_FALSE` and `pred_prob`, i.e. data_new still carries the prediction
# columns at this point and the explainer treats them as features -- confirm
# this is intended.
lime_tweets <- lime(data_new, fit_en)
# Apply the prepped recipe to `data` and return the baked result
recipe_deploy <- function(data) {
  bake(data_recipe, data)
}
lime_tweets
$model
parsnip model object

Fit time:  561ms 

Call:  stats::glm(formula = y ~ ., family = ~binomial, data = data)

Coefficients:
           (Intercept)   `tfidf_text_#COVID19`          tfidf_text_act     tfidf_text_american  
             -0.405834                0.199698                0.202027               -0.103218  
  tfidf_text_americans          tfidf_text_amp         tfidf_text_bill          tfidf_text_can  
              0.349421                0.101057                0.017583                0.145496  
       tfidf_text_care     tfidf_text_congress  tfidf_text_coronavirus      tfidf_text_country  
              0.407076                0.216416                0.007831                0.116795  
    tfidf_text_covid19          tfidf_text_day     tfidf_text_families          tfidf_text_get  
              0.119973                0.045005                0.252039                0.233111  
      tfidf_text_great       tfidf_text_health         tfidf_text_help        tfidf_text_house  
             -0.540967                0.561288                0.045008                0.240416  
         tfidf_text_im         tfidf_text_just         tfidf_text_make         tfidf_text_must  
              0.312304                0.104538                0.380366                0.509962  
       tfidf_text_need          tfidf_text_new          tfidf_text_now          tfidf_text_one  
              0.431828                0.169590                0.311848                0.073044  
   tfidf_text_pandemic       tfidf_text_people    tfidf_text_president           tfidf_text_qt  
              0.393016                0.511551                0.031177               -0.165128  
     tfidf_text_senate        tfidf_text_state      tfidf_text_support        tfidf_text_thank  
             -0.022831               -0.069334               -0.029238                0.035723  
       tfidf_text_time        tfidf_text_today        tfidf_text_trump           tfidf_text_us  
              0.043689                0.217029                1.126121                0.090043  
       tfidf_text_vote         tfidf_text_work      tfidf_text_workers  
              0.340436               -0.023399                0.388925  

Degrees of Freedom: 33281 Total (i.e. Null);  33239 Residual
Null Deviance:      46140 
Residual Deviance: 44410    AIC: 44490

$preprocess
function (x) 
x
<bytecode: 0x7fbd76c45f68>
<environment: 0x7fbfc195c518>

$bin_continuous
[1] TRUE

$n_bins
[1] 4

$quantile_bins
[1] TRUE

$use_density
[1] TRUE

$feature_type
   tfidf_text_#COVID19         tfidf_text_act    tfidf_text_american   tfidf_text_americans 
             "numeric"              "numeric"              "numeric"              "numeric" 
        tfidf_text_amp        tfidf_text_bill         tfidf_text_can        tfidf_text_care 
             "numeric"              "numeric"              "numeric"              "numeric" 
   tfidf_text_congress tfidf_text_coronavirus     tfidf_text_country     tfidf_text_covid19 
             "numeric"              "numeric"              "numeric"              "numeric" 
        tfidf_text_day    tfidf_text_families         tfidf_text_get       tfidf_text_great 
             "numeric"              "numeric"              "numeric"              "numeric" 
     tfidf_text_health        tfidf_text_help       tfidf_text_house          tfidf_text_im 
             "numeric"              "numeric"              "numeric"              "numeric" 
       tfidf_text_just        tfidf_text_make        tfidf_text_must        tfidf_text_need 
             "numeric"              "numeric"              "numeric"              "numeric" 
        tfidf_text_new         tfidf_text_now         tfidf_text_one    tfidf_text_pandemic 
             "numeric"              "numeric"              "numeric"              "numeric" 
     tfidf_text_people   tfidf_text_president          tfidf_text_qt      tfidf_text_senate 
             "numeric"              "numeric"             "constant"              "numeric" 
      tfidf_text_state     tfidf_text_support       tfidf_text_thank        tfidf_text_time 
             "numeric"              "numeric"              "numeric"              "numeric" 
      tfidf_text_today       tfidf_text_trump          tfidf_text_us        tfidf_text_vote 
             "numeric"              "numeric"              "numeric"              "numeric" 
       tfidf_text_work     tfidf_text_workers                   pred            .pred_FALSE 
             "numeric"              "numeric"               "factor"              "numeric" 
             pred_prob 
             "numeric" 

$bin_cuts
$bin_cuts$`tfidf_text_#COVID19`
[1] 0.0000000 0.7582041 1.5164082 2.2746123 3.0328163

$bin_cuts$tfidf_text_act
[1] 0.000000 1.487717 2.975434 4.463152 5.950869

$bin_cuts$tfidf_text_american
[1] 0.000000 1.069362 2.138725 3.208087 4.277450

$bin_cuts$tfidf_text_americans
[1] 0.000000 1.117573 2.235147 3.352720 4.470293

$bin_cuts$tfidf_text_amp
[1] 0.0000000 0.7427382 1.4854765 2.2282147 2.9709530

$bin_cuts$tfidf_text_bill
[1] 0.000000 1.695576 3.391153 5.086729 6.782305

$bin_cuts$tfidf_text_can
[1] 0.0000000 0.7094546 1.4189092 2.1283638 2.8378184

$bin_cuts$tfidf_text_care
[1] 0.000000 1.253223 2.506446 3.759669 5.012893

$bin_cuts$tfidf_text_congress
[1] 0.000000 1.868721 3.737443 5.606164 7.474886

$bin_cuts$tfidf_text_coronavirus
[1] 0.000000 1.349852 2.699703 4.049555 5.399406

$bin_cuts$tfidf_text_country
[1] 0.000000 1.168573 2.337146 3.505719 4.674292

$bin_cuts$tfidf_text_covid19
[1] 0.000000 1.129999 2.259997 3.389996 4.519994

$bin_cuts$tfidf_text_day
[1] 0.000000 1.122468 2.244936 3.367404 4.489872

$bin_cuts$tfidf_text_families
[1] 0.000000 1.510404 3.020807 4.531211 6.041614

$bin_cuts$tfidf_text_get
[1] 0.0000000 0.7646984 1.5293968 2.2940952 3.0587936

$bin_cuts$tfidf_text_great
[1] 0.000000 1.101176 2.202352 3.303528 4.404703

$bin_cuts$tfidf_text_health
[1] 0.000000 1.257469 2.514937 3.772406 5.029874

$bin_cuts$tfidf_text_help
[1] 0.000000 1.244945 2.489891 3.734836 4.979782

$bin_cuts$tfidf_text_house
[1] 0.000000 1.279865 2.559731 3.839596 5.119461

$bin_cuts$tfidf_text_im
[1] 0.0000000 0.8640819 1.7281638 2.5922458 3.4563277

$bin_cuts$tfidf_text_just
[1] 0.000000 0.685848 1.371696 2.057544 2.743392

$bin_cuts$tfidf_text_make
[1] 0.0000000 0.9671067 1.9342134 2.9013201 3.8684268

$bin_cuts$tfidf_text_must
[1] 0.000000 1.174615 2.349229 3.523844 4.698458

$bin_cuts$tfidf_text_need
[1] 0.0000000 0.9024811 1.8049623 2.7074434 3.6099246

$bin_cuts$tfidf_text_new
[1] 0.000000 1.046605 2.093209 3.139814 4.186419

$bin_cuts$tfidf_text_now
[1] 0.0000000 0.7902302 1.5804604 2.3706905 3.1609207

$bin_cuts$tfidf_text_one
[1] 0.0000000 0.7829797 1.5659594 2.3489390 3.1319187

$bin_cuts$tfidf_text_pandemic
[1] 0.000000 1.447745 2.895490 4.343234 5.790979

$bin_cuts$tfidf_text_people
[1] 0.0000000 0.8210539 1.6421079 2.4631618 3.2842158

$bin_cuts$tfidf_text_president
[1] 0.0000000 0.7250178 1.4500356 2.1750534 2.9000712

$bin_cuts$tfidf_text_qt
NULL

$bin_cuts$tfidf_text_senate
[1] 0.000000 1.721888 3.443776 5.165664 6.887553

$bin_cuts$tfidf_text_state
[1] 0.000000 1.405411 2.810823 4.216234 5.621646

$bin_cuts$tfidf_text_support
[1] 0.000000 1.284599 2.569198 3.853798 5.138397

$bin_cuts$tfidf_text_thank
[1] 0.000000 1.187163 2.374326 3.561489 4.748652

$bin_cuts$tfidf_text_time
[1] 0.0000000 0.7462535 1.4925070 2.2387605 2.9850141

$bin_cuts$tfidf_text_today
[1] 0.000000 1.162676 2.325353 3.488029 4.650706

$bin_cuts$tfidf_text_trump
[1] 0.0000000 0.5318131 1.0636262 1.5954394 2.1272525

$bin_cuts$tfidf_text_us
[1] 0.0000000 0.8582498 1.7164996 2.5747494 3.4329992

$bin_cuts$tfidf_text_vote
[1] 0.0000000 0.9377365 1.8754729 2.8132094 3.7509459

$bin_cuts$tfidf_text_work
[1] 0.000000 1.217984 2.435968 3.653952 4.871937

$bin_cuts$tfidf_text_workers
[1] 0.000000 1.996371 3.992742 5.989113 7.985484

$bin_cuts$pred
NULL

$bin_cuts$.pred_FALSE
        0%        25%        50%        75%       100% 
0.06297783 0.38484544 0.55076602 0.60008848 0.94205857 

$bin_cuts$pred_prob
        0%        25%        50%        75%       100% 
0.05794143 0.39991152 0.44923398 0.61515456 0.93702217 


$feature_distribution
$feature_distribution$`tfidf_text_#COVID19`

           1            2            3            4 
0.9564181137 0.0253092725 0.0003404835 0.0179321303 

$feature_distribution$tfidf_text_act

           1            2            3            4 
0.9980705936 0.0012484395 0.0001134945 0.0005674725 

$feature_distribution$tfidf_text_american

          1           2           3           4 
0.991147429 0.007263648 0.000226989 0.001361934 

$feature_distribution$tfidf_text_americans

          1           2           4 
0.990806946 0.006015208 0.003177846 

$feature_distribution$tfidf_text_amp

         1          2          3          4 
0.95471570 0.02712518 0.00329134 0.01486778 

$feature_distribution$tfidf_text_bill

           1            2            3            4 
0.9989785495 0.0006809670 0.0001134945 0.0002269890 

$feature_distribution$tfidf_text_can

          1           2           3           4 
0.946657587 0.031324481 0.001361934 0.020655998 

$feature_distribution$tfidf_text_care

          1           2           4 
0.995006242 0.003858813 0.001134945 

$feature_distribution$tfidf_text_congress

           1            2            4 
0.9994325275 0.0003404835 0.0002269890 

$feature_distribution$tfidf_text_coronavirus

           1            2            4 
0.9972761321 0.0021563954 0.0005674725 

$feature_distribution$tfidf_text_country

          1           2           4 
0.992622858 0.004880263 0.002496879 

$feature_distribution$tfidf_text_covid19

          1           2           4 
0.992622858 0.004766769 0.002610373 

$feature_distribution$tfidf_text_day

           1            2            3            4 
0.9905799569 0.0049937578 0.0001134945 0.0043127908 

$feature_distribution$tfidf_text_families

           1            2            4 
0.9984110771 0.0014754284 0.0001134945 

$feature_distribution$tfidf_text_get

           1            2            3            4 
0.9582340256 0.0263307230 0.0005674725 0.0148677789 

$feature_distribution$tfidf_text_great

           1            2            3            4 
0.9891045284 0.0060152083 0.0001134945 0.0047667688 

$feature_distribution$tfidf_text_health

           1            2            3            4 
0.9947792532 0.0038588128 0.0003404835 0.0010214505 

$feature_distribution$tfidf_text_help

          1           2           4 
0.994211781 0.003631824 0.002156395 

$feature_distribution$tfidf_text_house

          1           2           3           4 
0.994552264 0.002610373 0.000226989 0.002610373 

$feature_distribution$tfidf_text_im

          1           2           3           4 
0.973328794 0.015208262 0.000680967 0.010781977 

$feature_distribution$tfidf_text_just

           1            2            3            4 
0.9407558733 0.0297355578 0.0007944615 0.0287141074 

$feature_distribution$tfidf_text_make

           1            2            3            4 
0.9820678697 0.0111224606 0.0001134945 0.0066961752 

$feature_distribution$tfidf_text_must

          1           2           3           4 
0.992963341 0.004199296 0.000226989 0.002610373 

$feature_distribution$tfidf_text_need

           1            2            3            4 
0.9779820679 0.0115764385 0.0005674725 0.0098740211 

$feature_distribution$tfidf_text_new

          1           2           3           4 
0.986267166 0.007377142 0.000226989 0.006128703 

$feature_distribution$tfidf_text_now

          1           2           3           4 
0.963227783 0.019975031 0.000453978 0.016343207 

$feature_distribution$tfidf_text_one

         1          2          3          4 
0.96107139 0.02133697 0.00102145 0.01657020 

$feature_distribution$tfidf_text_pandemic

          1           2           4 
0.997616616 0.002156395 0.000226989 

$feature_distribution$tfidf_text_people

           1            2            3            4 
0.9712858926 0.0203155147 0.0005674725 0.0078311202 

$feature_distribution$tfidf_text_president

          1           2           3           4 
0.951083872 0.030076041 0.001134945 0.017705141 

$feature_distribution$tfidf_text_qt
[1] NA

$feature_distribution$tfidf_text_senate

           1            2            4 
0.9990920440 0.0007944615 0.0001134945 

$feature_distribution$tfidf_text_state

          1           2           4 
0.997162638 0.001929406 0.000907956 

$feature_distribution$tfidf_text_support

           1            2            3            4 
0.9950062422 0.0031778459 0.0001134945 0.0017024174 

$feature_distribution$tfidf_text_thank

          1           2           3           4 
0.992736352 0.003404835 0.000226989 0.003631824 

$feature_distribution$tfidf_text_time

           1            2            3            4 
0.9550561798 0.0255362615 0.0005674725 0.0188400863 

$feature_distribution$tfidf_text_today

          1           2           4 
0.991714902 0.003518329 0.004766769 

$feature_distribution$tfidf_text_trump

          1           2           3           4 
0.882306208 0.062308478 0.003518329 0.051866984 

$feature_distribution$tfidf_text_us

           1            2            3            4 
0.9734422880 0.0148677789 0.0007944615 0.0108954716 

$feature_distribution$tfidf_text_vote

           1            2            3            4 
0.9804789468 0.0111224606 0.0007944615 0.0076041312 

$feature_distribution$tfidf_text_work

          1           2           4 
0.994098286 0.003631824 0.002269890 

$feature_distribution$tfidf_text_workers

           1            2            4 
0.9997730110 0.0001134945 0.0001134945 

$feature_distribution$pred

    FALSE      TRUE 
0.6044717 0.3955283 

$feature_distribution$.pred_FALSE

         1          2          3          4 
0.25002837 0.25286574 0.47758484 0.01952105 

$feature_distribution$pred_prob

        1         2         3         4 
0.4436500 0.0577687 0.2485529 0.2500284 


attr(,"class")
[1] "data_frame_explainer" "explainer"            "list"                
# Explain the model's predictions for 8 randomly drawn rows of `data_new`
# with LIME, keeping the 10 highest-weighted features per explanation.
# NOTE(review): the explainer printed above lists `pred`, `.pred_FALSE` and
# `pred_prob` among its features, i.e. `data_new` still carries the prediction
# columns at this point -- consider building the explainer (and sampling the
# cases) from the predictor columns only.
explained_tweets <- explain(
  x = data_new %>% slice_sample(n = 8), # slice_sample() supersedes sample_n()
  explainer = lime_tweets,
  n_permutations = 5000,
  dist_fun = "gower",
  kernel_width = 0.75,
  n_features = 10,
  feature_select = "highest_weights",
  n_labels = 1 # explain the predicted class of each case
  # labels = "TRUE" # alternative to n_labels: always explain the positive
  #                 # class (the levels here are FALSE/TRUE, not "Yes")
)
# Plot the selected feature weights of each explanation
explained_tweets %>% plot_features()

# NOTE(review): this call fails (see the error printed below). `lime_tweets`
# is a "data_frame_explainer" built from the baked tf-idf columns, while
# plot_text_explanations() appears to need explanations created from raw
# text input -- confirm against the lime documentation.
explained_tweets %>% plot_text_explanations()
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
Error: original_text is not a string (a length one character vector).

Endnotes

Packages & Ecosystem

Further NLP packages ecosystem

References

  • Julia Silge and David Robinson (2020). Text Mining with R: A Tidy Approach, O’Reilly. Available online at https://www.tidytextmining.com/
  • Emil Hvidfeldt and Julia Silge (2020). Supervised Machine Learning for Text Analysis in R. Available online at https://smltar.com/

Further sources

Datacamp

Other online

  • Julia Silge’s Blog (https://juliasilge.com/): Full of great examples of predictive modeling, NLP, and the combination of both, using tidy ecosystems

Session Info

# Print the R version, platform and loaded package versions for reproducibility
sessionInfo()
LS0tCnRpdGxlOiAnTkxQIHdvcmtzaG9wIC0gRXhwbG9yaW5nIFByZXNpZGVudGlhbCBEZWJhdGUgb24gdHdpdHRlcicKYXV0aG9yOiAiRGFuaWVsIFMuIEhhaW4gKGRzaEBidXNpbmVzcy5hYXUuZGspIgpkYXRlOiAiVXBkYXRlZCBgciBmb3JtYXQoU3lzLnRpbWUoKSwgJyVCICVkLCAlWScpYCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKICAgIGRmX3ByaW50OiBwYWdlZAogICAgdG9jOiB0cnVlCiAgICB0b2NfZGVwdGg6IDIKICAgIHRvY19mbG9hdDoKICAgICAgY29sbGFwc2VkOiBmYWxzZQogICAgdGhlbWU6IGZsYXRseQotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQojIyMgR2VuZXJpYyBwcmVhbWJsZQpybShsaXN0PWxzKCkpClN5cy5zZXRlbnYoTEFORyA9ICJlbiIpICMgRm9yIGVuZ2xpc2ggbGFuZ3VhZ2UKb3B0aW9ucyhzY2lwZW4gPSA1KSAjIFRvIGRlYWN0aXZhdGUgYW5ub3lpbmcgc2NpZW50aWZpYyBudW1iZXIgbm90YXRpb24KCiMjIyBLbml0ciBvcHRpb25zCmxpYnJhcnkoa25pdHIpICMgRm9yIGRpc3BsYXkgb2YgdGhlIG1hcmtkb3duCmtuaXRyOjpvcHRzX2NodW5rJHNldCh3YXJuaW5nPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICBtZXNzYWdlPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICBjb21tZW50PUZBTFNFLCAKICAgICAgICAgICAgICAgICAgICAgZmlnLmFsaWduPSJjZW50ZXIiCiAgICAgICAgICAgICAgICAgICAgICkKYGBgCgpgYGB7cn0KIyMjIExvYWQgc3RhbmRhcmRwYWNrYWdlcwpsaWJyYXJ5KHRpZHl2ZXJzZSkgIyBDb2xsZWN0aW9uIG9mIGFsbCB0aGUgZ29vZCBzdHVmZiBsaWtlIGRwbHlyLCBnZ3Bsb3QyIGVjdC4KbGlicmFyeShtYWdyaXR0cikgIyBGb3IgZXh0cmEtcGlwaW5nIG9wZXJhdG9ycyAoZWcuICU8PiUpCmBgYAoKYGBge3J9CmxpYnJhcnkodGlkeXRleHQpCmBgYAoKCiMgRG93bmxvYWQgdGhlIGRhdGEKCmBgYHtyfQojIGRvd25sb2FkIGFuZCBvcGVuIHNvbWUgVHJ1bXAgdHdlZXRzIGZyb20gdHJ1bXBfdHdlZXRfZGF0YV9hcmNoaXZlCmxpYnJhcnkoanNvbmxpdGUpCnRtcCA8LSB0ZW1wZmlsZSgpCmRvd25sb2FkLmZpbGUoImh0dHBzOi8vZ2l0aHViLmNvbS9TRFMtQUFVL1NEUy1tYXN0ZXIvcmF3L21hc3Rlci9NMi9kYXRhL3BvbF90d2VldHMuZ3oiLCB0bXApCgp0d2VldHNfcmF3IDwtIHN0cmVhbV9pbihnemZpbGUodG1wLCAicG9sX3R3ZWV0cyIpKQpgYGAKCmBgYHtyfQp0d2VldHNfcmF3ICU+JSBnbGltcHNlKCkKYGBgCgpgYGB7cn0KdHdlZXRzIDwtIHRpYmJsZShJRCA9IGNvbG5hbWVzKHR3ZWV0c19yYXdbWzFdXSksIAogICAgICAgICAgICAgICAgIHRleHQgPSB0d2VldHNfcmF3W1sxXV0gJT4lIGFzLmNoYXJhY3RlcigpLCAKICAgICAgICAgICAgICAgICBsYWJlbHMgPSB0d2VldHNfcmF3W1syXV0gJT4lIGFzLmxvZ2ljYWwoKSkKI3JtKHR3ZWV0c19yYXcpCmBgYAoKYGBge3J9CnR3ZWV0cyAlPiUgaGVhZCgp
CmBgYAoKCmBgYHtyfQp0d2VldHMgJTw+JQogIGZpbHRlcighKHRleHQgJT4lIHN0cl9kZXRlY3QoJ15SVCcpKSkgIyBGaWx0ZXIgcmV0d2VldHMKYGBgCgpgYGB7cn0KdHdlZXRzICU+JSBoZWFkKCkKYGBgCgojIFRpZHlpbmcKCmBgYHtyfQp0d2VldHNfdGlkeSA8LSB0d2VldHMgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0ZXh0LCB0b2tlbiA9ICJ0d2VldHMiKSAKYGBgCgpgYGB7cn0KdHdlZXRzX3RpZHkgJT4lIGhlYWQoNTApCmBgYAoKCmBgYHtyfQp0d2VldHNfdGlkeSAlPiUgY291bnQod29yZCwgc29ydCA9IFRSVUUpCmBgYAoKCiMgUHJlcHJvY2Vzc2luZwoKYGBge3J9CiMgcHJlcHJvY2Vzc2luZwp0d2VldHNfdGlkeSAlPD4lCiAgZmlsdGVyKCEod29yZCAlPiUgc3RyX2RldGVjdCgnQCcpKSkgJT4lICMgcmVtb3ZlIG1lbnRpb25zCiAgZmlsdGVyKCEod29yZCAlPiUgc3RyX2RldGVjdCgnXmFtcHxeaHR0cHxedFxcLmNvJykpKSAlPiUgIyBUd2l0dGVyIHNwZWNpZmljIHN0dWZmCiMgIG11dGF0ZSh3b3JkID0gd29yZCAlPiUgc3RyX3JlbW92ZV9hbGwoJ1teWzphbG51bTpdXScpKSAlPiUgIyMgcmVtb3ZlIGFsbCBzcGVjaWFsIGNoYXJhY3RlcnMKICBmaWx0ZXIoc3RyX2xlbmd0aCh3b3JkKSA+IDIgKSAlPiUgIyBSZW1vdmUgd29yZHMgd2l0aCBsZXNzIHRoYW4gIDMgY2hhcmFjdGVycwogIGdyb3VwX2J5KHdvcmQpICU+JQogIGZpbHRlcihuKCkgPiAxMDApICU+JSAjIHJlbW92ZSB3b3JkcyBvY2N1cmluZyBsZXNzIHRoYW4gMTAwIHRpbWVzCiAgdW5ncm91cCgpICU+JQogIGFudGlfam9pbihzdG9wX3dvcmRzLCBieSA9ICd3b3JkJykgIyByZW1vdmUgc3RvcHdvcmRzCmBgYAoKIyBURklERiB3ZWlnaHRpbmcKCmBgYHtyfQojIHRvcCB3b3Jkcwp0d2VldHNfdGlkeSAlPiUKICBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkgJT4lCiAgaGVhZCgyMCkKYGBgCgpgYGB7cn0KIyBURklERiB3ZWlnaHRzCnR3ZWV0c190aWR5ICU8PiUKICBhZGRfY291bnQoSUQsIHdvcmQpICU+JQogIGRpc3RpbmN0KElELCB3b3JkLCAua2VlcF9hbGwgPSBUUlVFKSAlPiUKICBiaW5kX3RmX2lkZih0ZXJtID0gd29yZCwKICAgICAgICAgICAgICBkb2N1bWVudCA9IElELAogICAgICAgICAgICAgIG4gPSBuKQpgYGAKCgpgYGB7cn0KIyBURklERiB0b3B3b3Jkcwp0d2VldHNfdGlkeSAlPiUKICBjb3VudCh3b3JkLCB3dCA9IHRmX2lkZiwgc29ydCA9IFRSVUUpICU+JQogIGhlYWQoMjApCmBgYAoKIyBJbnNwZWN0aW5nCgojIyBXb3JkcyBieSBwYXJ0eSBhZmZpbGlhdGlvbgoKYGBge3J9CmxhYmVsc193b3JkcyA8LSB0d2VldHNfdGlkeSAlPiUKICBncm91cF9ieShsYWJlbHMpICU+JQogIGNvdW50KHdvcmQsIHd0ID0gdGZfaWRmLCBzb3J0ID0gVFJVRSwgbmFtZSA9ICJ0Zl9pZGYiKSAlPiUKICBzbGljZSgxOjEwMCkgJT4lCiAgdW5ncm91cCgpIApgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTB9CmxhYmVsc193b3JkcyAlPiUKICBtdXRhdGUod29yZCA9
IHJlb3JkZXJfd2l0aGluKHdvcmQsIGJ5ID0gdGZfaWRmLCB3aXRoaW4gPSBsYWJlbHMpKSAlPiUKICBnZ3Bsb3QoYWVzKHggPSB3b3JkLCB5ID0gdGZfaWRmLCBmaWxsID0gbGFiZWxzKSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gInRmLWlkZiIpICsKICBmYWNldF93cmFwKH5sYWJlbHMsIG5jb2wgPSAyLCBzY2FsZXMgPSAiZnJlZSIpICsKICBjb29yZF9mbGlwKCkgKwogIHNjYWxlX3hfcmVvcmRlcmVkKCkKYGBgCgojIyBTZW50aW1lbnRzPwoKYGBge3J9CnNlbnRpbWVudF90d2VldCA8LSB0d2VldHNfdGlkeSAlPiUKICBpbm5lcl9qb2luKGdldF9zZW50aW1lbnRzKCJiaW5nIikpCmBgYAoKLi4uIFRvIGJlIGNvbnRpbnVlZCBieSB5b3UKCiMjIFRvd2FyZHMgcHJlZGljdGlvbj8KCmBgYHtyfQp0d2VldHNfZHRtIDwtIHR3ZWV0c190aWR5ICU+JQogIHBpdm90X3dpZGVyKG5hbWVzX2Zyb20gPSB3b3JkLCB2YWx1ZXNfZnJvbSA9IHRmX2lkZikKYGBgCgpgYGB7cn0KdHdlZXRzX2R0bSAlPD4lIG11dGF0ZShhY3Jvc3MoZXZlcnl0aGluZygpLCAuZm5zID0gfnJlcGxhY2VfbmEoLiwwKSkpIApgYGAKCmBgYHtyfQpybSh0d2VldHNfZHRtKQpgYGAKCiMjIFNpbXBsZSBtYW51YWwgYmFzZWxpbmUKCmBgYHtyfQp3b3Jkc19jbGFzc2lmaWVyIDwtIGxhYmVsc193b3JkcyAlPiUKICBhcnJhbmdlKGRlc2ModGZfaWRmKSkgJT4lCiAgZGlzdGluY3Qod29yZCwgLmtlZXBfYWxsID0gVFJVRSkgJT4lCiAgc2VsZWN0KC10Zl9pZGYpCmBgYAoKYGBge3J9CnR3ZWV0X251bGxfbW9kZWwgPC0gdHdlZXRzX3RpZHkgJT4lCiAgaW5uZXJfam9pbihsYWJlbHNfd29yZHMsIGJ5ID0gJ3dvcmQnKQpgYGAKCmBgYHtyfQpudWxsX3JlcyA8LSB0d2VldF9udWxsX21vZGVsICU+JQogIGdyb3VwX2J5KElEKSAlPiUKICBzdW1tYXJpc2UodHJ1dGggPSBtZWFuKGxhYmVscy54LCBuYS5ybSA9IFRSVUUpICU+JSByb3VuZCgwKSwKICAgICAgICAgcHJlZCA9IG1lYW4obGFiZWxzLnksIG5hLnJtID0gVFJVRSkgJT4lIHJvdW5kKDApKQpgYGAKCmBgYHtyfQp0YWJsZShudWxsX3JlcyRwcmVkLCBudWxsX3JlcyR0cnV0aCkKYGBgCgojIFByZWRpY3RpdmUgbW9kZWwKCmBgYHtyfQpsaWJyYXJ5KHRpZHltb2RlbHMpCmBgYAoKIyMgUHJlcHJvY2Vzc2luZwoKYGBge3J9CiMgTm90aWNlLCB3ZSB1c2UgdGhlIGluaXRpYWwgdW50b2tlbml6ZWQgdHdlZXRzCmRhdGEgPC0gdHdlZXRzICU+JQogIHNlbGVjdChsYWJlbHMsIHRleHQpICU+JQogIHJlbmFtZSh5ID0gbGFiZWxzKSAlPiUKICBtdXRhdGUoeSA9IHkgICU+JSBhcy5mYWN0b3IoKSkgCmBgYAoKCiMjIFRyYWluaW5nICYgVGVzdCBzcGxpdAoKYGBge3J9CmRhdGFfc3BsaXQgPC0gaW5pdGlhbF9zcGxpdChkYXRhLCBwcm9wID0gMC43NSwgc3RyYXRhID0geSkKCmRhdGFfdHJhaW4gPC0gZGF0YV9zcGxpdCAgJT4lICB0cmFpbmluZygpCmRhdGFfdGVzdCA8LSBk
YXRhX3NwbGl0ICU+JSB0ZXN0aW5nKCkKYGBgCgpgYGB7cn0KZGF0YV90cmFpbiAlPiUgY291bnQoeSkKYGBgCgoKIyMgUHJlcHJvY2Vzc2luZyBwaXBlbGluZQoKYGBge3J9CmxpYnJhcnkodGV4dHJlY2lwZXMpICMgQWRpdHRpb25hbCByZWNpcGVzIGZvciB3b3JraW5nIHdpdGggdGV4dCBkYXRhCmBgYAoKYGBge3J9CiMgVGhpcyByZWNpcGUgcHJldHR5IG11Y2ggcmVjb25zdHJ1Y3RzIGFsbCBwcmVwcm9jZXNzaW5nIHdlIGRpZCBzbyBmYXIKZGF0YV9yZWNpcGUgPC0gZGF0YV90cmFpbiAlPiUKICByZWNpcGUoeSB+LikgJT4lCiAgdGhlbWlzOjpzdGVwX3Vwc2FtcGxlKHkpICU+JSAjIEZvciB1cC9kb3duc2FtcGxpbmcgY2xhc3MgaW1iYWxhbmNlcyAob3B0aW1hbCkKICBzdGVwX2ZpbHRlcighKHRleHQgJT4lIHN0cl9kZXRlY3QoJ15SVCcpKSkgJT4lICMgVXBmcm9udCBmaWx0ZXJpbmcgcmV0d2VldHMKICBzdGVwX2ZpbHRlcih0ZXh0ICE9ICIiKSAlPiUKICAjIHRleHRyZWNpZXBlcwogIHN0ZXBfdG9rZW5pemUodGV4dCwgdG9rZW4gPSAidHdlZXRzIikgJT4lICMgdG9rZW5pemUKICBzdGVwX3Rva2VuZmlsdGVyKHRleHQsIG1pbl90aW1lcyA9IDc1KSAlPiUgICMgRmlsdGVyIG91dCByYXJlIHdvcmRzCiAgc3RlcF9zdG9wd29yZHModGV4dCwga2VlcCA9IEZBTFNFKSAlPiUgIyBGaWx0ZXIgc3RvcHdvcmRzCiAgc3RlcF90ZmlkZih0ZXh0KSAlPiUgIyBURklERiB3ZWlnaHRpbmcKICAjc3RlcF9wY2EoYWxsX3ByZWRpY3RvcnMoKSkgJT4lICMgRGltZW5zaW9uYWxpdHkgcmVkdWN0aW9uIHZpYSBQQ0EgKG9wdGlvbmFsKQogIHByZXAoKQpgYGAKCgpgYGB7cn0KZGF0YV9yZWNpcGUKYGBgCgpTaW5jZSB3ZSB3aWxsIG5vdCBkbyBoeXBlcnBhcmFtZXRlciB0dW5pbmcsIHdlIGRpcmVjdGx5IGJha2UvanVpY2UgdGhlIHJlY2lwZQoKYGBge3J9CmRhdGFfdHJhaW5fcHJlcCA8LSBkYXRhX3JlY2lwZSAlPiUganVpY2UoKQpkYXRhX3Rlc3RfcHJlcCA8LSBkYXRhX3JlY2lwZSAlPiUgYmFrZShkYXRhX3Rlc3QpCmBgYAoKCiMjIERlZmluaW5nIHRoZSBtb2RlbHMKCmBgYHtyfQptb2RlbF9udWxsIDwtIG51bGxfbW9kZWwobW9kZSA9ICdjbGFzc2lmaWNhdGlvbicpCmBgYAoKYGBge3J9Cm1vZGVsX2VuIDwtIGxvZ2lzdGljX3JlZyhtb2RlID0gJ2NsYXNzaWZpY2F0aW9uJywKICAgICAgICAgICAgICAgICAgICAgICAgIG1peHR1cmUgPSAwLjUsCiAgICAgICAgICAgICAgICAgICAgICAgICBwZW5hbHR5ID0gMC41KSAlPiUKICBzZXRfZW5naW5lKCdnbG0nLCBmYW1pbHkgPSBiaW5vbWlhbCkgCmBgYAoKCiMjIERlZmluZSB0aGUgd29ya2Zsb3cKCldlIHdpbGwgc2tpcCB0aGUgd29ya2Zsb3cgc3RlcCB0aGlzIHRpbWUsIHNpbmNlIHdlIGRvIG5vdCBldmFsdWF0ZSBkaWZmZXJlbnQgbW9kZWxzIGFnYWluc3QgZWFjaCBvdGhlcnMuCgojIyBmaXQgdGhlIG1vZGVsCgpgYGB7cn0KZml0X251bGwgPC0gbW9kZWxfbnVsbCAlPiUgZml0KGZvcm11
bGEgPSB5IH4uLCBkYXRhID0gZGF0YV90cmFpbl9wcmVwKQpgYGAKCmBgYHtyfQpmaXRfZW4gPC0gbW9kZWxfZW4gJT4lIGZpdChmb3JtdWxhID0geSB+LiwgZGF0YSA9IGRhdGFfdHJhaW5fcHJlcCkKYGBgCgoKYGBge3J9CnByZWRfY29sbGVjdGVkIDwtIHRpYmJsZSgKICB0cnV0aCA9IGRhdGFfdGVzdF9wcmVwICU+JSBwdWxsKHkpLAogIHByZWQgPSBmaXRfZW4gJT4lIHByZWRpY3QobmV3X2RhdGEgPSBkYXRhX3Rlc3RfcHJlcCkgJT4lIHB1bGwoLnByZWRfY2xhc3MpLAogIHByZWRfcHJvYiA9IGZpdF9lbiAlPiUgcHJlZGljdChuZXdfZGF0YSA9IGRhdGFfdGVzdF9wcmVwLCB0eXBlID0gInByb2IiKSAlPiUgcHVsbCgucHJlZF9UUlVFKSwKICApIApgYGAKCmBgYHtyfQpwcmVkX2NvbGxlY3RlZCAlPiUgY29uZl9tYXQodHJ1dGgsIHByZWQpICU+JSBhdXRvcGxvdCh0eXBlID0gJ2hlYXRtYXAnKQpgYGAKCmBgYHtyfQpwcmVkX2NvbGxlY3RlZCAlPiUgY29uZl9tYXQodHJ1dGgsIHByZWQpICU+JSBzdW1tYXJ5KCkKYGBgCldlbGwuLi4gc29zbwoKIyBVc2luZyB0aGUgbW9kZWwgZm9yIG5ldyBwcmVkaWN0aW9uCgojIyBTaW1wbGUgdGVzdAoKYGBge3J9CiMgSG93IHdvdWxkIHRoZSBtb2RlbCBwcmVkaWN0IGdpdmVuIHNvbWUgdHdlZXQgdGV4dApwcmVkX293biA9IHRpYmJsZSh0ZXh0ID0gJ3RydW1wIGlzIHJlYWxseSBiYWQuIHdlIG5lZWQgbW9yZSBncmVlbiBlbmVyZ3kgIHRvIHNhdmUgdGhlIGVudmlyb21lbnQgYW5kIGZ1dXR1cmUgb2Ygb3VyIGNoaWxkcmVuJykKYGBgCgpgYGB7cn0KZml0X2VuICU+JSBwcmVkaWN0KG5ld19kYXRhID0gZGF0YV9yZWNpcGUgJT4lIGJha2UocHJlZF9vd24pKQpgYGAKCiMgUHJlZGljdGlvbiBvbiBuZXcgdHdlZXRzCgojIyBOZXcgZGF0YQoKKiBXZSBjb3VsZCBhbHNvIHVzZSB0aGUgbW9kZWwgdG8gcHJlZGljdCBvbiBuZXcgZGF0YSwgc3VjaCBhcyB0aGUganVzdCBzY3JhcGVkIGRpc2N1c3Npb24gb24gdGhlIHByZXNpZGVudGlhbCBkZWJhdGUuCgpgYGB7cn0KIyBkb3dubG9hZCBhbmQgb3BlbiBzb21lIFRydW1wIHR3ZWV0cyBmcm9tIHRydW1wX3R3ZWV0X2RhdGFfYXJjaGl2ZQpkb3dubG9hZC5maWxlKCJodHRwczovL2dpdGh1Yi5jb20vU0RTLUFBVS9TRFMtbWFzdGVyL3Jhdy9tYXN0ZXIvTTIvZGF0YS9wcmVzX2RlYmF0ZV8yMDIwLmd6IiwgdG1wKQoKdHdlZXRzX3Jhd19uZXcgPC0gc3RyZWFtX2luKGd6ZmlsZSh0bXAsICJwcmVzX2RlYmF0ZV8yMDIwIikpCmBgYAoKYGBge3J9CnR3ZWV0c19yYXdfbmV3ICU+JSBnbGltcHNlKCkKYGBgCgpgYGB7cn0KdHdlZXRzX25ldyA8LSB0aWJibGUoSUQgPSB0d2VldHNfcmF3X25ldyRpZFsxLF0gJT4lIHQoKSAlPiUgYXMuY2hhcmFjdGVyKCksIAogICAgICAgICAgICAgICAgICAgICB0ZXh0ID0gdHdlZXRzX3Jhd19uZXckdHdlZXRbMSxdICU+JSB0KCkgJT4lIGFzLmNoYXJhY3RlcigpKQojcm0odHdlZXRzX3Jhd19uZXcpCmBgYAoKYGBge3J9CnR3
ZWV0c19uZXcgJT4lIGdsaW1wc2UoKQpgYGAKCiMjIERvaW5nIGEgcHJlZGljdGlvbgpgYGB7cn0KZGF0YV9uZXcgPC0gZGF0YV9yZWNpcGUgJT4lIGJha2UodHdlZXRzX25ldykKYGBgCgpgYGB7cn0KZGF0YV9uZXcgJT4lIGdsaW1wc2UoKQpgYGAKCmBgYHtyfQpwcmVkX25ldyA8LSBmaXRfZW4gJT4lIHByZWRpY3QobmV3X2RhdGEgPSBkYXRhX25ldykKcHJlZF9wcm9iX25ldyA8LSBmaXRfZW4gJT4lIHByZWRpY3QobmV3X2RhdGEgPSBkYXRhX25ldywgdHlwZSA9ICJwcm9iIikKYGBgCgojIyBFeHBsb3JpbmcgdGhlIG5ldyBkYXRhICYgcHJlZGljdGlvbnMKCmBgYHtyfQpkYXRhX25ldyAlPD4lCiAgYmluZF9jb2xzKHByZWRfbmV3KSAlPiUKICByZW5hbWUocHJlZCA9IC5wcmVkX2NsYXNzKSAlPiUKICBiaW5kX2NvbHMocHJlZF9wcm9iX25ldykgJT4lCiAgcmVuYW1lKHByZWRfcHJvYiA9IC5wcmVkX1RSVUUpIApgYGAKCmBgYHtyfQp0d2VldHNfbmV3ICU8PiUKICBiaW5kX2NvbHMocHJlZF9uZXcpICU+JQogIHJlbmFtZShwcmVkID0gLnByZWRfY2xhc3MpIApgYGAKCgpgYGB7cn0KdHdlZXRzX3RpZHlfbmV3IDwtIHR3ZWV0c19uZXcgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0ZXh0LCB0b2tlbiA9ICJ0d2VldHMiKSAKYGBgCgpgYGB7cn0KIyBwcmVwcm9jZXNzaW5nCnR3ZWV0c190aWR5X25ldyAlPD4lCiAgZmlsdGVyKCEod29yZCAlPiUgc3RyX2RldGVjdCgnQHwjcHJlc2lkZW50aWFsJykpKSAlPiUgIyByZW1vdmUgaGFzaHRhZ3MgYW5kIG1lbnRpb25zCiAgZmlsdGVyKCEod29yZCAlPiUgc3RyX2RldGVjdCgnXmFtcHxeaHR0cHxedFxcLmNvJykpKSAlPiUgIyBUd2l0dGVyIHNwZWNpZmljIHN0dWZmCiMgIG11dGF0ZSh3b3JkID0gd29yZCAlPiUgc3RyX3JlbW92ZV9hbGwoJ1teWzphbG51bTpdXScpKSAlPiUgIyMgcmVtb3ZlIGFsbCBzcGVjaWFsIGNoYXJhY3RlcnMKICBmaWx0ZXIoc3RyX2xlbmd0aCh3b3JkKSA+IDIgKSAlPiUgIyBSZW1vdmUgd29yZHMgd2l0aCBsZXNzIHRoYW4gIDMgY2hhcmFjdGVycwogIGdyb3VwX2J5KHdvcmQpICU+JQogIGZpbHRlcihuKCkgPiAxMDApICU+JSAjIHJlbW92ZSB3b3JkcyBvY2N1cmluZyBsZXNzIHRoYW4gMTAwIHRpbWVzCiAgdW5ncm91cCgpICU+JQogIGFudGlfam9pbihzdG9wX3dvcmRzLCBieSA9ICd3b3JkJykgIyByZW1vdmUgc3RvcHdvcmRzCmBgYAoKCmBgYHtyfQojIFRGSURGIHdlaWdodHMKdHdlZXRzX3RpZHlfbmV3ICU8PiUKICBhZGRfY291bnQoSUQsIHdvcmQpICU+JQogIGRpc3RpbmN0KElELCB3b3JkLCAua2VlcF9hbGwgPSBUUlVFKSAlPiUKICBiaW5kX3RmX2lkZih0ZXJtID0gd29yZCwKICAgICAgICAgICAgICBkb2N1bWVudCA9IElELAogICAgICAgICAgICAgIG4gPSBuKQpgYGAKCmBgYHtyfQpsYWJlbHNfd29yZHNfbmV3IDwtIHR3ZWV0c190aWR5X25ldyAlPiUKICBncm91cF9ieShwcmVkKSAlPiUKICBjb3VudCh3b3JkLCB3dCA9IHRmX2lkZiwgc29ydCA9
IFRSVUUsIG5hbWUgPSAidGZfaWRmIikgJT4lCiAgc2xpY2UoMToyMCkgJT4lCiAgdW5ncm91cCgpIApgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTB9CmxhYmVsc193b3Jkc19uZXcgJT4lCiAgbXV0YXRlKHdvcmQgPSByZW9yZGVyX3dpdGhpbih3b3JkLCBieSA9IHRmX2lkZiwgd2l0aGluID0gcHJlZCkpICU+JQogIGdncGxvdChhZXMoeCA9IHdvcmQsIHkgPSB0Zl9pZGYsIGZpbGwgPSBwcmVkKSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gInRmLWlkZiIpICsKICBmYWNldF93cmFwKH5wcmVkLCBuY29sID0gMiwgc2NhbGVzID0gImZyZWUiKSArCiAgY29vcmRfZmxpcCgpICsKICBzY2FsZV94X3Jlb3JkZXJlZCgpCmBgYAoKCiMgVG9waWMgbW9kZWxzIChMREEpIG9uIG5ldyBkYXRhCgpgYGB7cn0KIyBmb3IgTERBIGFuYWx5c2lzCmxpYnJhcnkodG9waWNtb2RlbHMpCmBgYAoKIyMjIFByZXBhcmluZyB0aGUgRGF0YQoKYGBge3J9CiMgTERBIHZpYSB0aGUgdG9waWNtb2RlbCBwYWNrYWdlIHJlcXVpcmVzIGEgZG9jdW1lbnQtdGVybS1tYXRyaXggKGR0bSkKdHdlZXRzX2R0bSA8LSB0d2VldHNfdGlkeV9uZXcgJT4lCiAgY2FzdF9kdG0oZG9jdW1lbnQgPSBJRCwgdGVybSA9IHdvcmQsIHZhbHVlID0gbikKYGBgCgpMZXRzIHRha2UgYSBsb29rOgoKYGBge3J9CnR3ZWV0c19kdG0KYGBgCgoqIFdlIHNlZSBhZ2FpbiBoYXQgdGhlIG1hdHJpeCBpcyBzdGlsbCByYXRoZXIgc3BhcnNlLCB3aGljaCBpcyBhbiBhcnRlZmFjdCBvZiB0ZXh0IGRhdGEgZ2VuZXJhbGx5LCBidXQgZXZlbiBtb3JlIHNvIHdoZW4gdXNpbmcgdHdpdHRlciBkYXRhLiAKKiBMZXRzIHRyeSB0byBzZWUgaWYgd2UgY291bGQgcmVkdWNlIHRoYXQgc29tZXdoYXQgYnkgZGVsZXRpbmcgbGVzcyBvZnRlbiB1c2VkIHRlcm1zLgoKYGBge3J9CmxpYnJhcnkodG0pCnR3ZWV0c19kdG0gJT4lIHJlbW92ZVNwYXJzZVRlcm1zKHNwYXJzZSA9IC45OSkKYGBgCgoqIE5vdyB3ZSBjYW4gcGVyZm9ybSBhIExEQSwgdXNpbmcgdGhlIG1vcmUgYWNjdXJhdGUgR2liYnMgc2FtcGxpbmcgYXMgYG1ldGhvZGAuCgpgYGB7cn0KdHdlZXRzX2xkYSA8LSB0d2VldHNfZHRtICU+JSAKICBMREEoayA9IDYsIG1ldGhvZCA9ICJHaWJicyIsCiAgICAgIGNvbnRyb2wgPSBsaXN0KHNlZWQgPSAxMzM3KSkKYGBgCgojIyMgJFxiZXRhJDogV29yZC1Ub3BpYyBBc3NvY2lhdGlvbgoKKiAkXGJldGEkIGlzIGFuIG91dHB1dCBvZiB0aGUgTERBIG1vZGVsLCBpbmRpY2F0aW5nIHRoZSBwcm9wYWJpbGl0eSB0aGF0IGEgd29yZCBvY2N1cnMgaW4gYSBjZXJ0YWluIHRvcGljLgoqIFRoZXJlZm9yZSwgbG9raW5nIGF0IHRoZSB0b3AgcHJvYmFiaWxpdHkgd29yZHMgb2YgYSB0b3BpYyBvZnRlbiBnaXZlcyB1cyBhIGdvb2QgaW50dWl0aW9uIHJlZ2FyZGluZyBpdHMgcHJvcGVydGllcy4KCmBgYHtyfQojIExEQSBvdXRwdXQgaXMgZGVmaW5lZCBmb3IgdGlkeSgp
LCBzbyB3ZSBjYW4gZWFzaWx5IGV4dHJhY3QgaXQKbGRhX2JldGEgPC0gdHdlZXRzX2xkYSAlPiUgCiAgdGlkeShtYXRyaXggPSAiYmV0YSIpICU+JQogIGdyb3VwX2J5KHRvcGljKSAlPiUKICBhcnJhbmdlKHRvcGljLCBkZXNjKGJldGEpKSAlPiUKICBzbGljZSgxOjEwKSAlPiUKICB1bmdyb3VwKCkgCmBgYAoKYGBge3J9CmxkYV9iZXRhICU+JSBoZWFkKCkKYGBgCgpgYGB7cn0KIyBOb3RpY2UgdGhlICJyZW9yZGVyX3dpdGhpbigpIgpsZGFfYmV0YSAlPiUKICBtdXRhdGUodGVybSA9IHJlb3JkZXJfd2l0aGluKHRlcm0sIGJldGEsIHRvcGljKSkgJT4lCiAgZ3JvdXBfYnkodG9waWMsIHRlcm0pICU+JSAgICAKICBhcnJhbmdlKGRlc2MoYmV0YSkpICU+JSAgCiAgdW5ncm91cCgpICU+JQogIGdncGxvdChhZXModGVybSwgYmV0YSwgZmlsbCA9IGFzLmZhY3Rvcih0b3BpYykpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGNvb3JkX2ZsaXAoKSArCiAgc2NhbGVfeF9yZW9yZGVyZWQoKSArCiAgbGFicyh0aXRsZSA9ICJUb3AgMTAgdGVybXMgaW4gZWFjaCBMREEgdG9waWMiLAogICAgICAgeCA9IE5VTEwsIHkgPSBleHByZXNzaW9uKGJldGEpKSArCiAgZmFjZXRfd3JhcCh+IHRvcGljLCBuY29sID0gMiwgc2NhbGVzID0gImZyZWUiKQpgYGAKCiMjIyAkXGdhbW1hJDogRG9jdW1lbnQtVG9waWMgQXNzb2NpYXRpb24KCiogSW4gTERBLCBkb2N1bWVudHMgYXJlIHJlcHJlc2VudGVkIGFzIGEgbWl4IG9mIHRvcGljcy4gVGhpcyBhc3NvY2lhdGlvbiBvZiBhIGRvY3VtZW50IHRvIGEgdG9waWMgaXMgY2FwdHVyZWQgYnkgJFxnYW1tYSQKCmBgYHtyfQpsZGFfZ2FtbWEgPC0gdHdlZXRzX2xkYSAlPiUgCiAgdGlkeShtYXRyaXggPSAiZ2FtbWEiKQpgYGAKCmBgYHtyfQpsZGFfZ2FtbWEgJT4lIGhlYWQoKQpgYGAKCmBgYHtyfQpsZGFfZ2FtbWEgJT4lCiAgZ2dwbG90KGFlcyhnYW1tYSkpICsKICBnZW9tX2hpc3RvZ3JhbSgpICsKICBzY2FsZV95X2xvZzEwKCkgKwogIGxhYnModGl0bGUgPSAiRGlzdHJpYnV0aW9uIG9mIHByb2JhYmlsaXRpZXMgZm9yIGFsbCB0b3BpY3MiLAogICAgICAgeSA9ICJOdW1iZXIgb2YgZG9jdW1lbnRzIiwgeCA9IGV4cHJlc3Npb24oZ2FtbWEpKQpgYGAKCmBgYHtyfQpsZGFfZ2FtbWEgJTw+JQogIGxlZnRfam9pbih0d2VldHNfbmV3ICU+JSBzZWxlY3QoSUQsIHByZWQpLCBieSA9IGMoJ2RvY3VtZW50JyA9ICdJRCcpKQpgYGAKCmBgYHtyfQpsZGFfZ2FtbWEgJT4lCiAgZ3JvdXBfYnkocHJlZCwgdG9waWMpICU+JQogIHN1bW1hcmlzZShnYW1tYSA9IHN1bShnYW1tYSkpICU+JQogIGFycmFuZ2UocHJlZCwgZ2FtbWEpCmBgYAoKYGBge3J9CmxkYV9nYW1tYSAlPiUKICBnZ3Bsb3QoYWVzKGdhbW1hLCBmaWxsID0gYXMuZmFjdG9yKHRvcGljKSkpICsKICBnZW9tX2hpc3RvZ3JhbShzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh+IHRvcGljLCBuY29sID0gMikgKwogIHNjYWxl
X3lfbG9nMTAoKSArCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidXRpb24gb2YgcHJvYmFiaWxpdHkgZm9yIGVhY2ggdG9waWMiLAogICAgICAgeSA9ICJOdW1iZXIgb2YgZG9jdW1lbnRzIiwgeCA9IGV4cHJlc3Npb24oZ2FtbWEpKQpgYGAKCmBgYHtyfQp0b3BpY21vZGVsc19qc29uX2xkYXZpcyA8LSBmdW5jdGlvbihmaXR0ZWQsIGRvY19kdG0sIG1ldGhvZCA9ICJQQ0EiLCBkb2NfaW4gPSBOVUxMLCB0b3BpY19pbiA9IE5VTEwpewogIHJlcXVpcmUodG9waWNtb2RlbHMpOyByZXF1aXJlKGRwbHlyKTsgcmVxdWlyZShMREF2aXMpCiAgCiAgIyBGaW5kIHJlcXVpcmVkIHF1YW50aXRpZXMKICBwaGkgPC0gcG9zdGVyaW9yKGZpdHRlZCkkdGVybXMgJT4lIGFzLm1hdHJpeCgpICMgVG9waWMtdGVybSBkaXN0cmlidXRpb24KICB0aGV0YSA8LSBwb3N0ZXJpb3IoZml0dGVkKSR0b3BpY3MgJT4lIGFzLm1hdHJpeCgpICMgRG9jdW1lbnQtdG9waWMgbWF0cml4CiAgCiAgIyBSZXN0cmljdAogIGlmKCFpc19udWxsKHRvcGljX2luKSl7CiAgICBwaGkgPC0gcGhpW3RvcGljX2luLCBdCiAgICB0aGV0YSA8LSB0aGV0YVsgLCB0b3BpY19pbl0KICB9CiAgdGV4dF90aWR5IDwtIGRvY19kdG0gJT4lIHRpZHkoKQogIHZvY2FiIDwtIGNvbG5hbWVzKHBoaSkKICBkb2NfbGVuZ3RoIDwtIHRpYmJsZShkb2N1bWVudCA9IHJvd25hbWVzKHRoZXRhKSkgJT4lIGxlZnRfam9pbih0ZXh0X3RpZHkgJT4lIGNvdW50KGRvY3VtZW50LCB3dCA9IGNvdW50KSwgYnkgPSAnZG9jdW1lbnQnKQogIHRmIDwtIHRpYmJsZSh0ZXJtID0gdm9jYWIpICU+JSBsZWZ0X2pvaW4odGV4dF90aWR5ICU+JSBjb3VudCh0ZXJtLCB3dCA9IGNvdW50KSwgYnkgPSAidGVybSIpIAogIAogIGlmKG1ldGhvZCA9PSAiUENBIil7bWRzIDwtIGpzUENBfQogIGlmKG1ldGhvZCA9PSAiVFNORSIpe2xpYnJhcnkodHNuZSk7IG1kcyA8LSBmdW5jdGlvbih4KXt0c25lKHN2ZCh4KSR1KX0gfQogIAogICMgQ29udmVydCB0byBqc29uCiAganNvbl9sZGEgPC0gTERBdmlzOjpjcmVhdGVKU09OKHBoaSA9IHBoaSwgdGhldGEgPSB0aGV0YSwgdm9jYWIgPSB2b2NhYiwgZG9jLmxlbmd0aCA9IGRvY19sZW5ndGggJT4lIHB1bGwobiksIHRlcm0uZnJlcXVlbmN5ID0gdGYgJT4lIHB1bGwobiksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlb3JkZXIudG9waWNzID0gRkFMU0UsIG1kcy5tZXRob2QgPSBtZHMscGxvdC5vcHRzID0gbGlzdCh4bGFiID0gIkRpbS4xIiwgeWxhYiA9ICJEaW0uMiIpKSAKICByZXR1cm4oanNvbl9sZGEpCn0KYGBgCgoKYGBge3J9CmxpYnJhcnkoTERBdmlzKQpqc29uX2xkYSA8LSB0b3BpY21vZGVsc19qc29uX2xkYXZpcyhmaXR0ZWQgPSB0d2VldHNfbGRhLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZG9jX2R0bSA9IHR3ZWV0c19kdG0sIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAiVFNORSIpCmpzb25f
bGRhICU+JSBzZXJWaXMoKQojIGpzb25fbGRhICU+JSBzZXJWaXMob3V0LmRpciA9ICdMREF2aXonKQpgYGAKCiMgTW9kZWwgZXhwbGFpbmFiaWxpdHkKCiMjIEdsb2JhbAoKYGBge3J9CmZpdF9lbiAlPiUgdmlwOjp2aXAoKQpgYGAKCiMjIExvY2FsCgpgYGB7cn0KbGlicmFyeShsaW1lKQpgYGAKCmBgYHtyfQpleHBsYW5hdGlvbiA8LSAubG9hZF9pbWFnZV9leGFtcGxlKCkKcGxvdF9pbWFnZV9leHBsYW5hdGlvbihleHBsYW5hdGlvbikKYGBgCgpgYGB7cn0KbGltZV90d2VldHMgPC0gbGltZShkYXRhX25ldywgZml0X2VuKQpgYGAKCmBgYHtyfQpyZWNpcGVfZGVwbG95IDwtIGZ1bmN0aW9uKGRhdGEpewogIGRhdGFfcmVjaXBlICU+JSBiYWtlKGRhdGEpCn0KYGBgCgoKYGBge3J9CmxpbWVfdHdlZXRzIDwtIGxpbWUoZGF0YV9uZXcsIGZpdF9lbiwKICAgICAgICAgICAgICAgICAgICBwcmVwcm9jZXNzID0gcmVjaXBlX2RlcGxveSkKYGBgCgoKYGBge3J9CmV4cGxhaW5lZF90d2VldHMgPC0gIGV4cGxhaW4oeCA9IGRhdGFfbmV3ICU+JSBzYW1wbGVfbig4KSwgCiAgICAgICAgICAgICAgICAgICAgICAgICBleHBsYWluZXIgPSBsaW1lX3R3ZWV0cywgCiAgICAgICAgICAgICAgICAgICAgICAgICBuX3Blcm11dGF0aW9ucyA9IDUwMDAsCiAgICAgICAgICAgICAgICAgICAgICAgICBkaXN0X2Z1biA9ICJnb3dlciIsCiAgICAgICAgICAgICAgICAgICAgICAgICBrZXJuZWxfd2lkdGggPSAwLjc1LAogICAgICAgICAgICAgICAgICAgICAgICAgbl9mZWF0dXJlcyA9IDEwLCAKICAgICAgICAgICAgICAgICAgICAgICAgIGZlYXR1cmVfc2VsZWN0ID0gImhpZ2hlc3Rfd2VpZ2h0cyIsCiAgICAgICAgICAgICAgICAgICAgICAgICBuX2xhYmVscyA9IDEgIyB0byBoYXZlIHRoZSBwcmVkaWN0ZWQgY2xhc3MgYXMgYmFzZWxpbmUKICAgICAgICAgICAgICAgICAgICAgICAgICMgbGFiZWxzID0gIlllcyIgIyB0byBoYXZlIHRlIHBvc2l0aXZlIGNsYXNzIGFzIGJhc2VsaW5lCiAgICAgICAgICAgICAgICAgICAgICAgICApCmBgYAoKCmBgYHtyIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD0xMn0KZXhwbGFpbmVkX3R3ZWV0cyAlPiUgcGxvdF9mZWF0dXJlcygpCmBgYAoKYGBge3J9CmV4cGxhaW5lZF90d2VldHMgJT4lIHBsb3RfdGV4dF9leHBsYW5hdGlvbnMoKQpgYGAKCgojIEVuZG5vdGVzCgojIyMgUGFja2FnZXMgJiBFY29zeXN0ZW0KCiogW2B0aWR5dGV4dGBdKGh0dHBzOi8vZ2l0aHViLmNvbS9qdWxpYXNpbGdlL3RpZHl0ZXh0KQoqIFtgdGV4dHJlY2lwZXNgXShodHRwczovL3RleHRyZWNpcGVzLnRpZHltb2RlbHMub3JnLykKKiBbYHRvcGljbW9kZWxzYF0oaHR0cHM6Ly9jcmFuLnItcHJvamVjdC5vcmcvd2ViL3BhY2thZ2VzL3RvcGljbW9kZWxzL3ZpZ25ldHRlcy90b3BpY21vZGVscy5wZGYpCgpGdXJ0aGVyIE5MUCBwYWNrYWdlcyBlY29zeXN0ZW0KCiogYHRtYCBbaGVyZV0oaHR0cHM6Ly9jcmFuLnItcHJvamVjdC5vcmcvd2ViL3BhY2th
Z2VzL3RtLykKKiBgcXVhbnRlZGFgIFtoZXJlXShodHRwczovL3F1YW50ZWRhLmlvLyksIGFuZCBtYW55IG1hbnkgZ3JlYXQgdHV0b3JpYWxzIFtoZXJlXShodHRwczovL3R1dG9yaWFscy5xdWFudGVkYS5pby8pCgoKIyMjIFJlZmVyZW5jZXMgCgoqIEp1bGlhIFNpbGdlIGFuZCBEYXZpZCBSb2JpbnNvbiAoMjAyMCkuIFRleHQgTWluaW5nIHdpdGggUjogQSBUaWR5IEFwcHJvYWNoLCBP4oCZUmVpbGx5LiBPbmxpbmUgYXZhaWxhYmxlIFtoZXJlXShodHRwczovL3d3dy50aWR5dGV4dG1pbmluZy5jb20vKQogICAqIFtDaGFwdGVyIDZdKGh0dHBzOi8vd3d3LnRpZHl0ZXh0bWluaW5nLmNvbS90b3BpY21vZGVsaW5nLmh0bWwpOiBJbnRyb2R1Y3Rpb24gdG9waWMgbW9kZWxzCiogRW1pbCBIdmlkZmVsZHQgYW5kIEp1bGlhIFNpbGdlICgyMDIwKS4gU3VwZXJ2aXNlZCBNYWNoaW5lIExlYXJuaW5nIGZvciBUZXh0IEFuYWx5c2lzIGluIFIsIG9ubGluZSBhdmFpbGFibGUgW2hlcmVdKGh0dHBzOi8vc21sdGFyLmNvbS8pCiAgICogW0NoYXB0ZXIgN10oaHR0cHM6Ly9zbWx0YXIuY29tL21sY2xhc3NpZmljYXRpb24uaHRtbCk6IENsYXNzaWZpY2F0aW9uCgojIyMgRnVydGhlciBzb3VyY2VzCgpEYXRhY2FtcAoKKiAgW1RvcGljIE1vZGVsaW5nIGluIFJdKGh0dHBzOi8vbGVhcm4uZGF0YWNhbXAuY29tL2NvdXJzZXMvdG9waWMtbW9kZWxpbmctaW4tcikgCgpPdGhlciBvbmxpbmUKCiogW0p1bGlhIFNpbGdlJ3MgQmxvZ10oaHR0cHM6Ly9qdWxpYXNpbGdlLmNvbS8pOiBGdWxsIG9mIGdyZWF0IGV4YW1wbGVzIG9mIHByZWRpY3RpdmUgbW9kZWxpbmcsIE5MUCwgYW5kIHRoZSBjb21iaW5hdGlvbiBmbyBib3RoLCB1c2luZyB0aWR5IGVjb3N5c3RlbXMKCiMjIyBTZXNzaW9uIEluZm8KCmBgYHtyfQpzZXNzaW9uSW5mbygpCmBgYAoKCgo=