library(tidyverse)
library(magrittr)
library(keras)
In this notebook you will learn about the different building blocks that form a convolutional neural network (CNN) and how to build one with Keras. CNNs are the kind of neural networks that really demand computational resources, so you should consider running this notebook on Colab or Kaggle with GPU support (or TPU support, if you can figure that out). If you run it on your own computer without a GPU, things will take a lot of time…like, a lot!
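If you are unsure whether your session actually has a GPU, here is a quick check via the tensorflow package (a minimal sketch, assuming a TensorFlow 2.x backend, which the keras package uses):
library(tensorflow)
# Returns an empty list if TensorFlow only sees the CPU
tf$config$list_physical_devices("GPU")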
# 'temp' is the downloaded zip archive from the step above.
# Note: unz() only opens a connection to a single file inside a zip;
# unzip() is the function that extracts the whole archive.
unzip(temp, exdir = "dataset")
list.files(path = "dataset", include.dirs = TRUE)
[1] "single_prediction" "test_set" "training_set"
list.files(path = "dataset/training_set") %>% head()
[1] "cats" "dogs"
The data is a folder with 3 folders inside it: a training_set, a test_set and a single_prediction folder for try-outs.
In each of the training_set and test_set folders we find 2 folders again, one for cats and one for dogs.
list.files(path = "dataset/training_set/cats") %>% head()
list.files(path = "dataset/training_set/dogs") %>% head()
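To check that the two classes are balanced, we can simply count the files per class; a minimal sketch, assuming the folder layout shown above:
# number of training images per class
sapply(c('cats', 'dogs'), function(cl) {
  length(list.files(file.path('dataset/training_set', cl)))
})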
Here are some examples:
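A minimal sketch to display one of the images yourself, using the magick package (it is loaded again further below):
library(magick)
# read the first cat image and plot it
first_cat <- list.files('dataset/training_set/cats', full.names = TRUE)[1]
image_read(first_cat) %>% as.raster() %>% plot()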
Now imagine you are a computer and need to classify these. :-O
First, I define a few parameters up front to make adapting the code as easy as possible.
# list of classes to model
class_list <- c('cats', 'dogs')
# number of output classes (i.e. cats and dogs)
output_n <- length(class_list)
# image size to scale down to (the original images vary in size)
img_width <- 64
img_height <- 64
target_size <- c(img_width, img_height)
# RGB = 3 channels
channels <- 3
# path to image folders
train_files_path <- 'dataset/training_set'
test_files_path <- 'dataset/test_set'
Another thing that we will do is "image augmentation". Image augmentation techniques artificially increase the variation of the images in our data set through horizontal/vertical flips, rotations, variations in brightness, horizontal/vertical shifts, etc. You can read more about augmentation, and about generators in general, here.
* The handy image_data_generator() and flow_images_from_directory() functions can be used to load images from a directory.
* If you want to use data augmentation, you can directly define how and in what way you want to augment your images.
# optional data augmentation
train_data_gen <- image_data_generator(
  rescale = 1/255,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = TRUE
  #fill_mode = "nearest",
  #rotation_range = 40,
  #width_shift_range = 0.2,
  #height_shift_range = 0.2
)
# Validation data shouldn't be augmented! But it should also be scaled.
test_data_gen <- image_data_generator(
rescale = 1/255
)
Now we set up generators that stream the images from disk in batches and resize them on the fly.
# training images
train_image_array_gen <- flow_images_from_directory(train_files_path,
                                                    train_data_gen,
                                                    target_size = target_size,
                                                    class_mode = "binary",
                                                    classes = class_list,
                                                    batch_size = 32,
                                                    seed = 1337)
Found 8000 images belonging to 2 classes.
# validation images
test_image_array_gen <- flow_images_from_directory(test_files_path,
                                                   test_data_gen,
                                                   target_size = target_size,
                                                   class_mode = "binary",
                                                   classes = class_list,
                                                   batch_size = 32,
                                                   seed = 1337)
Found 2000 images belonging to 2 classes.
table(factor(train_image_array_gen$classes))
0 1
4000 4000
train_image_array_gen$class_indices
$cats
[1] 0
$dogs
[1] 1
classes_indices <- train_image_array_gen$class_indices
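Before building the model it can be reassuring to pull a single batch from the generator and inspect it; a minimal sketch using generator_next() from keras:
# fetch one batch: a list of (images, labels)
batch <- generator_next(train_image_array_gen)
dim(batch[[1]])   # 32 x 64 x 64 x 3, rescaled to [0, 1]
head(batch[[2]])  # binary labels: 0 = cat, 1 = dog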
model <- keras_model_sequential()
# Step 1 - Convolution - This is new
model <- model %>%
  layer_conv_2d(filters = 32,
                kernel_size = c(3,3),
                padding = "same",
                input_shape = c(img_width, img_height, channels),
                activation = 'relu')
# Step 2 - Pooling - halves the spatial dimensions (64x64 -> 32x32)
model %>%
  layer_max_pooling_2d(pool_size = c(2,2))
First, we should perhaps get an overall picture of how a CNN architecture looks.
[Figure: overview of a typical CNN architecture]
model
Model
Model: "sequential"
_________________________________________________________________________________________________________________________________________________________________
Layer (type) Output Shape Param #
=================================================================================================================================================================
conv2d (Conv2D) (None, 64, 64, 32) 896
_________________________________________________________________________________________________________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 32, 32, 32) 0
=================================================================================================================================================================
Total params: 896
Trainable params: 896
Non-trainable params: 0
_________________________________________________________________________________________________________________________________________________________________
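Where does the 896 in the summary come from? Each of the 32 filters has 3 x 3 x 3 weights (kernel height x kernel width x input channels) plus one bias:
# (kernel_h * kernel_w * in_channels + 1 bias) * n_filters
(3 * 3 * 3 + 1) * 32  # = 896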
# Adding a second convolution + pooling block
# (input_shape is only needed on the first layer)
model %>%
  layer_conv_2d(filters = 32, kernel_size = c(3,3), padding = "same", activation = 'relu') %>%
  layer_max_pooling_2d(pool_size = c(2,2))
# Step 3 - Flattening
model %>%
layer_flatten()
# Step 4 - Full connection
model %>%
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dropout(rate = 0.2)  # dropout to reduce overfitting
# Output layer: a single sigmoid unit for the binary cat/dog decision
model %>%
  layer_dense(units = 1, activation = 'sigmoid')
model %>% summary()
Model: "sequential"
_________________________________________________________________________________________________________________________________________________________________
Layer (type) Output Shape Param #
=================================================================================================================================================================
conv2d (Conv2D) (None, 64, 64, 32) 896
_________________________________________________________________________________________________________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 32, 32, 32) 0
_________________________________________________________________________________________________________________________________________________________________
conv2d_1 (Conv2D) (None, 32, 32, 32) 9248
_________________________________________________________________________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D) (None, 16, 16, 32) 0
_________________________________________________________________________________________________________________________________________________________________
flatten (Flatten) (None, 8192) 0
_________________________________________________________________________________________________________________________________________________________________
dense (Dense) (None, 128) 1048704
_________________________________________________________________________________________________________________________________________________________________
dropout (Dropout) (None, 128) 0
_________________________________________________________________________________________________________________________________________________________________
dense_1 (Dense) (None, 1) 129
=================================================================================================================================================================
Total params: 1,058,977
Trainable params: 1,058,977
Non-trainable params: 0
_________________________________________________________________________________________________________________________________________________________________
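The bulk of the parameters sits in the first dense layer: flattening the 16 x 16 x 32 feature maps yields 8192 inputs, each connected to all 128 units (plus one bias per unit):
16 * 16 * 32              # = 8192 inputs after flattening
(16 * 16 * 32 + 1) * 128  # = 1048704 parameters in the dense layer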
# deepviz::plot_model(model) # Visualize if you like
# compile
model %>% compile(
loss = "binary_crossentropy",
optimizer = 'adam',
metrics = "accuracy"
)
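For intuition about the loss: binary cross-entropy penalizes confident wrong predictions heavily. A minimal hand-rolled version for a single prediction (just the formula, not the Keras implementation):
# binary cross-entropy of predicted probability p for true label y
bce <- function(y, p) -(y * log(p) + (1 - y) * log(1 - p))
bce(1, 0.9)  # confident and correct -> small loss (~0.105)
bce(1, 0.1)  # confident and wrong -> large loss (~2.303)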
set.seed(1337)
# And now we can train the network
hist <- model %>% fit_generator(
  train_image_array_gen,
  steps_per_epoch = 800,
  epochs = 2,
  validation_data = test_image_array_gen,
  validation_steps = 100,
  verbose = FALSE
)
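Note that with 8000 training images and a batch size of 32, one full pass over the data corresponds to 250 steps, so 800 steps per epoch cycles the (augmented) data a bit more than three times. For exactly one pass per epoch you could derive the steps from the generator:
# steps for one full pass over the training data
ceiling(train_image_array_gen$n / 32)  # = 250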
hist %>% plot()
model %>% evaluate_generator(test_image_array_gen, steps = 500)
loss accuracy
0.4832610 0.7717994
list.files(path = "dataset/single_prediction")
[1] "cat_or_dog_1.jpg" "cat_or_dog_2.jpg"
img <- image_load('dataset/single_prediction/cat_or_dog_1.jpg', target_size = target_size)
img %<>%
  image_to_array() %>%
  array_reshape(c(1, dim(.))) %>%
  divide_by(255)  # rescale like the training data
res <- predict(model, img)
# the sigmoid outputs a probability, so threshold at 0.5
ifelse(res < 0.5, 'cat', 'dog')
[,1]
[1,] "dog"
Let's plot the image and see if we were right.
library(magick) # For reading and working with images
image_read('dataset/single_prediction/cat_or_dog_1.jpg') %>% as.raster() %>% plot()
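We can wrap the prediction steps in a small helper and score the second test image as well; predict_image() is my own (hypothetical) name, not part of keras:
# load, rescale and classify a single image
predict_image <- function(path) {
  img <- image_load(path, target_size = target_size) %>%
    image_to_array()
  img <- array_reshape(img, c(1, dim(img))) / 255
  prob <- predict(model, img)[1, 1]
  ifelse(prob < 0.5, 'cat', 'dog')
}
predict_image('dataset/single_prediction/cat_or_dog_2.jpg')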
Ok, now we can delete all the images again…
unlink("dataset", recursive = TRUE)
Build a hotdog-not-hotdog classifier
You can get the data here (this link should be a bit faster): https://storage.googleapis.com/sds-file-transfer/hot-dog-not-hot-dog.zip
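A minimal sketch for fetching and unpacking the data, analogous to the cats-and-dogs setup above (the folder name is my own choice):
# download and extract the exercise data
temp <- tempfile(fileext = ".zip")
download.file("https://storage.googleapis.com/sds-file-transfer/hot-dog-not-hot-dog.zip", temp)
unzip(temp, exdir = "hotdog")
list.files("hotdog")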