top of page

TensorFlow Learning: Keras Tuner

Updated: Oct 8, 2024


ree

Source of data - Fashion MNIST images

Problem domain - Classification

Learning Objective - Tune the hyperparameters, identify the best optimizer and learning rate, and start using TensorBoard for analysis.


Here's the code:


import tensorflow as tf

from tensorflow import keras

import keras_tuner as kt

import os

import subprocess


# Load the Fashion MNIST dataset as (images, integer labels) train/test pairs.
(img_train, label_train), (img_test, label_test) = keras.datasets.fashion_mnist.load_data()

# Scale pixel values by 1/255 and append a trailing channels axis in one step.
# The cast to float32 happens once; indexing with tf.newaxis keeps the dtype,
# so no second astype() is needed.
img_train = (img_train.astype('float32') / 255.0)[..., tf.newaxis]
img_test = (img_test.astype('float32') / 255.0)[..., tf.newaxis]

# Set up the log directory for TensorBoard.
# exist_ok=True replaces the check-then-create pattern, which can fail if the
# directory appears between the existence check and the makedirs() call.
log_dir = 'logs/hparam_tuning'
os.makedirs(log_dir, exist_ok=True)

# Data augmentation: small random rotations and shifts, no horizontal flips.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=False,
)
# NOTE: datagen.fit(img_train) is intentionally not called. fit() only computes
# statistics for featurewise_center / featurewise_std_normalization /
# zca_whitening, none of which are enabled here, so it would just copy the
# whole training set without changing the generator.


# Function to start TensorBoard as a subprocess

def start_tensorboard(logdir):
    """Launch TensorBoard in the background, pointed at *logdir*.

    Args:
        logdir: Directory passed to the ``tensorboard --logdir`` option.

    Returns:
        The ``subprocess.Popen`` handle of the TensorBoard process, so the
        caller can ``terminate()`` or ``wait()`` on it. Callers that only want
        fire-and-forget behavior may ignore the return value.

    Raises:
        FileNotFoundError: If no ``tensorboard`` executable is found on PATH.
    """
    # An argument list (no shell) keeps logdir from being interpreted by a shell.
    return subprocess.Popen(['tensorboard', '--logdir', logdir])


# Model-building function for Keras Tuner

def model_builder(hp):
    """Build and compile a tunable dense classifier for Keras Tuner.

    Search space registered on *hp*:
        units:     width of the hidden Dense layer, 32..512 in steps of 32.
        dropout:   dropout rate after the hidden layer, 0.1..0.5 in steps of 0.1.
        optimizer: one of 'adam', 'sgd', 'RMSprop' (each with default settings).

    Args:
        hp: Keras Tuner hyperparameter object providing Int/Float/Choice.

    Returns:
        A compiled ``keras.Sequential`` model with a 10-way softmax output.

    Raises:
        ValueError: If the chosen optimizer name is not one of the supported ones.
    """
    model = keras.Sequential()

    # The script adds a trailing channels axis to the images before training,
    # so the declared input shape must be (28, 28, 1) rather than (28, 28).
    model.add(keras.layers.Flatten(input_shape=(28, 28, 1)))

    # Tune the number of units in the first Dense layer
    units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(keras.layers.Dense(units=units, activation='relu'))

    # Tune the dropout rate
    dropout = hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.1)
    model.add(keras.layers.Dropout(rate=dropout))

    # The output layer already applies softmax, i.e. it emits probabilities.
    model.add(keras.layers.Dense(10, activation='softmax'))

    # Tune the optimizer
    optimizer = hp.Choice('optimizer', values=['adam', 'sgd', 'RMSprop'])

    # Map the chosen name to its optimizer class; fail loudly on anything else
    # instead of hitting an unbound variable at compile time.
    optimizer_classes = {
        'adam': keras.optimizers.Adam,
        'sgd': keras.optimizers.SGD,
        'RMSprop': keras.optimizers.RMSprop,
    }
    if optimizer not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer: {optimizer!r}")
    selected_optimizer = optimizer_classes[optimizer]()

    # from_logits must be False because the model outputs softmax probabilities;
    # from_logits=True would apply a second softmax inside the loss.
    model.compile(optimizer=selected_optimizer,
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])

    return model


# Hold out the last 20% of the training set for validation. Using the test set
# both for model selection (tuner objective, best-epoch choice) and for the
# final evaluation would make the reported test accuracy optimistically biased.
split_at = len(img_train) - len(img_train) // 5
img_fit, label_fit = img_train[:split_at], label_train[:split_at]
img_val, label_val = img_train[split_at:], label_train[split_at:]

# One batch size for the search, the full training run and the retrain, so the
# best epoch found below carries over to the retrain.
batch_size = 32

# Initialize the tuner
tuner = kt.Hyperband(
    model_builder,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='workspace',
    project_name='intro_to_kerasTuner',
)

# Early stopping callback
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# TensorBoard callback
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Start TensorBoard before searching hyperparameters
start_tensorboard(log_dir)

# Search for the best hyperparameters using augmented data.
# NOTE(review): Hyperband appears to set each trial's epoch budget itself
# (bounded by max_epochs), so epochs=50 here likely has no effect - confirm.
tuner.search(datagen.flow(img_fit, label_fit, batch_size=batch_size),
             epochs=50,
             validation_data=(img_val, label_val),
             callbacks=[stop_early, tensorboard_callback])

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')}, the optimal dropout rate is {best_hps.get('dropout')}, and the best optimizer is {best_hps.get('optimizer')}.
""")

# Build the best model
model = tuner.hypermodel.build(best_hps)

# Train the model with augmented data and the best hyperparameters
history = model.fit(datagen.flow(img_fit, label_fit, batch_size=batch_size),
                    epochs=50,
                    validation_data=(img_val, label_val),
                    callbacks=[tensorboard_callback])

# Determine the best epoch (1-based) by validation accuracy
val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print(f'Best epoch: {best_epoch}')

# Retrain a fresh model for exactly the best number of epochs
hypermodel = tuner.hypermodel.build(best_hps)
hypermodel.fit(datagen.flow(img_fit, label_fit, batch_size=batch_size),
               epochs=best_epoch,
               validation_data=(img_val, label_val))

# Evaluate the model on the untouched test data
eval_result = hypermodel.evaluate(img_test, label_test)
print("[test loss, test accuracy]:", eval_result)


This could achieve about 86% accuracy, which was good enough, but I wanted to get it into the nineties. So I made some more changes to the data augmentation by introducing zooming, and advanced the architecture of the model itself by introducing Conv2D layers. Additionally, I made the learning rate a tunable hyperparameter so that it varies across the search space.


'''

Best val_accuracy So Far: 0.9107999801635742

Total elapsed time: 02h 06m 19s


The hyperparameter search is complete. The optimal number of units in the first densely-connected

layer is 320, the optimal dropout rate is 0.30000000000000004, the best optimizer is adam, and the learning rate is 0.00028965674098173344.


'''


import tensorflow as tf

from tensorflow import keras

import keras_tuner as kt

import os

import subprocess


# Load the Fashion MNIST dataset as (images, integer labels) train/test pairs.
(img_train, label_train), (img_test, label_test) = keras.datasets.fashion_mnist.load_data()

# Scale pixel values by 1/255 and append a trailing channels axis in one step.
# The cast to float32 happens once; indexing with tf.newaxis keeps the dtype,
# so no second astype() is needed.
img_train = (img_train.astype('float32') / 255.0)[..., tf.newaxis]
img_test = (img_test.astype('float32') / 255.0)[..., tf.newaxis]

# Set up the log directory for TensorBoard.
# exist_ok=True replaces the check-then-create pattern, which can fail if the
# directory appears between the existence check and the makedirs() call.
log_dir = 'logs/hparam_tuning'
os.makedirs(log_dir, exist_ok=True)

# Data augmentation: small random rotations, shifts and zooms, no horizontal flips.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,  # Adding zoom augmentation
    horizontal_flip=False,
)
# NOTE: datagen.fit(img_train) is intentionally not called. fit() only computes
# statistics for featurewise_center / featurewise_std_normalization /
# zca_whitening, none of which are enabled here, so it would just copy the
# whole training set without changing the generator.


# Function to start TensorBoard as a subprocess

def start_tensorboard(logdir):
    """Launch TensorBoard in the background, pointed at *logdir*.

    Args:
        logdir: Directory passed to the ``tensorboard --logdir`` option.

    Returns:
        The ``subprocess.Popen`` handle of the TensorBoard process, so the
        caller can ``terminate()`` or ``wait()`` on it. Callers that only want
        fire-and-forget behavior may ignore the return value.

    Raises:
        FileNotFoundError: If no ``tensorboard`` executable is found on PATH.
    """
    # An argument list (no shell) keeps logdir from being interpreted by a shell.
    return subprocess.Popen(['tensorboard', '--logdir', logdir])


# Model-building function for Keras Tuner

def model_builder(hp):
    """Assemble and compile the tunable CNN that Keras Tuner searches over.

    Hyperparameters are registered on *hp* in this order: conv_1_filters,
    conv_1_kernel, conv_2_filters, conv_2_kernel, units, dropout, optimizer,
    learning_rate.

    Args:
        hp: Keras Tuner hyperparameter object providing Int/Float/Choice.

    Returns:
        A compiled ``keras.Sequential`` model: two Conv2D -> BatchNormalization
        -> MaxPooling2D stages, then Flatten, a Dense hidden layer, Dropout and
        a 10-way softmax output.
    """
    layers = keras.layers

    # Sample every hyperparameter up front, in the same order as the layers
    # that consume them.
    conv_1_filters = hp.Int('conv_1_filters', 32, 128, step=32)
    conv_1_kernel = hp.Choice('conv_1_kernel', [3, 5])
    conv_2_filters = hp.Int('conv_2_filters', 64, 256, step=32)
    conv_2_kernel = hp.Choice('conv_2_kernel', [3, 5])
    dense_units = hp.Int('units', min_value=64, max_value=512, step=64)
    dropout_rate = hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.1)
    optimizer_name = hp.Choice('optimizer', values=['adam', 'sgd', 'RMSprop'])
    step_size = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')

    network = keras.Sequential([
        # First convolutional stage; also declares the (28, 28, 1) input.
        layers.Conv2D(filters=conv_1_filters,
                      kernel_size=conv_1_kernel,
                      activation='relu',
                      input_shape=(28, 28, 1)),
        layers.BatchNormalization(),
        layers.MaxPooling2D(pool_size=(2, 2)),
        # Second convolutional stage.
        layers.Conv2D(filters=conv_2_filters,
                      kernel_size=conv_2_kernel,
                      activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(pool_size=(2, 2)),
        # Classifier head.
        layers.Flatten(),
        layers.Dense(units=dense_units, activation='relu'),
        layers.Dropout(rate=dropout_rate),
        layers.Dense(10, activation='softmax'),
    ])

    # Look up the optimizer class by its tuned name and give it the tuned rate.
    optimizer_classes = {
        'adam': keras.optimizers.Adam,
        'sgd': keras.optimizers.SGD,
        'RMSprop': keras.optimizers.RMSprop,
    }
    chosen_optimizer = optimizer_classes[optimizer_name](learning_rate=step_size)

    network.compile(
        optimizer=chosen_optimizer,
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return network


# Hold out the last 20% of the training set for validation. Using the test set
# both for model selection (tuner objective, best-epoch choice) and for the
# final evaluation would make the reported test accuracy optimistically biased.
split_at = len(img_train) - len(img_train) // 5
img_fit, label_fit = img_train[:split_at], label_train[:split_at]
img_val, label_val = img_train[split_at:], label_train[split_at:]

# One batch size for the search, the full training run and the retrain. The
# retrain previously used 32 although the best epoch was measured at 64.
batch_size = 64

# Initialize the tuner.
# The project name differs from the dense-model experiment: with the default
# overwrite=False, Keras Tuner reloads an existing project of the same name, so
# sharing 'intro_to_kerasTuner' would resume that older search, which has a
# different search space.
tuner = kt.Hyperband(
    model_builder,
    objective='val_accuracy',
    max_epochs=20,  # Increased to allow more training
    factor=3,
    directory='workspace',
    project_name='intro_to_kerasTuner_cnn',
)

# Early stopping callback
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# TensorBoard callback
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Start TensorBoard before searching hyperparameters
start_tensorboard(log_dir)

# Search for the best hyperparameters using augmented data.
# NOTE(review): Hyperband appears to set each trial's epoch budget itself
# (bounded by max_epochs), so epochs=50 here likely has no effect - confirm.
tuner.search(datagen.flow(img_fit, label_fit, batch_size=batch_size),
             epochs=50,
             validation_data=(img_val, label_val),
             callbacks=[stop_early, tensorboard_callback])

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')}, the optimal dropout rate is {best_hps.get('dropout')}, the best optimizer is {best_hps.get('optimizer')}, and the learning rate is {best_hps.get('learning_rate')}.
""")

# Build the best model
model = tuner.hypermodel.build(best_hps)

# Train the model with augmented data and the best hyperparameters
history = model.fit(datagen.flow(img_fit, label_fit, batch_size=batch_size),
                    epochs=50,
                    validation_data=(img_val, label_val),
                    callbacks=[tensorboard_callback])

# Determine the best epoch (1-based) by validation accuracy
val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print(f'Best epoch: {best_epoch}')

# Retrain a fresh model for exactly the best number of epochs
hypermodel = tuner.hypermodel.build(best_hps)
hypermodel.fit(datagen.flow(img_fit, label_fit, batch_size=batch_size),
               epochs=best_epoch,
               validation_data=(img_val, label_val))

# Evaluate the model on the untouched test data
eval_result = hypermodel.evaluate(img_test, label_test)
print("[test loss, test accuracy]:", eval_result)


This got me to 93% accuracy.


Comments


bottom of page