from statistics import mean
from typing import Union
import tensorflow as tf
from ..firebase_helpers import random_char, write_in_callback
class EpochEnd(tf.keras.callbacks.Callback):
    """This class is a subclass of the `tf.keras.callbacks.Callback <https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback>`_
    abstract base class and overrides the methods :func:`on_epoch_begin` and :func:`on_epoch_end`
    allowing logging after epochs in training. This class also uses the
    :mod:`.firebase_helpers` to send data to Firebase Realtime database and also
    creates a 7 character unique string where the data is pushed on Firebase. Logging
    to Firebase is also controllable by ``schedule`` argument, even providing a
    granular control for each epoch.

    Example:

    .. code-block:: python
        :caption: Logging data after every epoch
        :emphasize-lines: 4,13
        :linenos:

        import tfwatcher

        # here we specify schedule = 1 to log after every epoch
        monitor_callback = tfwatcher.callbacks.EpochEnd(schedule=1)

        model.compile(
            optimizer=...,
            loss=...,
            # metrics which will be logged
            metrics=[...],
        )

        model.fit(..., callbacks=[monitor_callback])

    :param schedule: Use an integer value n to specify logging data every n epochs
        the first one being logged by default. Use a list of integers to control
        logging with a greater granularity, logs on all epoch numbers specified in
        the list taking the first epoch as epoch 1. Using a list will override
        logging on the first epoch by default, defaults to 1
    :type schedule: Union[int, list[int]], optional
    :param round_time: This argument allows specifying if you want to see the times
        on the web-app to be rounded, in most cases you would not be using this, defaults to 2
    :type round_time: int, optional
    :param print_logs: This argument should only be used when trying to debug if
        your logs do not appear in the web-app, if set to ``True`` this would print
        out the dictionary which is being pushed to Firebase, defaults to False
    :type print_logs: bool, optional
    :raises ValueError: If the ``schedule`` is neither an integer or a list, or if
        an integer ``schedule`` is less than 1.
    :raises Exception: If all the values in ``schedule`` list are not convertible
        to integer.
    """

    def __init__(
        self,
        schedule: Union[int, list] = 1,
        round_time: int = 2,
        print_logs: bool = False,
    ):
        super(EpochEnd, self).__init__()
        self.schedule = schedule
        # Timestamps (tf.timestamp scalars) for the currently running epoch.
        self.start_time = None
        self.end_time = None
        # Per-epoch durations (seconds) accumulated since the last push.
        self.times = []
        self.round_time = round_time
        self.print_logs = print_logs
        # 7-character key under which this session's data lives on Firebase.
        self.ref_id = random_char(7)
        print(f"Use this ID to monitor training for this session: {self.ref_id}")
        # Cache the schedule kind once so on_epoch_end stays cheap.
        self.is_int = False
        self.is_list = False
        if isinstance(self.schedule, int):
            self.is_int = True
            # Fail fast: a schedule of 0 would otherwise raise ZeroDivisionError
            # (and a negative one silently never log) inside on_epoch_end.
            if self.schedule < 1:
                raise ValueError("schedule should be an integer >= 1")
        elif isinstance(self.schedule, list):
            self.is_list = True
        else:
            raise ValueError("schedule should either be an integer or a list")
        if self.is_list:
            try:
                # Normalize every entry to int up front so membership tests in
                # on_epoch_end compare like types.
                self.schedule = list(map(int, self.schedule))
            except (ValueError, TypeError) as err:
                raise Exception(
                    "All elements in the list should be convertible to int: {}".format(
                        err
                    )
                )

    def on_epoch_begin(self, epoch: int, logs: dict = None):
        """Overrides the `tf.keras.callbacks.Callback.on_epoch_begin <https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback#on_epoch_begin>`_
        method which is called at the start of an epoch. This function should only be
        called during TRAIN mode.

        :param epoch: Index of epoch
        :type epoch: int
        :param logs: Currently no data is passed to this argument since there are no
            logs during the start of an epoch, defaults to None
        :type logs: dict, optional
        """
        self.start_time = tf.timestamp()

    def on_epoch_end(self, epoch: int, logs: dict = None):
        """Overrides the `tf.keras.callbacks.Callback.on_epoch_end <https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback#on_epoch_end>`_
        method which is called at the end of an epoch. This function should only be
        called during TRAIN mode. This method adds the epoch number, the average time
        taken and pushes it to Firebase using the :mod:`.firebase_helpers` module.

        :param epoch: Index of epoch
        :type epoch: int
        :param logs: Metric results for this training epoch, and for the validation
            epoch if validation is performed. Validation result keys are prefixed with
            ``val_``. For training epoch, the values of the Model's metrics are
            returned. Example : ``{'loss': 0.2, 'accuracy': 0.7}``, defaults to None
        :type logs: dict, optional
        """
        self.end_time = tf.timestamp()
        # Use Python built in functions to allow using in @tf.function see
        # https://github.com/tensorflow/tensorflow/issues/27491#issuecomment-890887810
        elapsed = float(self.end_time - self.start_time)
        self.times.append(elapsed)
        # Since we have similar logging code use the fact that if first argument of and is False Python doesn't
        # execute the second argument
        if (
            (self.is_int and ((epoch + 1) % self.schedule == 0))
            or (self.is_list and ((epoch + 1) in self.schedule))
        ) or (epoch == 0):
            # Copy instead of aliasing: Keras shares the ``logs`` dict with every
            # other callback, so mutating it in place would leak our keys to them.
            # Also tolerates ``logs=None`` (the documented default).
            data = dict(logs) if logs else {}
            data["epoch"] = epoch + 1
            data["batch"] = False
            # Average over all epochs since the previous push to Firebase.
            data["avg_time"] = round(mean(self.times), self.round_time)
            write_in_callback(data=data, ref_id=self.ref_id)
            # The raw per-epoch times are only shown locally, not pushed.
            data["time"] = self.times
            if self.print_logs:
                print(data)
            # Start a fresh window for the next scheduled push.
            self.times = []