# perfspec-learning/learning/python/lib_perfspec.py

import marimo

# marimo version string recorded in this notebook file.
__generated_with = "0.10.17"
# Notebook application object; every cell below is registered on it with @app.cell.
app = marimo.App(width="medium")


@app.cell(hide_code=True)
def _():
    def app_vars():
        """Return the static application metadata as a new dictionary.

        The dictionary holds the author name and link, the display name,
        the training mode, date, version and the repository, about, intro
        and image URLs. A fresh dictionary is built on every call.
        """
        return {
            "author_name": "Jesús Pérez",
            "author_link": "mailto:info@jesusperez.pro",
            "name": "PerfSPEC Learning 🍃",
            "train_mode": "LSTM",
            "date": "2025",
            "version": "1.0",
            "repo": "https://repo.jesusperez.pro/jesus/perfspec-learning",
            "about": "https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/about.md",
            "intro": "https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/intro.md",
            "image": "https://repo.jesusperez.pro/jesus/perfspec-learning/media/branch/main/imgs/perfSPEC-learning.png",
        }

    return (app_vars,)


@app.cell(hide_code=True)
def title(app_vars):
    # Title cell: renders the banner of this notebook and exports `mo` and
    # `notebook_name` for the other cells.
    import marimo as mo

    notebook_name = 'lib_perfspec.py'

    _app = app_vars()

    # The pieces are built before the final markdown so that no f-string has
    # to reuse its own quote character inside a replacement field, which is a
    # SyntaxError before Python 3.12.
    _image_url = _app['image']
    _top_row = mo.hstack([
        mo.md(f"<small>{notebook_name} | vers: {_app['version']} {_app['date']} </small>"),
        mo.md(f"[{_app['author_name']}]({_app['author_link']})"),
    ])
    _banner = mo.vstack(items=[
        mo.md("<br>"),
        mo.md(f"# {_app['name']}"),
        # The `;` belongs inside the style attribute (it used to trail the
        # closing quote, producing a malformed tag).
        mo.md(f'<center><img src="{_image_url}" width="300" style="border-radius: 20px;"/></center>'),
    ], justify='center', gap=0.)

    # Last expression of the cell: this is what the cell displays.
    mo.md(
        f"""
            {_top_row}
            {_banner}
            It has been design as a [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) approach to put `thing` in only one place
        """
    )
    return mo, notebook_name


@app.cell(hide_code=True)
def perfspec_header(mo, perfspec):
    def header(notebook_name):
        """Return the markdown/HTML header for the notebook `notebook_name`.

        The header is a row with the notebook name, version and date, the
        main navigation menu and the author link, followed by the
        application title and logo.
        """
        app_info = perfspec['app']
        # Taken out of the f-string below so it does not reuse its own quote
        # character inside a replacement field (SyntaxError before
        # Python 3.12).
        image_url = app_info['image']
        top_row = mo.hstack([
            mo.md(f"<small>{notebook_name} | vers: {app_info['version']} {app_info['date']} </small>"),
            app_info['main_menu'],
            mo.md(f"[{app_info['author_name']}]({app_info['author_link']})"),
        ])
        banner = mo.vstack(items=[
            mo.md("<br>"),
            mo.md(f"# {app_info['name']}"),
            mo.md(f'<center><img src="{image_url}" width="300" style="border-radius: 20px;"/></center>'),
        ], justify='center', gap=0.)
        return f"""
            {top_row}
            {banner}
            """

    def intro(notebook_name):
        """Return the markdown introduction for the notebook `notebook_name`.

        The text links to the notebook and to this settings library and
        names the training mode stored in ``perfspec['app']['train_mode']``.
        """
        # NOTE(review): the notebook link is built under `models_dirpath`,
        # as in the original text - confirm this is the intended location.
        notebook_path = perfspec['defaults']['models_dirpath'] / notebook_name
        # The path is joined inside the expression; it used to be written as
        # literal text after the placeholder, which broke the link.
        settings_path = mo.notebook_dir() / "lib_perfspec.py"
        return f"""
            ## Intro

            [{notebook_name}]({notebook_path}) __notebook__ is created for `trained model PREDICTIONS` with [{perfspec['app']['train_mode']}](https://en.wikipedia.org/wiki/Long_short-term_memory) using [Tensor Keras](https://www.tensorflow.org/guide/keras)

            Settings are defined in: [lib_perfspec.py]({settings_path})

            Values can be overwritten by using **command-line** see current values below
         """
    return header, intro


@app.cell(hide_code=True)
def perfspec_vars(app_vars, mo):
    # Builds the shared `perfspec` dictionary:
    #   defaults - built-in values and the data/models/checkpoints directories
    #   app      - application metadata plus the navigation menu
    #   settings - defaults merged with command-line overrides
    # The cell stops with SystemExit when a required directory or the actions
    # file does not exist.
    from pathlib import Path

    _cli = mo.cli_args()
    _base_dir = mo.notebook_dir()

    perfspec = dict(
        defaults=dict(
            epochs=300,
            train_size=0.8,
            sequence_length=2,
            model_path="perfSPEC_model.keras",
            actions_path="actions-dataset-audit.txt",
            history_path="history.json",
            checkpoint_path="model_at_epoch_{epoch:02d}.keras",
            checkpoint_mode="epochs",   # alternative "weights"
            pred_input="delete_apiservices",
            verbose=None,
            data_dirpath=_base_dir / "../../data",
            models_dirpath=_base_dir / "../../models",
            checkpoints_dirpath=_base_dir / "../../models/checkpoints",
        ),
        app=app_vars(),
    )
    _defaults = perfspec["defaults"]

    # All three directories must exist. `raise SystemExit` replaces the
    # `exit()` helper, which is meant for interactive use.
    for _label, _key in (
        ("Trained models", "models_dirpath"),
        ("Data", "data_dirpath"),
        ("Checkpoints", "checkpoints_dirpath"),
    ):
        if not Path(_defaults[_key]).exists():
            raise SystemExit(f"{_label} path not found: {_defaults[_key]}")

    perfspec["app"]['main_menu'] = mo.nav_menu(
        {
            "#main": "🍃 Main",
            f"{perfspec['app']['intro']}": f"{mo.icon('lucide:shield-check')} Intro",
            f"{perfspec['app']['about']}": f"{mo.icon('lucide:package')} About",
            f"{perfspec['app']['repo']}": f"{mo.icon('lucide:github')} Repo",
        }
    )

    # `--sequence` feeds both `sequence_length` and `input_length`.
    _sequence_length = _cli.get("sequence") or _defaults["sequence_length"]

    perfspec["settings"] = dict(
        # Number of epochs
        epochs=_cli.get("epochs") or _defaults["epochs"],
        checkpoint_mode=_cli.get("checkpoint_mode") or _defaults["checkpoint_mode"],
        # Fraction of the data used for training
        train_size=_cli.get("train_size") or _defaults["train_size"],
        sequence_length=_sequence_length,
        embedding_dim=50,  # Dimensionality of embedding vectors
        input_length=_sequence_length,
        lstm_units_1=256,  # Depth of the first LSTM layer
        lstm_units_2=128,  # Depth of the second LSTM layer
        dropout_rate=0.2,  # Dropout rate
        batch_size=16,  # Adjusted for smaller datasets
        model_history_filepath=_defaults["models_dirpath"] / _defaults["history_path"],
        # Command-line overrides are wrapped in Path so these settings have
        # the same type as their defaults; code that calls `.exists()` on
        # them would fail on a plain string.
        model_filepath=Path(
            _cli.get("model_path")
            or _defaults["models_dirpath"] / _defaults["model_path"]
        ),
        checkpoint_filepath=Path(
            _cli.get("checkpoint_path")
            or _defaults["checkpoints_dirpath"] / _defaults["checkpoint_path"]
        ),
        actions_filepath=Path(
            _cli.get("actions_path")
            or _defaults["data_dirpath"] / _defaults["actions_path"]
        ),
        input_str=_cli.get("input") or _defaults["pred_input"],
        main_audit_log=_defaults['data_dirpath'] / "main-audit-logs.log",
        raw_audit_log=_defaults['data_dirpath'] / "raw-audit-logs.log",
        actions_distrib_filepath=_defaults['data_dirpath'] / 'actions_distribution.pdf',
        actions_distrib_format='pdf',
    )

    # An empty string for `verbose` selects level 1; otherwise the given
    # value or the default is used.
    _verbose_arg = _cli.get("verbose")
    if _verbose_arg == "":
        perfspec["settings"]["verbose"] = 1
    else:
        perfspec["settings"]["verbose"] = _verbose_arg or _defaults["verbose"]

    if not perfspec["settings"]["actions_filepath"].exists():
        raise SystemExit(
            f"Actions path not found:  {perfspec['settings']['actions_filepath']}"
        )

    # Last expression of the cell: this is what the cell displays.
    mo.md(
        f"""
        # Vars settings 🍃

        **perfspec** includes:  `defaults` and `settings` to keep all vars inside one dictionary

        Main configurable variables some of them can be overload from **command-line**

        Be sur `defaults` path exist for:

        | variable   | path   |
        |:---------- | :---------------- |
        | data_dirpath | {perfspec["defaults"]["data_dirpath"]} |
        | checkpoints_dirpath | {perfspec["defaults"]["checkpoints_dirpath"]} |
        | actions_filepath | {perfspec["settings"]["actions_filepath"]} |

        These path can be changed by using full file path with **command-line** arguments

        """
    )
    return Path, perfspec


@app.cell(hide_code=True)
def perfspec_args(mo, perfspec):
    # Handles the informational flags `--version` and `--help`: prints the
    # requested text and stops with SystemExit. Both are detected by an
    # empty-string value in the command-line arguments.
    _cli = mo.cli_args()
    _defaults = perfspec['defaults']

    if _cli.get("version") == "":
        # The version is stored in the `app` metadata; `defaults` has no
        # 'version' key, so the old lookup raised KeyError.
        print(f"PerfSPEC Learning: {perfspec['app']['version']}")
        raise SystemExit
    if _cli.get("help") == "":
        print("PerfSPEC Learning:\n")
        print(f"  To predict sequences use:\n\t\t --input value,value1 (default: {_defaults['pred_input']})")
        print(f"  Epochs to train use:\n\t\t --epochs num-value (default: {_defaults['epochs']})")
        print(f"  For train size use:\n\t\t --train_size decimal-value (default: {_defaults['train_size']})")
        # This line used to show the train_size default.
        print(f"  For sequence size in train model use:\n\t\t --sequence num-value (default: {_defaults['sequence_length']})")
        print(f"  For checkpoint mode (epochs,weights) in train model use:\n\t\t --checkpoint_mode value (default: {_defaults['checkpoint_mode']})")
        print(f"  For model_filepath use:\n\t\t --model_path model_filepath (default: {_defaults['model_path']})")
        print(f"  For checkpoint_filepath use:\n\t\t --checkpoint_path checkpoint_filepath (default: {_defaults['checkpoint_path']})")
        print(f"  For actions_filepath use:\n\t\t --actions_path actions_filepath\n\t\t (default: {perfspec['settings']['actions_filepath']})")
        print(f"  For verbose mode use:\n\t\t --verbose (default: {_defaults['verbose']})")
        raise SystemExit

    # Last expression of the cell: this is what the cell displays.
    mo.md(
        """
        # Arguments 🍃

        Main arguments parsing some of values can be overload from **command-line**
        """
    )
    return


@app.cell(hide_code=True)
def perfspec_cli_ops(mo):
    def out_cli_ops(notebook_name):
        """Return markdown listing the command-line options of `notebook_name`.

        Each entry shows the full command, built from
        ``mo.notebook_location()`` and the notebook name, followed by one
        option. All options use two dashes; several entries used to show
        three.
        """
        # Prefix shared by every example: interpreter, notebook path and the
        # `--` separator that precedes the notebook's own options.
        run_cmd = f"python {mo.notebook_location()}/{notebook_name} --"
        return (
        f"""

        <h3>Command-Line options</h3>

        <h4> Input value for prediction trained model </h4>
          ```{run_cmd} --input `value, value-1`  ```
        <h4> Model filepath    </h4>
          ```{run_cmd} --model_path `model-filepath`  ```
        <h4> Checkpoints model filepath </h4>
          ```{run_cmd} --checkpoint_path `checkpoint-filepath`  ```
        <h4> Actions filepath </h4>
          ```{run_cmd} --actions_path `actions-filepath`  ```
        <h4> Sequence to train model </h4>
          ```{run_cmd} --sequence `num-value`  ```
        <h4> Epochs to train model </h4>
          ```{run_cmd} --epochs `num-value`  ```
        <h4> Train model size </h4>
          ```{run_cmd} --train_size `decimal-value`  ```
        <h4> Train model checkpoint modes </h4>
          ```{run_cmd} --checkpoint_mode `epochs|weights`  ```
        <h4> Verbose </h4>
          ```{run_cmd} --verbose  ```
        """
        )

    # Last expression of the cell: this is what the cell displays.
    mo.md(
        """
        # Command-line options 🍃
        """
    )
    return (out_cli_ops,)


@app.cell(hide_code=True)
def perfspec_out_settings(mo, perfspec):
    def out_settings(notebook_name):
        """Return two markdown tables with the defaults and training settings.

        `notebook_name` is accepted for a uniform call signature but is not
        used in the text. Values come from ``perfspec['defaults']`` and
        ``perfspec['settings']``.
        """
        defaults = perfspec['defaults']
        settings = perfspec['settings']
        # No raw `|` may appear inside a table cell: markdown reads it as a
        # column separator, which split the checkpoint_mode row.
        return (
            f"""
            <h4>Defaults </h4>

            | variable   | value  | descripción   |
            |:---------- | :----  | :------------ |
            | models dir path | {defaults['models_dirpath']} | models path |
            | model file | **{defaults['model_path']}** | trained model filepath |
            | checkpoints model dir path | {defaults['checkpoints_dirpath']} | checkpoint models path |
            | checkpoint model file | **{defaults['checkpoint_path']}** | checkpoint trained model filepath |
            | data_path  | {defaults['data_dirpath']} | data path |
            | actions file| **{defaults['actions_path']}** | actions filepath |
            | history file| **{defaults['history_path']}** | history filepath |
            | input |  **{defaults['pred_input']}** | input value for prediction |
            | verbose |  {defaults['verbose']} | show detail info |

            <h4> Train model settings </h4>

            | variable   | value  | descripción   |
            |:---------- | :----  | :------------ |
            | epochs     | **{defaults['epochs']}** | or change in `comman-line` <br> with **--epochs** `num-value` |
            | train_size     | **{defaults['train_size']}** | or change in `comman-line` <br> with **--train_size** `decimal-value` |
            | sequence_length | **{defaults['sequence_length']}** | Length of sequencs <br>or change in `comman-line` <br> with **--sequence** `num-value` |
            | checkpoint_mode | **{defaults['checkpoint_mode']}** | Checkpoint_mode (epochs or weights)<br>or change in `comman-line` <br> with **--checkpoint_mode** `value` |
            | input_length |  {defaults['sequence_length']} | Length of input sequencs |
            | lstm_units_1 |  {settings['lstm_units_1']} | Depth of the first LSTM layer |
            | lstm_units_2 |  {settings['lstm_units_2']} | Depth of the second LSTM layer |
            | dropout_rate |  {settings['dropout_rate']} | Dropout rate |
            | batch_size |  {settings['batch_size']} | Adjusted for smaller datasets |

            """
        )

    # Last expression of the cell: this is what the cell displays.
    mo.md(
        """
        # Default values 🍃
        """
    )
    return (out_settings,)


@app.cell(hide_code=True)
def perfspec_load_actions(mo):
    def load_actions(actions_path, verbose):
        """Read the actions file and fit a label encoder on its lines.

        Args:
            actions_path: path of a text file with one action per line.
            verbose: when not None, the loaded path is printed.

        Returns:
            A tuple ``(actions, unique_actions, label_encoder,
            encoded_actions)``: every line with trailing whitespace removed,
            the distinct actions in first-seen order, the fitted
            ``LabelEncoder`` and the encoded form of ``actions``. When the
            file does not exist both lists are empty and the encoder is
            fitted on an empty list.
        """
        from pathlib import Path
        from sklearn.preprocessing import LabelEncoder

        actions = []
        if Path(actions_path).exists():
            if verbose is not None:
                print (f"Actions loaded from: {actions_path}")
            with open(actions_path, 'r') as file:
                actions = [line.rstrip() for line in file]
        # dict.fromkeys keeps first-seen order and removes duplicates in
        # linear time; the earlier list membership test was quadratic.
        unique_actions = list(dict.fromkeys(actions))
        label_encoder = LabelEncoder()
        encoded_actions = label_encoder.fit_transform(actions)
        return (actions, unique_actions, label_encoder, encoded_actions)

    # Last expression of the cell: this is what the cell displays.
    mo.md(
        """
        # Load actions and encoders 🍃
        """
    )
    return (load_actions,)


@app.cell(hide_code=True)
def perfspec_input_sequence(mo, perfspec):
    def get_input_sequence(input_str, unique_actions):
        """Split a comma-separated input into the actions that are known.

        Args:
            input_str: comma-separated action names. When empty (or None),
                ``perfspec['settings']['input_str']`` is used instead.
            unique_actions: collection of valid action names.

        Returns:
            The list of entries found in `unique_actions`, in input order.
            Unknown entries are dropped; they are reported with a printed
            message when verbose is enabled in the settings or on the
            command line.
        """
        # The fallback used to skip the parsing below and then fail with
        # UnboundLocalError on return; now the default is parsed like any
        # other input.
        if not input_str:
            input_str = perfspec['settings']['input_str']

        known_actions = set(unique_actions)
        input_sequence = []
        for action in input_str.split(","):
            if action in known_actions:
                input_sequence.append(action)
            elif (
                perfspec['settings']['verbose'] is not None
                or mo.cli_args().get("verbose") is not None
            ):
                print (f"Input not exist: {action}")
        return input_sequence

    # Last expression of the cell: this is what the cell displays.
    mo.md(
        """
        # Input sequence 🍃

        Control **input sequence** to be included in __actions data__ in the model
        """
    )
    return (get_input_sequence,)


@app.cell(hide_code=True)
def perfspec_load_model_from_path(mo, perfspec):
    def load_model_from_path(model_path, verbose):
        """Load a trained Keras model from `model_path`.

        Args:
            model_path: location of the model file, as ``str`` or
                ``pathlib.Path``.
            verbose: when not None, the loaded path is printed.

        Returns:
            The loaded model, or None when the file does not exist.
        """
        from pathlib import Path
        from keras.models import load_model
        # NOTE(review): these names are not referenced below. The import is
        # kept in case loading relies on side effects of importing
        # `train_perfspec` - confirm before removing.
        from train_perfspec import (
            PrecisionMetric,
            RecallMetric,
            F1ScoreMetric
        )
        # No custom objects are passed; the three metrics imported above
        # could be mapped here by class name if a model needs them.
        custom_objects = {}

        # Wrapping in Path lets a plain string path work too; calling
        # `.exists()` on a str raised AttributeError.
        model_file = Path(model_path)
        if not model_file.exists():
            return None

        model = load_model(
            model_file,
            custom_objects=custom_objects
        )
        if verbose is not None:
            print (f"Trained model loaded from: {model_path}")
        return model

    # Last expression of the cell: this is what the cell displays.
    mo.md(
        f"""
        ## Load model  🍃

        Load trained model from file **model_path**

        Default path: {perfspec['settings']['model_filepath']}
        """
    )
    return (load_model_from_path,)


@app.cell(hide_code=True)
def perfspec_predict(mo, perfspec):
    def predict_action(model, sequence_length, input_sequence, label_encoder, verbose):
        """Predict the next action for `input_sequence` with a trained model.

        Args:
            model: object exposing ``predict(inputs, verbose=...)``.
            sequence_length: number of steps the input is padded or
                truncated to.
            input_sequence: action names; each must be known to
                `label_encoder`.
            label_encoder: fitted encoder that maps action names to integer
                labels and back.
            verbose: "-1" silences all output and passes ``verbose=None`` to
                ``model.predict``; any other value is passed through.

        Returns:
            A tuple ``(encoded_input, predicted)``. ``predicted`` has the
            keys 'probabilities', 'index', 'max', 'max_value' and 'action'.
        """
        import numpy as np
        from tensorflow.keras.preprocessing.sequence import pad_sequences

        # Encode with the caller's fitted encoder. Fitting a new encoder on
        # the input alone numbers the labels relative to that input, not to
        # the vocabulary `label_encoder` was fitted on.
        encoded_input = label_encoder.transform(input_sequence)

        # Pad short inputs at the front. With pad_sequences' default
        # truncation, inputs longer than `sequence_length` keep their last
        # `sequence_length` items. The earlier windowing loop is removed: it
        # called `.append` on an ndarray and failed for longer inputs.
        encoded_input = pad_sequences([encoded_input], maxlen=sequence_length, padding='pre')
        encoded_input = np.expand_dims(encoded_input, axis=-1)

        predicted = {}
        _verbose = verbose if verbose != "-1" else None

        predicted['probabilities'] = model.predict(encoded_input, verbose=_verbose)
        predicted['index'] = np.argmax(predicted['probabilities'])
        predicted['max'] = np.max(predicted['probabilities'], axis=-1)[0]
        predicted['max_value'] = "{:2.4f}".format(predicted['max'])
        predicted['action'] = label_encoder.inverse_transform(np.ravel(predicted['index']))
        if verbose != "-1":
            if verbose is not None or mo.cli_args().get("verbose") is not None:
               print(f"Predicted next action: {predicted['action'][0]} with: {predicted['max_value']}")
            elif not mo.running_in_notebook():
               # Outside a notebook and not verbose: print only the action.
               print(f"{predicted['action'][0]}")
        return (encoded_input,predicted)

    # Last expression of the cell: this is what the cell displays.
    mo.md(
        f"""
        # Predict action  🍃

        Predict action from trained model

        Default value: **{perfspec['settings']['input_str']}**

        For **command-line** it can be provided via **--input** `value` (can use several comma separated)
        """
    )
    return (predict_action,)


# Run the notebook application when this file is executed as a script.
if __name__ == "__main__":
    app.run()