From d618c5a298134be83c8095e121635e2367621ae8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jes=C3=BAs=20P=C3=A9rez=20Lorenzo?=
 <jesusperezlorenzo@rigzin.local>
Date: Sun, 26 Jan 2025 23:59:44 +0000
Subject: [PATCH] chore: add lib_perfspec.py

---
 learning/python/lib_perfspec.py | 438 ++++++++++++++++++++++++++++++++
 1 file changed, 438 insertions(+)
 create mode 100644 learning/python/lib_perfspec.py
diff --git a/learning/python/lib_perfspec.py b/learning/python/lib_perfspec.py
new file mode 100644
index 0000000..73b6197
--- /dev/null
+++ b/learning/python/lib_perfspec.py
@@ -0,0 +1,438 @@
+import marimo
+
+__generated_with = "0.10.16"  # presumably the marimo version that wrote this file — confirm
+app = marimo.App(width="medium")  # app object the @app.cell functions below are attached to
+
+
+@app.cell(hide_code=True)
+def _():
+    def app_vars():
+        """Return the application metadata: author, name, train mode, date, version and links."""
+        return {
+            'author_name': 'Jesús Pérez',
+            'author_link': 'mailto:info@jesusperez.pro',
+            'name': 'PerfSPEC Learning 🍃',
+            'train_mode': 'LSTM',
+            'date': "2025",
+            'version': "1.0",
+            'repo': 'https://repo.jesusperez.pro/jesus/perfspec-learning',
+            'about': 'https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/about',
+            'intro': 'https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/intro',
+            'image': 'https://repo.jesusperez.pro/jesus/perfspec-learning/media/branch/main/imgs/prefSPEC-learning.png',
+        }
+    return (app_vars,)
+
+
+@app.cell(hide_code=True)
+def title(app_vars):
+    import marimo as mo
+    # Cell output: banner for this notebook (version line, author link, name, logo) from app_vars().
+    # The <img> f-string subscripts with double quotes so it also parses before Python 3.12.
+    notebook_name = 'lib_perfspec.py'
+    _app = app_vars()
+
+    mo.md(
+        f"""
+            {mo.hstack([
+                mo.md(f"<small>{notebook_name} | vers: {_app['version']} {_app['date']} </small>"),
+                mo.md(f"[{_app['author_name']}]({_app['author_link']})"),
+            ])}
+            {mo.vstack(items=[
+                mo.md(f"<br>"),
+                mo.md(f"# {_app['name']}"),
+                mo.md(f'<center><img src="{_app["image"]}" width="300" style="border-radius: 20px;"/></center>'),
+            ],justify='center',gap=0.)}
+            It has been designed as a [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) approach to put `thing` in only one place
+        """
+    )
+    return mo, notebook_name
+
+
+@app.cell(hide_code=True)
+def perfspec_header(mo, perfspec):
+    def header(notebook_name):  # banner markdown: version line, main menu, author link, name, logo
+        return f"""
+            {mo.hstack([
+                mo.md(f"<small>{notebook_name} | vers: {perfspec['app']['version']} {perfspec['app']['date']} </small>"),
+                perfspec['app']['main_menu'],
+                mo.md(f"[{perfspec['app']['author_name']}]({perfspec['app']['author_link']})"),
+            ])}
+            {mo.vstack(items=[
+                mo.md(f"<br>"),
+                mo.md(f"# {perfspec['app']['name']}"),
+                mo.md(f'<center><img src="{perfspec["app"]["image"]}" width="300" style="border-radius: 20px;"/></center>'),
+            ],justify='center',gap=0.)}
+            """
+    def intro(notebook_name):  # intro markdown; each link target is built entirely inside its {...} field
+        return f"""
+            ## Intro
+
+            [{notebook_name}]({perfspec['defaults']['models_dirpath'] / notebook_name}) __notebook__ is created for `trained model PREDICTIONS` with [{perfspec['app']['train_mode']}](https://en.wikipedia.org/wiki/Long_short-term_memory) using [Tensor Keras](https://www.tensorflow.org/guide/keras)
+
+            Settings are defined in: [lib_perfspec.py]({mo.notebook_dir() / 'lib_perfspec.py'})
+
+            Values can be overwritten by using **command-line** see current values below
+         """
+    return header, intro
+
+
+@app.cell(hide_code=True)
+def perfspec_vars(app_vars, mo):
+    from pathlib import Path
+
+    # Builds the shared `perfspec` dict: `defaults`, `app` (app_vars() plus the nav menu) and
+    # `settings` (defaults overridable from the command line); stops if a required path is missing.
+    _cli = mo.cli_args()
+
+    perfspec = dict(
+      defaults = dict(
+            epochs=400,
+            train_size=0.8,
+            sequence_length=2,
+            model_path="perfSPEC_model.keras",
+            actions_path="actions-dataset-audit.txt",
+            history_path="history.json",
+            checkpoint_path="model_at_epoch_{epoch:02d}.keras",
+            checkpoint_mode="epochs",   # alternative "weights"
+            pred_input="delete_apiservices",
+            verbose=None,
+            data_dirpath=mo.notebook_dir() / "../../data",
+            models_dirpath=mo.notebook_dir() / "../../models",
+            checkpoints_dirpath=mo.notebook_dir() / "../../models/checkpoints",
+        ),
+        app = app_vars(),
+    )
+
+    # `raise SystemExit(msg)` ends the run like `exit(msg)` did, without relying on the
+    # `exit` helper that the `site` module only adds for interactive sessions.
+    for _label, _key in (
+        ("Trained models", "models_dirpath"),
+        ("Data", "data_dirpath"),
+        ("Checkpoints", "checkpoints_dirpath"),
+    ):
+        if not Path(perfspec["defaults"][_key]).exists():
+            raise SystemExit(f"{_label} path not found: {perfspec['defaults'][_key]}")
+
+    perfspec["app"]['main_menu'] = mo.nav_menu(
+        {
+            "#main": "🍃 Main",
+            f"{perfspec['app']['intro']}": f"{mo.icon('lucide:shield-check')} Intro",
+            f"{perfspec['app']['about']}": f"{mo.icon('lucide:package')} About",
+            f"{perfspec['app']['repo']}": f"{mo.icon('lucide:github')} Repo",
+        }
+    )
+
+    # `value or default`: a missing (or falsy) command-line value falls back to the default.
+    perfspec["settings"] = dict(
+        epochs=_cli.get("epochs") or perfspec["defaults"]["epochs"],
+        checkpoint_mode=_cli.get("checkpoint_mode") or perfspec["defaults"]["checkpoint_mode"],
+        train_size=_cli.get("train_size") or perfspec["defaults"]["train_size"],
+        sequence_length=_cli.get("sequence") or perfspec["defaults"]["sequence_length"],
+        embedding_dim=50,  # Dimensionality of embedding vectors
+        input_length=_cli.get("sequence") or perfspec["defaults"]["sequence_length"],
+        lstm_units_1=256,  # Depth of the first LSTM layer
+        lstm_units_2=128,  # Depth of the second LSTM layer
+        dropout_rate=0.2,  # Dropout rate
+        batch_size=16,  # Adjusted for smaller datasets
+        model_history_filepath=perfspec["defaults"]["models_dirpath"]
+            / perfspec["defaults"]["history_path"],
+        # Path(...) around the overridable files: the defaults are Path objects, a command-line
+        # value is not, and callers such as load_model_from_path call `.exists()` on the result.
+        model_filepath=Path(
+            _cli.get("model_path")
+            or perfspec["defaults"]["models_dirpath"] / perfspec["defaults"]["model_path"]
+        ),
+        checkpoint_filepath=Path(
+            _cli.get("checkpoint_path")
+            or perfspec["defaults"]["checkpoints_dirpath"] / perfspec["defaults"]["checkpoint_path"]
+        ),
+        actions_filepath=Path(
+            _cli.get("actions_path")
+            or perfspec["defaults"]["data_dirpath"] / perfspec["defaults"]["actions_path"]
+        ),
+        input_str=_cli.get("input") or perfspec["defaults"]["pred_input"],
+    )
+
+    # NOTE(review): "" presumably means `--verbose` was passed without a value — confirm.
+    if _cli.get("verbose") == "":
+        perfspec["settings"]["verbose"] = 1
+    else:
+        perfspec["settings"]["verbose"] = _cli.get("verbose") or perfspec["defaults"]["verbose"]
+
+    if not perfspec["settings"]["actions_filepath"].exists():
+        raise SystemExit(f"Actions path not found:  {perfspec['settings']['actions_filepath']}")
+
+    mo.md(
+        f"""
+        # Vars settings 🍃
+
+        **perfspec** includes:  `defaults` and `settings` to keep all vars inside one dictionary
+
+        Main configurable variables some of them can be overload from **command-line**
+
+        Be sure `defaults` path exist for:
+
+        | variable   | path   |
+        |:---------- | :---------------- |
+        | data_dirpath | {perfspec["defaults"]["data_dirpath"]} |
+        | checkpoints_dirpath | {perfspec["defaults"]["checkpoints_dirpath"]} |
+        | actions_filepath | {perfspec["settings"]["actions_filepath"]} |
+
+        These path can be changed by using full file path with **command-line** arguments
+
+        """
+    )
+    return Path, perfspec
+
+
+@app.cell(hide_code=True)
+def perfspec_args(mo, perfspec):
+    # Handles the informational flags: --version and --help print their text and end the run.
+    # NOTE(review): a flag passed without a value presumably arrives as "" — confirm.
+    if mo.cli_args().get("version") == "":
+        print(f"PerfSPEC Learning: {perfspec['app']['version']}")  # version is stored under 'app'
+        raise SystemExit  # same outcome as exit(), without the interactive-only `site` helper
+    if mo.cli_args().get("help") == "":
+        print("PerfSPEC Learning:\n")
+        print(f"  To predict sequences use:\n\t\t --input value,value1 (default: {perfspec['defaults']['pred_input']})")
+        print(f"  Epochs to train use:\n\t\t --epochs num-value (default: {perfspec['defaults']['epochs']})")
+        print(f"  For train size use:\n\t\t --train_size decimal-value (default: {perfspec['defaults']['train_size']})")
+        print(f"  For sequence size in train model use:\n\t\t --sequence num-value (default: {perfspec['defaults']['sequence_length']})")
+        print(f"  For checkpoint mode (epochs,weights) in train model use:\n\t\t --checkpoint_mode value (default: {perfspec['defaults']['checkpoint_mode']})")
+        print(f"  For model_filepath use:\n\t\t --model_path model_filepath (default: {perfspec['defaults']['model_path']})")
+        print(f"  For checkpoint_filepath use:\n\t\t --checkpoint_path checkpoint_filepath (default: {perfspec['defaults']['checkpoint_path']})")
+        print(f"  For actions_filepath use:\n\t\t --actions_path actions_filepath\n\t\t (default: {perfspec['settings']['actions_filepath']})")
+        print(f"  For verbose mode use:\n\t\t --verbose (default: {perfspec['defaults']['verbose']})")
+        raise SystemExit
+
+    mo.md(f"""
+        # Arguments 🍃
+
+        Main arguments parsing some of values can be overload from **command-line**
+        """)
+    return
+
+
+@app.cell(hide_code=True)
+def perfspec_cli_ops(mo):
+    def out_cli_ops(notebook_name):
+        """Return markdown listing the command-line flags, with one example call per flag."""
+        # Every flag is written with two dashes and follows the bare `--` separator.
+        return f"""
+
+        <h3>Command-Line options</h3>
+
+        <h4> Input value for prediction trained model </h4> 
+          ```python {mo.notebook_location()}/{notebook_name} -- --input `value, value-1`  ```
+        <h4> Model filepath    </h4> 
+          ```python {mo.notebook_location()}/{notebook_name} -- --model_path `model-filepath`  ```
+        <h4> Checkpoints model filepath </h4>
+          ```python {mo.notebook_location()}/{notebook_name} -- --checkpoint_path `checkpoint-filepath`  ```
+        <h4> Actions filepath </h4>    
+          ```python {mo.notebook_location()}/{notebook_name} -- --actions_path `actions-filepath`  ```
+        <h4> Sequence to train model </h4>
+          ```python {mo.notebook_location()}/{notebook_name} -- --sequence `num-value`  ```
+        <h4> Epochs to train model </h4>
+          ```python {mo.notebook_location()}/{notebook_name} -- --epochs `num-value`  ```
+        <h4> Train model size </h4>
+          ```python {mo.notebook_location()}/{notebook_name} -- --train_size `decimal-value`  ```
+        <h4> Train model checkpoint modes </h4>
+          ```python {mo.notebook_location()}/{notebook_name} -- --checkpoint_mode `value`  ```
+        <h4> Verbose </h4>
+          ```python {mo.notebook_location()}/{notebook_name} -- --verbose  ```
+        """
+
+    mo.md(
+        f"""
+        # Command-line options 🍃
+        """
+    )
+    return (out_cli_ops,)
+
+
+@app.cell(hide_code=True)
+def perfspec_out_settings(mo, perfspec):
+    def out_settings(notebook_name):
+        """Return markdown tables with the default values and the train-model settings."""
+        # `notebook_name` is not used in the text. No cell contains a raw `|`: it would start a new column.
+        return f"""            
+            <h4>Defaults </h4>
+
+            | variable   | value  | descripción   |
+            |:---------- | :----  | :------------ |
+            | models dir path | {perfspec['defaults']['models_dirpath']} | models path |
+            | model file | **{perfspec['defaults']['model_path']}** | trained model filepath |
+            | checkpoints model dir path | {perfspec['defaults']['checkpoints_dirpath']} | checkpoint models path |
+            | checkpoint model file | **{perfspec['defaults']['checkpoint_path']}** | checkpoint trained model filepath |
+            | data_path  | {perfspec['defaults']['data_dirpath']} | data path | 
+            | actions file| **{perfspec['defaults']['actions_path']}** | actions filepath |
+            | history file| **{perfspec['defaults']['history_path']}** | history filepath |
+            | input |  **{perfspec['defaults']['pred_input']}** | input value for prediction |
+            | verbose |  {perfspec['defaults']['verbose']} | show detail info | 
+
+            <h4> Train model settings </h4>
+
+            | variable   | value  | descripción   |
+            |:---------- | :----  | :------------ |
+            | epochs     | **{perfspec['defaults']['epochs']}** | or change in `command-line` <br> with **--epochs** `num-value` |
+            | train_size     | **{perfspec['defaults']['train_size']}** | or change in `command-line` <br> with **--train_size** `decimal-value` |
+            | sequence_length | **{perfspec['defaults']['sequence_length']}** | Length of sequences <br>or change in `command-line` <br> with **--sequence** `num-value` |
+            | checkpoint_mode | **{perfspec['defaults']['checkpoint_mode']}** | Checkpoint_mode (epochs or weights)<br>or change in `command-line` <br> with **--checkpoint_mode** `value` |
+            | input_length |  {perfspec['defaults']['sequence_length']} | Length of input sequences |
+            | lstm_units_1 |  {perfspec['settings']['lstm_units_1']} | Depth of the first LSTM layer |
+            | lstm_units_2 |  {perfspec['settings']['lstm_units_2']} | Depth of the second LSTM layer |
+            | dropout_rate |  {perfspec['settings']['dropout_rate']} | Dropout rate |
+            | batch_size |  {perfspec['settings']['batch_size']} | Adjusted for smaller datasets |
+
+            """
+    mo.md(
+        f"""
+        # Default values 🍃
+        """
+    )
+    return (out_settings,)
+
+
+@app.cell(hide_code=True)
+def perfspec_load_actions(mo):
+    def load_actions(actions_path, verbose):
+        """Read one action per line from `actions_path` and label-encode the list.
+
+        Returns `(actions, unique_actions, label_encoder, encoded_actions)`. The file is read as
+        UTF-8 and lines are only right-stripped. A missing file leaves the action lists empty.
+        """
+        from pathlib import Path
+        from sklearn.preprocessing import LabelEncoder
+        actions = []
+        if Path(actions_path).exists():
+            if verbose is not None:
+                print(f"Actions loaded from: {actions_path}")
+            with open(actions_path, 'r', encoding="utf-8") as file:
+                actions = [line.rstrip() for line in file]
+        # dict.fromkeys de-duplicates in one pass and keeps first-seen order.
+        unique_actions = list(dict.fromkeys(actions))
+        label_encoder = LabelEncoder()
+        encoded_actions = label_encoder.fit_transform(actions)
+        return (actions, unique_actions, label_encoder, encoded_actions)
+    mo.md(
+        f"""
+        # Load actions and encoders 🍃
+        """
+    )
+    return (load_actions,)
+
+
+@app.cell(hide_code=True)
+def perfspec_input_sequence(mo, perfspec):
+    def get_input_sequence(input_str, unique_actions):
+        """Return the comma-separated names in `input_str` that appear in `unique_actions`."""
+        # An empty input falls back to the configured default and is parsed like any other
+        # value, so `input_sequence` is always bound when it is returned.
+        if input_str == "":
+            input_str = perfspec['settings']['input_str']
+        report = perfspec['settings']['verbose'] is not None or mo.cli_args().get("verbose") is not None
+        input_sequence = []
+        for name in (part.strip() for part in input_str.split(",")):  # tolerate "a, b"
+            if name in unique_actions:
+                input_sequence.append(name)
+            elif report:
+                print(f"Input not exist: {name}")
+        return input_sequence
+    mo.md(f"""
+        # Input sequence 🍃
+
+        Control **input sequence** to be included in __actions data__ in the model
+        """)
+    return (get_input_sequence,)
+
+
+@app.cell(hide_code=True)
+def perfspec_load_model_from_path(mo, perfspec):
+    def load_model_from_path(model_path, verbose):
+        """Load the trained Keras model at `model_path` (str or Path); None if the file is missing."""
+        from pathlib import Path
+        from keras.models import load_model
+        # NOTE(review): these names are unused here, but the import may be needed for its side
+        # effects (e.g. registering the custom metrics) — confirm before removing it.
+        from train_perfspec import PrecisionMetric, RecallMetric, F1ScoreMetric
+        # Set via options
+        custom_objects = {
+    #        "PrecisionMetric": PrecisionMetric,
+    #        "RecallMetric": RecallMetric,
+    #        "F1ScoreMetric": F1ScoreMetric,
+        }
+        # The settings can hand over a plain string (command-line override) instead of a
+        # Path, and a string has no `.exists()`.
+        model_path = Path(model_path)
+        if not model_path.exists():
+            return None
+        model = load_model(model_path, custom_objects=custom_objects)
+        if verbose is not None:
+            print(f"Trained model loaded from: {model_path}")
+        return model
+
+    mo.md(
+        f"""
+        ## Load model  🍃
+
+        Load trained model from file **model_path** 
+
+        Default path: {perfspec['settings']['model_filepath']}
+        """
+    )
+    return (load_model_from_path,)
+
+
+@app.cell(hide_code=True)
+def perfspec_predict(mo, perfspec):
+    def predict_action(model, sequence_length, input_sequence, label_encoder, verbose):
+        """Predict the next action for `input_sequence` with the trained `model`.
+
+        `label_encoder` is the already fitted encoder; it encodes the input and decodes the
+        winning class. Returns `(encoded_input, predicted)`, where `predicted` has the keys
+        `probabilities`, `index`, `max`, `max_value` and `action`. `verbose="-1"` skips the
+        summary print and passes `verbose=None` to `model.predict`.
+        """
+        import numpy as np
+        from tensorflow.keras.preprocessing.sequence import pad_sequences
+
+        # Encode with the fitted encoder: a LabelEncoder fitted on the input alone would
+        # renumber the labels from 0, and the ids would no longer match `label_encoder`.
+        encoded_input = label_encoder.transform(input_sequence)
+
+        # pad_sequences left-pads short inputs and, with its default truncating='pre',
+        # keeps only the last `sequence_length` items of longer ones.
+        encoded_input = pad_sequences([encoded_input], maxlen=sequence_length, padding='pre')
+        encoded_input = np.expand_dims(encoded_input, axis=-1)
+
+        predicted = {}
+        _verbose = verbose if verbose != "-1" else None
+
+        predicted['probabilities'] = model.predict(encoded_input, verbose=_verbose)
+        predicted['index'] = np.argmax(predicted['probabilities'])
+        predicted['max'] = np.max(predicted['probabilities'], axis=-1)[0]
+        predicted['max_value'] = "{:2.4f}".format(predicted['max'])
+        predicted['action'] = label_encoder.inverse_transform(np.ravel(predicted['index']))
+        if verbose != "-1":
+            if verbose is not None or mo.cli_args().get("verbose") is not None:
+                print(f"Predicted next action: {predicted['action'][0]} with: {predicted['max_value']}")
+            elif not mo.running_in_notebook():
+                print(f"{predicted['action'][0]}")
+        return (encoded_input, predicted)
+
+    mo.md(
+        f"""
+        # Predict action  🍃
+
+        Predict action from trained model
+
+        Default value: **{perfspec['settings']['input_str']}**
+
+        For **command-line** it can be provided via **--input** `value` (can use several comma separated)
+        """
+    )
+    return (predict_action,)
+
+
+if __name__ == "__main__":  # only when this file is executed directly as a script
+    app.run()