# Provenance: patch d618c5a298134be83c8095e121635e2367621ae8
# ("chore: add lib_perfspec.py", Jesús Pérez Lorenzo, 2025-01-26),
# target file learning/python/lib_perfspec.py.
#
# NOTE(review): the reviewed copy of this file had its line structure collapsed
# and the HTML markup inside several markdown strings removed. Places where
# markup appears to be missing are flagged with NOTE(review) comments below;
# restore the original markup from the repository before relying on rendering.
import marimo

__generated_with = "0.10.16"
app = marimo.App(width="medium")


@app.cell(hide_code=True)
def _():
    def app_vars():
        """Return the static application metadata (author, name, links)."""
        app = dict(
            author_name='Jesús Pérez',
            author_link='mailto:info@jesusperez.pro',
            name='PerfSPEC Learning 🍃',
            train_mode='LSTM',
            date="2025",
            version="1.0",
            repo='https://repo.jesusperez.pro/jesus/perfspec-learning',
            about='https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/about',
            intro='https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/intro',
            image='https://repo.jesusperez.pro/jesus/perfspec-learning/media/branch/main/imgs/prefSPEC-learning.png',
        )
        return app
    return (app_vars,)


@app.cell(hide_code=True)
def title(app_vars):
    import marimo as mo

    notebook_name = 'lib_perfspec.py'

    _app = app_vars()

    _top_bar = mo.hstack([
        mo.md(f"{notebook_name} | vers: {_app['version']} {_app['date']} "),
        mo.md(f"[{_app['author_name']}]({_app['author_link']})"),
    ])
    # NOTE(review): the first and last items of this stack contained markup
    # that is missing from the reviewed copy (presumably the logo from
    # _app['image'] and a separator -- TODO confirm and restore).
    _banner = mo.vstack(items=[
        mo.md(""),
        mo.md(f"# {_app['name']}"),
        mo.md(""),
    ], justify='center', gap=0.)

    mo.md(
        f"""
        {_top_bar}
        {_banner}
        It has been design as a [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) approach to put `thing` in only one place
        """
    )
    return mo, notebook_name


@app.cell(hide_code=True)
def perfspec_header(mo, perfspec):
    def header(notebook_name):
        """Return the markdown header (top bar + banner) for `notebook_name`."""
        app_info = perfspec['app']
        top_bar = mo.hstack([
            mo.md(f"{notebook_name} | vers: {app_info['version']} {app_info['date']} "),
            app_info['main_menu'],
            mo.md(f"[{app_info['author_name']}]({app_info['author_link']})"),
        ])
        # NOTE(review): the first and last items of this stack contained markup
        # that is missing from the reviewed copy (presumably the logo from
        # app_info['image'] and a separator -- TODO confirm and restore).
        banner = mo.vstack(items=[
            mo.md(""),
            mo.md(f"# {app_info['name']}"),
            mo.md(""),
        ], justify='center', gap=0.)
        return f"""
        {top_bar}
        {banner}
        """

    def intro(notebook_name):
        """Return the markdown intro paragraph for `notebook_name`."""
        # Both link targets are built inside the braces: the original text had
        # a stray quote after the first link and left " / lib_perfspec.py"
        # outside the replacement field, which broke both links.
        notebook_link = perfspec['defaults']['models_dirpath'] / notebook_name
        settings_link = mo.notebook_dir() / 'lib_perfspec.py'
        return f"""
        ## Intro

        [{notebook_name}]({notebook_link}) __notebook__ is created for `trained model PREDICTIONS` with [{perfspec['app']['train_mode']}](https://en.wikipedia.org/wiki/Long_short-term_memory) using [Tensor Keras](https://www.tensorflow.org/guide/keras)

        Settings are defined in: [lib_perfspec.py]({settings_link})

        Values can be overwritten by using **command-line** see current values below
        """
    return header, intro


@app.cell(hide_code=True)
def perfspec_vars(app_vars, mo):
    from pathlib import Path

    perfspec = dict(
        defaults=dict(
            epochs=400,
            train_size=0.8,
            sequence_length=2,
            model_path="perfSPEC_model.keras",
            actions_path="actions-dataset-audit.txt",
            history_path="history.json",
            checkpoint_path="model_at_epoch_{epoch:02d}.keras",
            checkpoint_mode="epochs",  # alternative "weights"
            pred_input="delete_apiservices",
            verbose=None,
            data_dirpath=mo.notebook_dir() / "../../data",
            models_dirpath=mo.notebook_dir() / "../../models",
            checkpoints_dirpath=mo.notebook_dir() / "../../models/checkpoints",
        ),
        app=app_vars(),
    )
    _defaults = perfspec["defaults"]

    # Abort early when a required directory is missing. SystemExit is raised
    # directly (same effect as exit(msg)) so it does not depend on `site`.
    if not Path(_defaults["models_dirpath"]).exists():
        raise SystemExit(
            f"Trained models path not found: {_defaults['models_dirpath']}"
        )

    if not Path(_defaults["data_dirpath"]).exists():
        raise SystemExit(f"Data path not found: {_defaults['data_dirpath']}")

    if not Path(_defaults["checkpoints_dirpath"]).exists():
        raise SystemExit(
            f"Checkpoints path not found: {_defaults['checkpoints_dirpath']}"
        )

    perfspec["app"]['main_menu'] = mo.nav_menu(
        {
            "#main": "🍃 Main",
            f"{perfspec['app']['intro']}": f"{mo.icon('lucide:shield-check')} Intro",
            f"{perfspec['app']['about']}": f"{mo.icon('lucide:package')} About",
            f"{perfspec['app']['repo']}": f"{mo.icon('lucide:github')} Repo",
        }
    )

    # Read the command-line arguments once instead of once per setting.
    _cli = mo.cli_args()

    perfspec["settings"] = dict(
        # Number of epochs
        epochs=_cli.get("epochs") or _defaults["epochs"],
        checkpoint_mode=_cli.get("checkpoint_mode") or _defaults["checkpoint_mode"],
        # Number of train_size
        train_size=_cli.get("train_size") or _defaults["train_size"],
        sequence_length=_cli.get("sequence") or _defaults["sequence_length"],
        embedding_dim=50,  # Dimensionality of embedding vectors
        input_length=_cli.get("sequence") or _defaults["sequence_length"],
        lstm_units_1=256,  # Depth of the first LSTM layer
        lstm_units_2=128,  # Depth of the second LSTM layer
        dropout_rate=0.2,  # Dropout rate
        batch_size=16,  # Adjusted for smaller datasets
        model_history_filepath=_defaults["models_dirpath"]
        / _defaults["history_path"],
        # Command-line overrides arrive as plain values; wrap them in Path so
        # these settings have the same type as the Path-based defaults.
        model_filepath=Path(
            _cli.get("model_path")
            or _defaults["models_dirpath"] / _defaults["model_path"]
        ),
        checkpoint_filepath=Path(
            _cli.get("checkpoint_path")
            or _defaults["checkpoints_dirpath"] / _defaults["checkpoint_path"]
        ),
        actions_filepath=Path(
            _cli.get("actions_path")
            or _defaults["data_dirpath"] / _defaults["actions_path"]
        ),
        input_str=_cli.get("input") or _defaults["pred_input"],
    )

    # A bare "--verbose" flag is seen as an empty string and means level 1.
    if _cli.get("verbose") == "":
        perfspec["settings"]["verbose"] = 1
    else:
        perfspec["settings"]["verbose"] = (
            _cli.get("verbose") or _defaults["verbose"]
        )

    if not perfspec["settings"]["actions_filepath"].exists():
        raise SystemExit(
            f"Actions path not found: {perfspec['settings']['actions_filepath']}"
        )

    mo.md(
        f"""
        # Vars settings 🍃

        **perfspec** includes: `defaults` and `settings` to keep all vars inside one dictionary

        Main configurable variables some of them can be overload from **command-line**

        Be sur `defaults` path exist for:

        | variable | path |
        |:---------- | :---------------- |
        | data_dirpath | {perfspec["defaults"]["data_dirpath"]} |
        | checkpoints_dirpath | {perfspec["defaults"]["checkpoints_dirpath"]} |
        | actions_filepath | {perfspec["settings"]["actions_filepath"]} |

        These path can be changed by using full file path with **command-line** arguments

        """
    )
    return Path, perfspec


@app.cell(hide_code=True)
def perfspec_args(mo, perfspec):
    _cli = mo.cli_args()
    _defaults = perfspec['defaults']

    if _cli.get("version") == "":
        # The version is stored under perfspec['app'], not perfspec['defaults'].
        print(f"PerfSPEC Learning: {perfspec['app']['version']}")
        raise SystemExit(0)
    if _cli.get("help") == "":
        print("PerfSPEC Learning:\n")
        print(f" To predict sequences use:\n\t\t --input value,value1 (default: {_defaults['pred_input']})")
        print(f" Epochs to train use:\n\t\t --epochs num-value (default: {_defaults['epochs']})")
        print(f" For train size use:\n\t\t --train_size decimal-value (default: {_defaults['train_size']})")
        print(f" For sequence size in train model use:\n\t\t --sequence num-value (default: {_defaults['sequence_length']})")
        print(f" For checkpoint mode (epochs,weights) in train model use:\n\t\t --checkpoint_mode value (default: {_defaults['checkpoint_mode']})")
        print(f" For model_filepath use:\n\t\t --model_path model_filepath (default: {_defaults['model_path']})")
        print(f" For checkpoint_filepath use:\n\t\t --checkpoint_path checkpoint_filepath (default: {_defaults['checkpoint_path']})")
        print(f" For actions_filepath use:\n\t\t --actions_path actions_filepath\n\t\t (default: {perfspec['settings']['actions_filepath']})")
        print(f" For verbose mode use:\n\t\t --verbose (default: {_defaults['verbose']})")
        raise SystemExit(0)

    mo.md(
        """
        # Arguments 🍃

        Main arguments parsing some of values can be overload from **command-line**
        """
    )
    return


@app.cell(hide_code=True)
def perfspec_cli_ops(mo):
    def out_cli_ops(notebook_name):
        """Return markdown listing the command-line options of `notebook_name`."""
        location = mo.notebook_location()
        # NOTE(review): each title line below appears to have been wrapped in
        # markup (likely collapsible sections) that is missing from the
        # reviewed copy -- TODO restore from the repository.
        # Every option uses a double dash; the original text documented
        # --sequence, --epochs, --train_size and --checkpoint_mode with three.
        return (
            f"""

            Command-Line options

            Input value for prediction trained model

            ```python {location}/{notebook_name} -- --input `value, value-1` ```

            Model filepath

            ```python {location}/{notebook_name} -- --model_path `model-filepath` ```

            Checkpoints model filepath

            ```python {location}/{notebook_name} -- --checkpoint_path `checkpoint-filepath` ```

            Actions filepath

            ```python {location}/{notebook_name} -- --actions_path `actions-filepath` ```

            Sequence to train model

            ```python {location}/{notebook_name} -- --sequence `num-value` ```

            Epochs to train model

            ```python {location}/{notebook_name} -- --epochs `num-value` ```

            Train model size

            ```python {location}/{notebook_name} -- --train_size `decimal-value` ```

            Train model checkpoint modes

            ```python {location}/{notebook_name} -- --checkpoint_mode `value` ```

            Verbose

            ```python {location}/{notebook_name} -- --verbose ```
            """
        )

    mo.md(
        """
        # Command-line options 🍃
        """
    )
    return (out_cli_ops,)


@app.cell(hide_code=True)
def perfspec_out_settings(mo, perfspec):
    def out_settings(notebook_name):
        """Return markdown tables with the default paths and training settings.

        `notebook_name` is accepted for symmetry with the other `out_*`
        helpers; it is not used in the generated text.
        """
        defaults = perfspec['defaults']
        settings = perfspec['settings']
        # NOTE(review): the two section titles and the in-cell line breaks
        # appear to have carried markup that is missing from the reviewed
        # copy -- TODO restore from the repository. Table rows are kept on a
        # single line and contain no raw "|" so the tables still render.
        return (
            f"""

            Defaults

            | variable | value | descripción |
            |:---------- | :---- | :------------ |
            | models dir path | {defaults['models_dirpath']} | models path |
            | model file | **{defaults['model_path']}** | trained model filepath |
            | checkpoints model dir path | {defaults['checkpoints_dirpath']} | checkpoint models path |
            | checkpoint model file | **{defaults['checkpoint_path']}** | checkpoint trained model filepath |
            | data_path | {defaults['data_dirpath']} | data path |
            | actions file| **{defaults['actions_path']}** | actions filepath |
            | history file| **{defaults['history_path']}** | history filepath |
            | input | **{defaults['pred_input']}** | input value for prediction |
            | verbose | None | show detail info |

            Train model settings

            | variable | value | descripción |
            |:---------- | :---- | :------------ |
            | epochs | **{defaults['epochs']}** | or change in `comman-line` with **--epochs** `num-value` |
            | train_size | **{defaults['train_size']}** | or change in `comman-line` with **--train_size** `decimal-value` |
            | sequence_length | **{defaults['sequence_length']}** | Length of sequencs or change in `comman-line` with **--sequence** `num-value` |
            | checkpoint_mode | **{defaults['checkpoint_mode']}** | Checkpoint_mode (epochs or weights) or change in `comman-line` with **--checkpoint_mode** `value` |
            | input_length | {defaults['sequence_length']} | Length of input sequencs |
            | lstm_units_1 | {settings['lstm_units_1']} | Depth of the first LSTM layer |
            | lstm_units_2 | {settings['lstm_units_2']} | Depth of the second LSTM layer |
            | dropout_rate | {settings['dropout_rate']} | Dropout rate |
            | batch_size | {settings['batch_size']} | Adjusted for smaller datasets |

            """
        )
    mo.md(
        """
        # Default values 🍃
        """
    )
    return (out_settings,)


@app.cell(hide_code=True)
def perfspec_load_actions(mo):
    def load_actions(actions_path, verbose):
        """Read one action per line from `actions_path` and label-encode them.

        Args:
            actions_path: path of the text file holding one action per line.
            verbose: when not None, print where the actions were loaded from.

        Returns:
            Tuple `(actions, unique_actions, label_encoder, encoded_actions)`:
            every action in file order, the distinct actions in first-seen
            order, the fitted `LabelEncoder`, and the encoded actions. When
            the file does not exist both lists are empty and the encoder is
            fitted on an empty list.
        """
        from pathlib import Path
        from sklearn.preprocessing import LabelEncoder

        actions = []
        if Path(actions_path).exists():
            if verbose is not None:
                print(f"Actions loaded from: {actions_path}")
            with open(actions_path, 'r', encoding='utf-8') as file:
                actions = [line.rstrip() for line in file]
        # dict.fromkeys keeps first-seen order and dedupes in linear time.
        unique_actions = list(dict.fromkeys(actions))
        label_encoder = LabelEncoder()
        encoded_actions = label_encoder.fit_transform(actions)
        return (actions, unique_actions, label_encoder, encoded_actions)
    mo.md(
        """
        # Load actions and encoders 🍃
        """
    )
    return (load_actions,)


@app.cell(hide_code=True)
def perfspec_input_sequence(mo, perfspec):
    def get_input_sequence(input_str, unique_actions):
        """Split a comma-separated input into the list of known actions.

        Args:
            input_str: comma-separated action names; when empty (or None) the
                configured `perfspec['settings']['input_str']` is used.
            unique_actions: collection of action names the model knows.

        Returns:
            List of the requested actions that are present in
            `unique_actions`, in input order. Unknown names are dropped (and
            reported when verbose mode is on).
        """
        if not input_str:
            # Fall back to the configured input, then parse it like any other
            # value (the original left the result undefined in this case).
            input_str = perfspec['settings']['input_str']
        report_unknown = (
            perfspec['settings']['verbose'] is not None
            or mo.cli_args().get("verbose") is not None
        )
        input_sequence = []
        for raw_action in input_str.split(","):
            # Documented usage is "--input `value, value-1`", so tolerate
            # blanks around the commas.
            action = raw_action.strip()
            if action in unique_actions:
                input_sequence.append(action)
            elif report_unknown:
                print(f"Input not exist: {action}")
        return input_sequence
    mo.md(
        """
        # Input sequence 🍃

        Control **input sequence** to be included in __actions data__ in the model
        """
    )
    return (get_input_sequence,)


@app.cell(hide_code=True)
def perfspec_load_model_from_path(mo, perfspec):
    def load_model_from_path(model_path, verbose):
        """Load a trained Keras model from `model_path`.

        Args:
            model_path: `str` or `Path` of the saved model.
            verbose: when not None, print where the model was loaded from.

        Returns:
            The loaded model.

        Raises:
            FileNotFoundError: if `model_path` does not exist.
        """
        from pathlib import Path
        # Load the model
        from keras.models import load_model
        # NOTE(review): these names are not referenced below; the import is
        # kept because it may matter as a side effect (e.g. registering the
        # custom metrics) -- TODO confirm against train_perfspec.
        from train_perfspec import (  # noqa: F401
            PrecisionMetric,
            RecallMetric,
            F1ScoreMetric
        )
        # Set via options
        custom_objects = {
            # "PrecisionMetric": PrecisionMetric,
            # "RecallMetric": RecallMetric,
            # "F1ScoreMetric": F1ScoreMetric,
        }
        model_file = Path(model_path)  # accept str paths from the command line
        if not model_file.exists():
            raise FileNotFoundError(f"Trained model not found: {model_file}")
        model = load_model(
            model_file,
            custom_objects=custom_objects
        )
        if verbose is not None:
            print(f"Trained model loaded from: {model_file}")
        return model

    mo.md(
        f"""
        ## Load model 🍃

        Load trained model from file **model_path**

        Default path: {perfspec['settings']['model_filepath']}
        """
    )
    return (load_model_from_path,)


@app.cell(hide_code=True)
def perfspec_predict(mo, perfspec):
    def predict_action(model, sequence_length, input_sequence, label_encoder, verbose):
        """Predict the next action for `input_sequence` with a trained model.

        Args:
            model: trained model exposing `predict`.
            sequence_length: number of time steps passed to the model.
            input_sequence: list of action names known to `label_encoder`.
            label_encoder: the fitted `LabelEncoder` returned with the actions.
            verbose: "-1" silences every message; None prints only the
                predicted action when not running in a notebook; any other
                value prints the action with its probability.

        Returns:
            Tuple `(encoded_input, predicted)` where `predicted` holds
            `probabilities`, `index`, `max`, `max_value` and `action`.
        """
        import numpy as np
        from tensorflow.keras.preprocessing.sequence import pad_sequences

        # Encode with the SAME encoder that produced the action labels; a
        # freshly fitted encoder would number the input independently of them.
        encoded_input = label_encoder.transform(input_sequence)

        # Pad (or truncate, keeping the last items) to the expected length,
        # then add the trailing feature axis.
        encoded_input = pad_sequences([encoded_input], maxlen=sequence_length, padding='pre')
        encoded_input = np.expand_dims(encoded_input, axis=-1)

        silent = verbose == "-1"
        # Keras uses 0 for silent prediction; other values are passed through.
        _verbose = 0 if silent else verbose

        predicted = {}
        predicted['probabilities'] = model.predict(encoded_input, verbose=_verbose)
        predicted['index'] = np.argmax(predicted['probabilities'])
        predicted['max'] = np.max(predicted['probabilities'], axis=-1)[0]
        predicted['max_value'] = "{:2.4f}".format(predicted['max'])
        predicted['action'] = label_encoder.inverse_transform(np.ravel(predicted['index']))

        if not silent:
            if verbose is not None or mo.cli_args().get("verbose") is not None:
                print(f"Predicted next action: {predicted['action'][0]} with: {predicted['max_value']}")
            elif not mo.running_in_notebook():
                print(f"{predicted['action'][0]}")
        return (encoded_input, predicted)

    mo.md(
        f"""
        # Predict action 🍃

        Predict action from trained model

        Default value: **{perfspec['settings']['input_str']}**

        For **command-line** it can be provided via **--input** `value` (can use several comma separated)
        """
    )
    return (predict_action,)


if __name__ == "__main__":
    app.run()