perfspec-learning/learning/python/lib_perfspec.py
2025-01-28 14:21:29 +00:00

443 lines
18 KiB
Python

import marimo
# Version marker stored alongside the notebook (presumably the marimo release
# that generated this file — confirm against marimo's file format docs).
__generated_with = "0.10.17"
# Notebook application object; the cells below are registered on it through
# the `@app.cell` decorator and executed by `app.run()` at the end of the file.
app = marimo.App(width="medium")
@app.cell(hide_code=True)
def _():
    # Exposes `app_vars`, the one place where the notebook's identity data
    # (author, title, version, and project links) is written down.
    def app_vars():
        """Build and return a new dict holding the application metadata."""
        return {
            'author_name': 'Jesús Pérez',
            'author_link': 'mailto:info@jesusperez.pro',
            'name': 'PerfSPEC Learning 🍃',
            'train_mode': 'LSTM',
            'date': "2025",
            'version': "1.0",
            'repo': 'https://repo.jesusperez.pro/jesus/perfspec-learning',
            'about': 'https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/about.md',
            'intro': 'https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/intro.md',
            'image': 'https://repo.jesusperez.pro/jesus/perfspec-learning/media/branch/main/imgs/perfSPEC-learning.png',
        }

    return (app_vars,)
@app.cell(hide_code=True)
def title(app_vars):
    # Title cell: renders the library notebook's own banner and exports `mo`
    # and `notebook_name` for the other cells.
    import marimo as mo

    notebook_name = 'lib_perfspec.py'
    _app = app_vars()

    # The pieces are built before the final f-string so that no replacement
    # field has to reuse the quote character of its enclosing string; that
    # nesting is only accepted from Python 3.12 on (PEP 701).
    _image_url = _app['image']
    _logo_html = (
        f'<center><img src="{_image_url}" width="300" '
        'style="border-radius: 20px;"/></center>'
    )
    _top_bar = mo.hstack([
        mo.md(f"<small>{notebook_name} | vers: {_app['version']} {_app['date']} </small>"),
        mo.md(f"[{_app['author_name']}]({_app['author_link']})"),
    ])
    _banner = mo.vstack(
        items=[
            mo.md("<br>"),
            mo.md(f"# {_app['name']}"),
            mo.md(_logo_html),
        ],
        justify='center',
        gap=0.,
    )

    # Last expression of the cell: this is the value marimo displays.
    mo.md(
        f"""
        {_top_bar}
        {_banner}
        It has been design as a [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) approach to put `thing` in only one place
        """
    )
    return mo, notebook_name
@app.cell(hide_code=True)
def perfspec_header(mo, perfspec):
    # Exports two Markdown/HTML builders (`header`, `intro`) that other
    # notebooks call with their own file name.
    def header(notebook_name):
        """Return the page banner markup for `notebook_name`.

        The banner contains a version line, the navigation menu stored in
        `perfspec['app']['main_menu']`, the author link, the title and the
        logo image.
        """
        app_info = perfspec['app']
        # Built outside the final f-string so no replacement field reuses the
        # quote character of its enclosing string (pre-3.12 compatible).
        image_url = app_info['image']
        logo_html = (
            f'<center><img src="{image_url}" width="300" '
            'style="border-radius: 20px;"/></center>'
        )
        top_bar = mo.hstack([
            mo.md(f"<small>{notebook_name} | vers: {app_info['version']} {app_info['date']} </small>"),
            app_info['main_menu'],
            mo.md(f"[{app_info['author_name']}]({app_info['author_link']})"),
        ])
        banner = mo.vstack(
            items=[
                mo.md("<br>"),
                mo.md(f"# {app_info['name']}"),
                mo.md(logo_html),
            ],
            justify='center',
            gap=0.,
        )
        return f"""
        {top_bar}
        {banner}
        """

    def intro(notebook_name):
        """Return the introductory Markdown paragraph for `notebook_name`."""
        # Link targets are computed first: the original left a stray quote in
        # the first link and kept " / lib_perfspec.py" outside the braces of
        # the second one, so neither rendered as a usable link.
        notebook_path = perfspec['defaults']['models_dirpath'] / notebook_name
        settings_path = mo.notebook_dir() / 'lib_perfspec.py'
        return f"""
        ## Intro
        [{notebook_name}]({notebook_path}) __notebook__ is created for `trained model PREDICTIONS` with [{perfspec['app']['train_mode']}](https://en.wikipedia.org/wiki/Long_short-term_memory) using [Tensor Keras](https://www.tensorflow.org/guide/keras)
        Settings are defined in: [lib_perfspec.py]({settings_path})
        Values can be overwritten by using **command-line** see current values below
        """

    return header, intro
@app.cell(hide_code=True)
def perfspec_vars(app_vars, mo):
    # Builds the shared `perfspec` dictionary:
    #   perfspec["defaults"] - built-in values and directories
    #   perfspec["app"]      - application metadata plus the navigation menu
    #   perfspec["settings"] - effective values (command-line overrides first)
    # The cell stops the program with a message when a required path is missing.
    from pathlib import Path

    # Fetched once instead of on every lookup (underscore names stay private
    # to this cell in marimo).
    _cli = mo.cli_args()
    _base_dir = mo.notebook_dir()

    perfspec = dict(
        defaults=dict(
            epochs=300,
            train_size=0.8,
            sequence_length=2,
            model_path="perfSPEC_model.keras",
            actions_path="actions-dataset-audit.txt",
            history_path="history.json",
            checkpoint_path="model_at_epoch_{epoch:02d}.keras",
            checkpoint_mode="epochs",  # alternative "weights"
            pred_input="delete_apiservices",
            verbose=None,
            data_dirpath=_base_dir / "../../data",
            models_dirpath=_base_dir / "../../models",
            checkpoints_dirpath=_base_dir / "../../models/checkpoints",
        ),
        app=app_vars(),
    )
    _defaults = perfspec["defaults"]

    # `raise SystemExit(msg)` replaces the `exit(msg)` helper, which is
    # installed by the `site` module for interactive use and is not guaranteed
    # to exist in every run mode; the exception raised is the same.
    for _label, _key in (
        ("Trained models", "models_dirpath"),
        ("Data", "data_dirpath"),
        ("Checkpoints", "checkpoints_dirpath"),
    ):
        if not Path(_defaults[_key]).exists():
            raise SystemExit(f"{_label} path not found: {_defaults[_key]}")

    perfspec["app"]['main_menu'] = mo.nav_menu(
        {
            "#main": "🍃 Main",
            perfspec['app']['intro']: f"{mo.icon('lucide:shield-check')} Intro",
            perfspec['app']['about']: f"{mo.icon('lucide:package')} About",
            perfspec['app']['repo']: f"{mo.icon('lucide:github')} Repo",
        }
    )

    # `--sequence` feeds both `sequence_length` and `input_length`.
    _sequence = _cli.get("sequence") or _defaults["sequence_length"]

    perfspec["settings"] = dict(
        # Number of epochs
        epochs=_cli.get("epochs") or _defaults["epochs"],
        checkpoint_mode=_cli.get("checkpoint_mode") or _defaults["checkpoint_mode"],
        # Number of train_size
        train_size=_cli.get("train_size") or _defaults["train_size"],
        sequence_length=_sequence,
        embedding_dim=50,  # Dimensionality of embedding vectors
        input_length=_sequence,
        lstm_units_1=256,  # Depth of the first LSTM layer
        lstm_units_2=128,  # Depth of the second LSTM layer
        dropout_rate=0.2,  # Dropout rate
        batch_size=16,  # Adjusted for smaller datasets
        model_history_filepath=_defaults["models_dirpath"] / _defaults["history_path"],
        # Command-line overrides are wrapped in Path so these three settings
        # have the same type whether they come from the CLI or the defaults.
        model_filepath=Path(
            _cli.get("model_path")
            or _defaults["models_dirpath"] / _defaults["model_path"]
        ),
        checkpoint_filepath=Path(
            _cli.get("checkpoint_path")
            or _defaults["checkpoints_dirpath"] / _defaults["checkpoint_path"]
        ),
        actions_filepath=Path(
            _cli.get("actions_path")
            or _defaults["data_dirpath"] / _defaults["actions_path"]
        ),
        input_str=_cli.get("input") or _defaults["pred_input"],
        main_audit_log=_defaults['data_dirpath'] / "main-audit-logs.log",
        raw_audit_log=_defaults['data_dirpath'] / "raw-audit-logs.log",
        actions_distrib_filepath=_defaults['data_dirpath'] / 'actions_distribution.pdf',
        actions_distrib_format='pdf',
    )

    # An empty-string value for "verbose" (the flag given without a value)
    # maps to 1; otherwise the given value or the default is used.
    if _cli.get("verbose") == "":
        perfspec["settings"]["verbose"] = 1
    else:
        perfspec["settings"]["verbose"] = _cli.get("verbose") or _defaults["verbose"]

    if not perfspec["settings"]["actions_filepath"].exists():
        raise SystemExit(
            f"Actions path not found: {perfspec['settings']['actions_filepath']}"
        )

    # Last expression of the cell: this is the value marimo displays.
    mo.md(
        f"""
        # Vars settings 🍃
        **perfspec** includes: `defaults` and `settings` to keep all vars inside one dictionary
        Main configurable variables some of them can be overload from **command-line**
        Be sur `defaults` path exist for:
        | variable | path |
        |:---------- | :---------------- |
        | data_dirpath | {perfspec["defaults"]["data_dirpath"]} |
        | checkpoints_dirpath | {perfspec["defaults"]["checkpoints_dirpath"]} |
        | actions_filepath | {perfspec["settings"]["actions_filepath"]} |
        These path can be changed by using full file path with **command-line** arguments
        """
    )
    return Path, perfspec
@app.cell(hide_code=True)
def perfspec_args(mo, perfspec):
    # Handles the informational flags `--version` and `--help`: prints the
    # requested text and ends the program with a success status.
    _cli = mo.cli_args()
    _defaults = perfspec['defaults']

    if _cli.get("version") == "":
        # The version is stored under perfspec['app']; the 'defaults' section
        # has no 'version' key, so reading it there raised KeyError.
        print(f"PerfSPEC Learning: {perfspec['app']['version']}")
        raise SystemExit(0)

    if _cli.get("help") == "":
        _help_lines = [
            "PerfSPEC Learning:\n",
            f" To predict sequences use:\n\t\t --input value,value1 (default: {_defaults['pred_input']})",
            f" Epochs to train use:\n\t\t --epochs num-value (default: {_defaults['epochs']})",
            f" For train size use:\n\t\t --train_size decimal-value (default: {_defaults['train_size']})",
            # Shows the sequence default (it used to print the train_size one).
            f" For sequence size in train model use:\n\t\t --sequence num-value (default: {_defaults['sequence_length']})",
            f" For checkpoint mode (epochs,weights) in train model use:\n\t\t --checkpoint_mode value (default: {_defaults['checkpoint_mode']})",
            f" For model_filepath use:\n\t\t --model_path model_filepath (default: {_defaults['model_path']})",
            f" For checkpoint_filepath use:\n\t\t --checkpoint_path checkpoint_filepath (default: {_defaults['checkpoint_path']})",
            f" For actions_filepath use:\n\t\t --actions_path actions_filepath\n\t\t (default: {perfspec['settings']['actions_filepath']})",
            f" For verbose mode use:\n\t\t --verbose (default: {_defaults['verbose']})",
        ]
        print("\n".join(_help_lines))
        raise SystemExit(0)

    # Last expression of the cell: this is the value marimo displays.
    mo.md(
        """
        # Arguments 🍃
        Main arguments parsing some of values can be overload from **command-line**
        """
    )
    return
@app.cell(hide_code=True)
def perfspec_cli_ops(mo):
    # Exports `out_cli_ops`, which documents the command-line options that the
    # settings cell reads through `mo.cli_args()`.
    def out_cli_ops(notebook_name):
        """Return markup listing the command-line options for `notebook_name`.

        Every example is prefixed with `mo.notebook_location()` joined to
        `notebook_name`. All options are written with two leading dashes, the
        spelling whose names match the keys looked up in `mo.cli_args()`.
        """
        script = f"{mo.notebook_location()}/{notebook_name}"
        return (
            f"""
            <h3>Command-Line options</h3>
            <h4> Input value for prediction trained model </h4>
            ```python {script} -- --input `value, value-1` ```
            <h4> Model filepath </h4>
            ```python {script} -- --model_path `model-filepath` ```
            <h4> Checkpoints model filepath </h4>
            ```python {script} -- --checkpoint_path `checkpoint-filepath` ```
            <h4> Actions filepath </h4>
            ```python {script} -- --actions_path `actions-filepath` ```
            <h4> Sequence to train model </h4>
            ```python {script} -- --sequence `num-value` ```
            <h4> Epochs to train model </h4>
            ```python {script} -- --epochs `num-value` ```
            <h4> Train model size </h4>
            ```python {script} -- --train_size `decimal-value` ```
            <h4> Train model checkpoint modes </h4>
            ```python {script} -- --checkpoint_mode `value` ```
            <h4> Verbose </h4>
            ```python {script} -- --verbose ```
            """
        )

    # Last expression of the cell: this is the value marimo displays.
    mo.md(
        """
        # Command-line options 🍃
        """
    )
    return (out_cli_ops,)
@app.cell(hide_code=True)
def perfspec_out_settings(mo, perfspec):
    # Exports `out_settings`, which renders the default values and the model
    # training settings as two Markdown tables.
    def out_settings(notebook_name):
        """Return Markdown tables describing defaults and training settings.

        `notebook_name` is accepted for signature parity with the other
        builders and is not used in the text.
        """
        defaults = perfspec['defaults']
        settings = perfspec['settings']
        # In the checkpoint_mode row the pipe between the two modes is written
        # as the HTML entity &#124;: a raw "|" inside a table cell is read as
        # a column separator and breaks the row.
        return (
            f"""
            <h4>Defaults </h4>
            | variable | value | descripción |
            |:---------- | :---- | :------------ |
            | models dir path | {defaults['models_dirpath']} | models path |
            | model file | **{defaults['model_path']}** | trained model filepath |
            | checkpoints model dir path | {defaults['checkpoints_dirpath']} | checkpoint models path |
            | checkpoint model file | **{defaults['checkpoint_path']}** | checkpoint trained model filepath |
            | data_path | {defaults['data_dirpath']} | data path |
            | actions file| **{defaults['actions_path']}** | actions filepath |
            | history file| **{defaults['history_path']}** | history filepath |
            | input | **{defaults['pred_input']}** | input value for prediction |
            | verbose | {defaults['verbose']} | show detail info |
            <h4> Train model settings </h4>
            | variable | value | descripción |
            |:---------- | :---- | :------------ |
            | epochs | **{defaults['epochs']}** | or change in `comman-line` <br> with **--epochs** `num-value` |
            | train_size | **{defaults['train_size']}** | or change in `comman-line` <br> with **--train_size** `decimal-value` |
            | sequence_length | **{defaults['sequence_length']}** | Length of sequencs <br>or change in `comman-line` <br> with **--sequence** `num-value` |
            | checkpoint_mode | **{defaults['checkpoint_mode']}** | Checkpoint_mode (epochs &#124; weights)<br>or change in `comman-line` <br> with **--checkpoint_mode** `value` |
            | input_length | {defaults['sequence_length']} | Length of input sequencs |
            | lstm_units_1 | {settings['lstm_units_1']} | Depth of the first LSTM layer |
            | lstm_units_2 | {settings['lstm_units_2']} | Depth of the second LSTM layer |
            | dropout_rate | {settings['dropout_rate']} | Dropout rate |
            | batch_size | {settings['batch_size']} | Adjusted for smaller datasets |
            """
        )

    # Last expression of the cell: this is the value marimo displays.
    mo.md(
        """
        # Default values 🍃
        """
    )
    return (out_settings,)
@app.cell(hide_code=True)
def perfspec_load_actions(mo):
    # Exports `load_actions`, which reads the actions file and label-encodes it.
    def load_actions(actions_path, verbose):
        """Read one action per line from `actions_path` and encode them.

        Args:
            actions_path: File to read; trailing whitespace is stripped from
                every line.
            verbose: When not None, the path being loaded is printed.

        Returns:
            A tuple `(actions, unique_actions, label_encoder, encoded_actions)`:
            every line in file order, the distinct lines in first-seen order,
            the fitted scikit-learn `LabelEncoder`, and the result of its
            `fit_transform(actions)`. When the file does not exist both lists
            are empty and the encoder is fitted on the empty list.
        """
        from pathlib import Path
        from sklearn.preprocessing import LabelEncoder

        actions = []
        source = Path(actions_path)
        if source.exists():
            if verbose is not None:
                print(f"Actions loaded from: {actions_path}")
            with source.open('r', encoding='utf-8') as file:
                actions = [line.rstrip() for line in file]

        # dict.fromkeys keeps first-seen order and de-duplicates in one pass,
        # replacing the per-line list membership test.
        unique_actions = list(dict.fromkeys(actions))

        label_encoder = LabelEncoder()
        encoded_actions = label_encoder.fit_transform(actions)
        return (actions, unique_actions, label_encoder, encoded_actions)

    # Last expression of the cell: this is the value marimo displays.
    mo.md(
        """
        # Load actions and encoders 🍃
        """
    )
    return (load_actions,)
@app.cell(hide_code=True)
def perfspec_input_sequence(mo, perfspec):
    # Exports `get_input_sequence`, which turns a comma-separated string into
    # the list of actions that are present in the loaded actions data.
    def get_input_sequence(input_str, unique_actions):
        """Split `input_str` on commas and keep only the known actions.

        Args:
            input_str: Comma-separated action names. When empty (or None) the
                configured `perfspec['settings']['input_str']` is used instead.
            unique_actions: Collection of valid action names.

        Returns:
            The list of names found in `unique_actions`, in input order.
            Unknown names are dropped; they are reported on stdout when
            verbose is set in the settings or on the command line.
        """
        # The fallback value is parsed like any other input; previously this
        # branch left `input_sequence` unassigned and the return failed.
        if not input_str:
            input_str = perfspec['settings']['input_str']

        report_unknown = (
            perfspec['settings']['verbose'] is not None
            or mo.cli_args().get("verbose") is not None
        )

        input_sequence = []
        for token in input_str.split(","):
            # Surrounding blanks are dropped so "a, b" behaves like "a,b".
            name = token.strip()
            if name in unique_actions:
                input_sequence.append(name)
            elif report_unknown:
                print(f"Input not exist: {name}")
        return input_sequence

    # Last expression of the cell: this is the value marimo displays.
    mo.md(
        """
        # Input sequence 🍃
        Control **input sequence** to be included in __actions data__ in the model
        """
    )
    return (get_input_sequence,)
@app.cell(hide_code=True)
def perfspec_load_model_from_path(mo, perfspec):
    # Exports `load_model_from_path`, a thin wrapper around Keras `load_model`.
    def load_model_from_path(model_path, verbose):
        """Load a trained Keras model from `model_path`.

        Args:
            model_path: Location of the saved model, as `str` or `pathlib.Path`.
            verbose: When not None, the loaded path is printed.

        Returns:
            The loaded model, or None when `model_path` does not exist.
        """
        from pathlib import Path
        # Load the model
        from keras.models import load_model
        # NOTE(review): these names are not referenced below; the import is
        # kept in case importing `train_perfspec` has side effects the saved
        # model relies on — confirm before removing.
        from train_perfspec import (  # noqa: F401
            PrecisionMetric,
            RecallMetric,
            F1ScoreMetric
        )
        # Set via options
        custom_objects = {
        #    "PrecisionMetric": PrecisionMetric,
        #    "RecallMetric": RecallMetric,
        #    "F1ScoreMetric": F1ScoreMetric,
        }

        # Accept plain strings too: a path given on the command line can
        # arrive as `str`, which has no `.exists()`.
        model_file = Path(model_path)
        if not model_file.exists():
            return None

        model = load_model(
            model_file,
            custom_objects=custom_objects
        )
        if verbose is not None:
            print(f"Trained model loaded from: {model_path}")
        return model

    # Last expression of the cell: this is the value marimo displays.
    mo.md(
        f"""
        ## Load model 🍃
        Load trained model from file **model_path**
        Default path: {perfspec['settings']['model_filepath']}
        """
    )
    return (load_model_from_path,)
@app.cell(hide_code=True)
def perfspec_predict(mo, perfspec):
    # Exports `predict_action`, which runs the trained model on an input
    # sequence of action names.
    def predict_action(model, sequence_length, input_sequence, label_encoder, verbose):
        """Predict the next action for `input_sequence`.

        Args:
            model: Object exposing `predict(x, verbose=...)`.
            sequence_length: Length the encoded input is padded/truncated to.
            input_sequence: Action names; each must be known to
                `label_encoder` (its `transform` rejects unseen labels).
            label_encoder: Fitted encoder used both to encode the input and to
                decode the predicted index.
            verbose: "-1" silences all output and passes None to
                `model.predict`; any other value is forwarded unchanged.

        Returns:
            A tuple `(encoded_input, predicted)` where `predicted` is a dict
            with keys 'probabilities', 'index', 'max', 'max_value' and 'action'.
        """
        import numpy as np
        from tensorflow.keras.preprocessing.sequence import pad_sequences

        # Encode with the encoder that also decodes the prediction below, so
        # input indices and output indices share one vocabulary. A separate
        # encoder fitted on the input alone numbers its labels from zero.
        encoded_input = label_encoder.transform(input_sequence)
        # Pad the input sequence to the expected sequence length. The former
        # windowing loop called `.append` on the ndarray, which fails for any
        # input longer than `sequence_length`; `maxlen` already bounds it.
        encoded_input = pad_sequences([encoded_input], maxlen=sequence_length, padding='pre')
        # NOTE(review): adds a trailing axis of size 1 — presumably the input
        # shape the model was trained with; confirm against the training code.
        encoded_input = np.expand_dims(encoded_input, axis=-1)

        predicted = {}
        _verbose = verbose if verbose != "-1" else None
        predicted['probabilities'] = model.predict(encoded_input, verbose=_verbose)
        predicted['index'] = np.argmax(predicted['probabilities'])  # , axis=-1)[0]
        predicted['max'] = np.max(predicted['probabilities'], axis=-1)[0]
        predicted['max_value'] = "{:2.4f}".format(predicted['max'])
        predicted['action'] = label_encoder.inverse_transform(np.ravel(predicted['index']))

        if verbose != "-1":
            if verbose is not None or mo.cli_args().get("verbose") is not None:
                print(f"Predicted next action: {predicted['action'][0]} with: {predicted['max_value']}")
            elif not mo.running_in_notebook():
                # Script mode without verbosity: print only the action name.
                print(f"{predicted['action'][0]}")
        return (encoded_input, predicted)

    # Last expression of the cell: this is the value marimo displays.
    mo.md(
        f"""
        # Predict action 🍃
        Predict action from trained model
        Default value: **{perfspec['settings']['input_str']}**
        For **command-line** it can be provided via **--input** `value` (can use several comma separated)
        """
    )
    return (predict_action,)
# Run the notebook application when this file is executed as a script.
if __name__ == "__main__":
    app.run()