443 lines
18 KiB
Python
443 lines
18 KiB
Python
import marimo
|
|
|
|
# Version string recorded in this notebook file by marimo.
__generated_with = "0.10.17"
# Notebook application object; every cell below registers itself on it
# through the @app.cell decorator.
app = marimo.App(width="medium")
|
|
|
|
|
|
@app.cell(hide_code=True)
def _():
    def app_vars():
        """Return the static metadata that describes this application.

        The mapping holds the author details, the application name, the
        training mode, date/version strings and the repository URLs
        (repo, about page, intro page and logo image).
        """
        return {
            'author_name': 'Jesús Pérez',
            'author_link': 'mailto:info@jesusperez.pro',
            'name': 'PerfSPEC Learning 🍃',
            'train_mode': 'LSTM',
            'date': "2025",
            'version': "1.0",
            'repo': 'https://repo.jesusperez.pro/jesus/perfspec-learning',
            'about': 'https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/about.md',
            'intro': 'https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/intro.md',
            'image': 'https://repo.jesusperez.pro/jesus/perfspec-learning/media/branch/main/imgs/perfSPEC-learning.png',
        }

    return (app_vars,)
|
|
|
|
|
|
@app.cell(hide_code=True)
def title(app_vars):
    # Title cell: renders the notebook banner (file name, version, author
    # link, application name and logo) and exports `mo` and `notebook_name`.
    import marimo as mo

    notebook_name = 'lib_perfspec.py'

    # Underscore-prefixed names stay private to this cell.
    _app = app_vars()

    # Top bar: notebook file / version on one side, author link on the other.
    _top_bar = mo.hstack([
        mo.md(f"<small>{notebook_name} | vers: {_app['version']} {_app['date']} </small>"),
        mo.md(f"[{_app['author_name']}]({_app['author_link']})"),
    ])

    # The `;` used to sit outside the style attribute
    # (style="border-radius: 20px"; />), which is malformed HTML; it now
    # lives inside the attribute value, like in the header() helper.
    _logo = mo.md(
        f'<center><img src="{_app["image"]}" width="300" style="border-radius: 20px;" /></center>'
    )

    # Centered banner: spacer, application name and logo.
    _banner = mo.vstack(
        items=[
            mo.md("<br>"),
            mo.md(f"# {_app['name']}"),
            _logo,
        ],
        justify='center',
        gap=0.,
    )

    mo.md(
        f"""
        {_top_bar}
        {_banner}
        It has been design as a [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) approach to put `thing` in only one place
        """
    )
    return mo, notebook_name
|
|
|
|
|
|
@app.cell(hide_code=True)
def perfspec_header(mo, perfspec):
    # Exports two helpers that build the markdown shown at the top of the
    # notebooks using this library: `header` and `intro`.

    def header(notebook_name):
        """Return the markdown/HTML banner for `notebook_name`.

        The banner has a top bar (file name + version, the navigation menu
        stored in perfspec['app']['main_menu'], the author link) followed by
        the centered application name and logo.
        """
        app_info = perfspec['app']
        top_bar = mo.hstack([
            mo.md(f"<small>{notebook_name} | vers: {app_info['version']} {app_info['date']} </small>"),
            app_info['main_menu'],
            mo.md(f"[{app_info['author_name']}]({app_info['author_link']})"),
        ])
        logo = mo.md(
            f'<center><img src="{app_info["image"]}" width="300" style="border-radius: 20px;"/></center>'
        )
        banner = mo.vstack(
            items=[
                mo.md("<br>"),
                mo.md(f"# {app_info['name']}"),
                logo,
            ],
            justify='center',
            gap=0.,
        )
        return f"""
        {top_bar}
        {banner}
        """

    def intro(notebook_name):
        """Return the markdown "Intro" section for `notebook_name`.

        Links the notebook file, names the training mode and points to the
        settings library file next to the running notebook.
        """
        # Both link targets were broken before: the first had a stray `"`
        # inside the URL, the second left "/ lib_perfspec.py" outside the
        # braces so the path was never joined (and misspelled the link text).
        notebook_link = perfspec['defaults']['models_dirpath'] / notebook_name
        settings_link = mo.notebook_dir() / "lib_perfspec.py"
        return f"""
        ## Intro

        [{notebook_name}]({notebook_link}) __notebook__ is created for `trained model PREDICTIONS` with [{perfspec['app']['train_mode']}](https://en.wikipedia.org/wiki/Long_short-term_memory) using [Tensor Keras](https://www.tensorflow.org/guide/keras)

        Settings are defined in: [lib_perfspec.py]({settings_link})

        Values can be overwritten by using **command-line** see current values below
        """

    return header, intro
|
|
|
|
|
|
@app.cell(hide_code=True)
def perfspec_vars(app_vars, mo):
    # Builds the shared `perfspec` dictionary:
    #   perfspec["defaults"] - built-in default values and directories
    #   perfspec["app"]      - application metadata plus the navigation menu
    #   perfspec["settings"] - effective values (command line over defaults)
    # The run is aborted with a message when a required path is missing.
    from pathlib import Path

    # Parse the command line once; underscore names stay private to the cell.
    _cli = mo.cli_args()

    perfspec = dict(
        defaults=dict(
            epochs=300,
            train_size=0.8,
            sequence_length=2,
            model_path="perfSPEC_model.keras",
            actions_path="actions-dataset-audit.txt",
            history_path="history.json",
            checkpoint_path="model_at_epoch_{epoch:02d}.keras",
            checkpoint_mode="epochs",  # alternative "weights"
            pred_input="delete_apiservices",
            verbose=None,
            data_dirpath=mo.notebook_dir() / "../../data",
            models_dirpath=mo.notebook_dir() / "../../models",
            checkpoints_dirpath=mo.notebook_dir() / "../../models/checkpoints",
        ),
        app=app_vars(),
    )
    _defaults = perfspec["defaults"]

    # All three default directories must exist before anything else runs.
    # `raise SystemExit(msg)` is what the site-provided `exit(msg)` does,
    # without depending on the `site` module being loaded.
    for _label, _key in (
        ("Trained models", "models_dirpath"),
        ("Data", "data_dirpath"),
        ("Checkpoints", "checkpoints_dirpath"),
    ):
        if not Path(_defaults[_key]).exists():
            raise SystemExit(f"{_label} path not found: {_defaults[_key]}")

    perfspec["app"]['main_menu'] = mo.nav_menu(
        {
            "#main": "🍃 Main",
            f"{perfspec['app']['intro']}": f"{mo.icon('lucide:shield-check')} Intro",
            f"{perfspec['app']['about']}": f"{mo.icon('lucide:package')} About",
            f"{perfspec['app']['repo']}": f"{mo.icon('lucide:github')} Repo",
        }
    )

    # --sequence drives both sequence_length and input_length.
    _sequence_length = _cli.get("sequence") or _defaults["sequence_length"]

    perfspec["settings"] = dict(
        # Number of epochs
        epochs=_cli.get("epochs") or _defaults["epochs"],
        checkpoint_mode=_cli.get("checkpoint_mode") or _defaults["checkpoint_mode"],
        # Fraction of the data used for training
        train_size=_cli.get("train_size") or _defaults["train_size"],
        sequence_length=_sequence_length,
        embedding_dim=50,  # Dimensionality of embedding vectors
        input_length=_sequence_length,
        lstm_units_1=256,  # Depth of the first LSTM layer
        lstm_units_2=128,  # Depth of the second LSTM layer
        dropout_rate=0.2,  # Dropout rate
        batch_size=16,  # Adjusted for smaller datasets
        model_history_filepath=_defaults["models_dirpath"] / _defaults["history_path"],
        # Command-line overrides arrive as plain strings; wrap them in Path
        # so consumers (e.g. `model_path.exists()`) get the same type as
        # they do with the defaults.
        model_filepath=Path(
            _cli.get("model_path")
            or _defaults["models_dirpath"] / _defaults["model_path"]
        ),
        checkpoint_filepath=Path(
            _cli.get("checkpoint_path")
            or _defaults["checkpoints_dirpath"] / _defaults["checkpoint_path"]
        ),
        actions_filepath=Path(
            _cli.get("actions_path")
            or _defaults["data_dirpath"] / _defaults["actions_path"]
        ),
        input_str=_cli.get("input") or _defaults["pred_input"],
        main_audit_log=_defaults['data_dirpath'] / "main-audit-logs.log",
        raw_audit_log=_defaults['data_dirpath'] / "raw-audit-logs.log",
        actions_distrib_filepath=_defaults['data_dirpath'] / 'actions_distribution.pdf',
        actions_distrib_format='pdf',
    )

    # A bare `--verbose` flag is reported as an empty string: treat it as 1.
    _cli_verbose = _cli.get("verbose")
    if _cli_verbose == "":
        perfspec["settings"]["verbose"] = 1
    else:
        perfspec["settings"]["verbose"] = _cli_verbose or _defaults["verbose"]

    if not perfspec["settings"]["actions_filepath"].exists():
        raise SystemExit(
            f"Actions path not found: {perfspec['settings']['actions_filepath']}"
        )

    mo.md(
        f"""
        # Vars settings 🍃

        **perfspec** includes: `defaults` and `settings` to keep all vars inside one dictionary

        Main configurable variables some of them can be overload from **command-line**

        Be sur `defaults` path exist for:

        | variable     | path |
        |:---------- | :---------------- |
        | data_dirpath | {perfspec["defaults"]["data_dirpath"]} |
        | checkpoints_dirpath | {perfspec["defaults"]["checkpoints_dirpath"]} |
        | actions_filepath | {perfspec["settings"]["actions_filepath"]} |

        These path can be changed by using full file path with **command-line** arguments

        """
    )
    return Path, perfspec
|
|
|
|
|
|
@app.cell(hide_code=True)
def perfspec_args(mo, perfspec):
    # Handles the informational flags `--version` and `--help`: prints the
    # requested text and terminates the run. Otherwise only renders the
    # section title.
    _cli = mo.cli_args()
    _defaults = perfspec['defaults']

    if _cli.get("version") == "":
        # The version is part of the application metadata; `defaults` has
        # no 'version' key, so the previous lookup raised KeyError.
        print(f"PerfSPEC Learning: {perfspec['app']['version']}")
        raise SystemExit(0)

    if _cli.get("help") == "":
        _usage = [
            "PerfSPEC Learning:\n",
            f" To predict sequences use:\n\t\t --input value,value1 (default: {_defaults['pred_input']})",
            f" Epochs to train use:\n\t\t --epochs num-value (default: {_defaults['epochs']})",
            # A stray "i" used to follow the default value here.
            f" For train size use:\n\t\t --train_size decimal-value (default: {_defaults['train_size']})",
            # Show the sequence_length default (train_size was shown before).
            f" For sequence size in train model use:\n\t\t --sequence num-value (default: {_defaults['sequence_length']})",
            f" For checkpoint mode (epochs,weights) in train model use:\n\t\t --checkpoint_mode value (default: {_defaults['checkpoint_mode']})",
            f" For model_filepath use:\n\t\t --model_path model_filepath (default: {_defaults['model_path']})",
            f" For checkpoint_filepath use:\n\t\t --checkpoint_path checkpoint_filepath (default: {_defaults['checkpoint_path']})",
            f" For actions_filepath use:\n\t\t --actions_path actions_filepath\n\t\t (default: {perfspec['settings']['actions_filepath']})",
            f" For verbose mode use:\n\t\t --verbose (default: {_defaults['verbose']})",
        ]
        print("\n".join(_usage))
        raise SystemExit(0)

    mo.md(
        f"""
        # Arguments 🍃

        Main arguments parsing some of values can be overload from **command-line**
        """
    )
    return
|
|
|
|
|
|
@app.cell(hide_code=True)
def perfspec_cli_ops(mo):
    # Exports `out_cli_ops`, which documents the command-line options.

    def out_cli_ops(notebook_name):
        """Return markdown/HTML listing every command-line option.

        Each example is prefixed with the notebook location reported by
        marimo and `notebook_name`; options follow the ` -- ` separator.
        """
        # Shared prefix of every example command.
        command = f"python {mo.notebook_location()}/{notebook_name} --"
        # Options are spelled with two dashes; several examples used to
        # show three ("---sequence"), which does not match the option names
        # read via cli_args. --checkpoint_mode takes a mode name, not a
        # decimal value.
        return (
            f"""

            <h3>Command-Line options</h3>

            <h4> Input value for prediction trained model </h4>
            ```{command} --input `value, value-1` ```
            <h4> Model filepath </h4>
            ```{command} --model_path `model-filepath` ```
            <h4> Checkpoints model filepath </h4>
            ```{command} --checkpoint_path `checkpoint-filepath` ```
            <h4> Actions filepath </h4>
            ```{command} --actions_path `actions-filepath` ```
            <h4> Sequence to train model </h4>
            ```{command} --sequence `num-value` ```
            <h4> Epochs to train model </h4>
            ```{command} --epochs `num-value` ```
            <h4> Train model size </h4>
            ```{command} --train_size `decimal-value` ```
            <h4> Train model checkpoint modes </h4>
            ```{command} --checkpoint_mode `value` ```
            <h4> Verbose </h4>
            ```{command} --verbose ```
            """
        )

    mo.md(
        f"""
        # Command-line options 🍃
        """
    )
    return (out_cli_ops,)
|
|
|
|
|
|
@app.cell(hide_code=True)
def perfspec_out_settings(mo, perfspec):
    # Exports `out_settings`, which renders the default values and the
    # training settings as two markdown tables.

    def out_settings(notebook_name):
        """Return markdown tables with the defaults and training settings.

        `notebook_name` is accepted for a signature parallel to the other
        output helpers; it is not used in the generated text.
        """
        defaults = perfspec['defaults']
        settings = perfspec['settings']
        # A literal "|" inside a table cell starts a new column, so the
        # checkpoint_mode description uses the &#124; entity instead.
        return (
            f"""
            <h4>Defaults </h4>

            | variable     | value | descripción |
            |:---------- | :---- | :------------ |
            | models dir path | {defaults['models_dirpath']} | models path |
            | model file | **{defaults['model_path']}** | trained model filepath |
            | checkpoints model dir path | {defaults['checkpoints_dirpath']} | checkpoint models path |
            | checkpoint model file | **{defaults['checkpoint_path']}** | checkpoint trained model filepath |
            | data_path | {defaults['data_dirpath']} | data path |
            | actions file| **{defaults['actions_path']}** | actions filepath |
            | history file| **{defaults['history_path']}** | history filepath |
            | input | **{defaults['pred_input']}** | input value for prediction |
            | verbose | {defaults['verbose']} | show detail info |

            <h4> Train model settings </h4>

            | variable     | value | descripción |
            |:---------- | :---- | :------------ |
            | epochs | **{defaults['epochs']}** | or change in `comman-line` <br> with **--epochs** `num-value` |
            | train_size | **{defaults['train_size']}** | or change in `comman-line` <br> with **--train_size** `decimal-value` |
            | sequence_length | **{defaults['sequence_length']}** | Length of sequencs <br>or change in `comman-line` <br> with **--sequence** `num-value` |
            | checkpoint_mode | **{defaults['checkpoint_mode']}** | Checkpoint_mode (epochs &#124; weights)<br>or change in `comman-line` <br> with **--checkpoint_mode** `value` |
            | input_length | {defaults['sequence_length']} | Length of input sequencs |
            | lstm_units_1 | {settings['lstm_units_1']} | Depth of the first LSTM layer |
            | lstm_units_2 | {settings['lstm_units_2']} | Depth of the second LSTM layer |
            | dropout_rate | {settings['dropout_rate']} | Dropout rate |
            | batch_size | {settings['batch_size']} | Adjusted for smaller datasets |

            """
        )

    mo.md(
        f"""
        # Default values 🍃
        """
    )
    return (out_settings,)
|
|
|
|
|
|
@app.cell(hide_code=True)
def perfspec_load_actions(mo):
    # Exports `load_actions`, which reads the actions file and label-encodes it.

    def load_actions(actions_path, verbose):
        """Load the actions file and encode the actions as integer labels.

        Args:
            actions_path: Path of a text file with one action per line.
            verbose: Any value other than None prints the loaded path.

        Returns:
            A tuple (actions, unique_actions, label_encoder, encoded_actions):
            every line with trailing whitespace removed, the distinct
            actions in first-seen order, the fitted LabelEncoder and the
            encoded labels. When the file does not exist the two lists are
            empty and the encoder is fitted on an empty list.
        """
        from pathlib import Path
        from sklearn.preprocessing import LabelEncoder

        actions = []
        if Path(actions_path).exists():
            if verbose is not None:
                print(f"Actions loaded from: {actions_path}")
            with open(actions_path, 'r') as file:
                actions = [line.rstrip() for line in file]

        # dict.fromkeys keeps first-seen order and removes duplicates in
        # linear time (the list-membership scan it replaces was quadratic).
        unique_actions = list(dict.fromkeys(actions))

        label_encoder = LabelEncoder()
        encoded_actions = label_encoder.fit_transform(actions)
        return (actions, unique_actions, label_encoder, encoded_actions)

    mo.md(
        f"""
        # Load actions and encoders 🍃
        """
    )
    return (load_actions,)
|
|
|
|
|
|
@app.cell(hide_code=True)
def perfspec_input_sequence(mo, perfspec):
    # Exports `get_input_sequence`, which validates the prediction input.

    def get_input_sequence(input_str, unique_actions):
        """Split `input_str` on commas and keep only the known actions.

        Args:
            input_str: Comma separated action names. When empty, the
                configured perfspec['settings']['input_str'] is used.
            unique_actions: Collection of valid action names.

        Returns:
            The list of requested actions found in `unique_actions`, in
            input order. Unknown names are dropped, and reported when
            verbose output is enabled.
        """
        # Fall back to the configured input and then parse it. Previously
        # the fallback branch never built the sequence, so an empty input
        # failed on `return` with an unbound local.
        if not input_str:
            input_str = perfspec['settings']['input_str']

        known_actions = set(unique_actions)
        report_unknown = (
            perfspec['settings']['verbose'] is not None
            or mo.cli_args().get("verbose") is not None
        )

        input_sequence = []
        for action in input_str.split(","):
            if action in known_actions:
                input_sequence.append(action)
            elif report_unknown:
                print(f"Input not exist: {action}")
        return input_sequence

    mo.md(
        f"""
        # Input sequence 🍃

        Control **input sequence** to be included in __actions data__ in the model
        """
    )
    return (get_input_sequence,)
|
|
|
|
|
|
@app.cell(hide_code=True)
def perfspec_load_model_from_path(mo, perfspec):
    # Exports `load_model_from_path`, which loads a trained Keras model.

    def load_model_from_path(model_path, verbose):
        """Load the trained Keras model stored at `model_path`.

        Args:
            model_path: Model file location, as a `str` or a path object.
            verbose: Any value other than None prints the loaded path.

        Returns:
            The loaded model, or None when `model_path` does not exist.
        """
        from pathlib import Path
        from keras.models import load_model
        # NOTE(review): these names are not referenced below; the import is
        # kept because importing train_perfspec may be what makes the custom
        # metrics known to Keras - confirm before removing.
        from train_perfspec import (
            PrecisionMetric,
            RecallMetric,
            F1ScoreMetric
        )
        # Set via options
        custom_objects = {
            # "PrecisionMetric": PrecisionMetric,
            # "RecallMetric": RecallMetric,
            # "F1ScoreMetric": F1ScoreMetric,
        }

        # Accept plain strings too (a path given on the command line).
        model_path = Path(model_path)
        if not model_path.exists():
            # Explicit "not found" result instead of reaching `return model`
            # with the name unbound.
            return None

        model = load_model(
            model_path,
            custom_objects=custom_objects
        )
        if verbose is not None:
            print(f"Trained model loaded from: {model_path}")
        return model

    mo.md(
        f"""
        ## Load model 🍃

        Load trained model from file **model_path**

        Default path: {perfspec['settings']['model_filepath']}
        """
    )
    return (load_model_from_path,)
|
|
|
|
|
|
@app.cell(hide_code=True)
def perfspec_predict(mo, perfspec):
    # Exports `predict_action`, which predicts the next action with a
    # trained model.

    def predict_action(model, sequence_length, input_sequence, label_encoder, verbose):
        """Predict the action that follows `input_sequence`.

        Args:
            model: Trained model exposing `predict`.
            sequence_length: Number of steps fed to the model.
            input_sequence: Action names; each one must be known to
                `label_encoder`.
            label_encoder: LabelEncoder fitted on the actions dataset.
            verbose: "-1" silences all output (and is passed to
                `model.predict` as None); any other non-None value prints
                a detailed result line.

        Returns:
            A tuple (encoded_input, predicted). `encoded_input` is the array
            given to the model; `predicted` is a dict with the keys
            'probabilities', 'index', 'max', 'max_value' and 'action'.
        """
        import numpy as np
        from tensorflow.keras.preprocessing.sequence import pad_sequences

        # Encode with the encoder fitted on the dataset so the integers mean
        # the same actions as in `label_encoder.classes_`. Fitting a fresh
        # encoder on the input alone (as was done before) numbers the input
        # actions from 0 regardless of the dataset vocabulary.
        encoded_input = label_encoder.transform(input_sequence)

        # pad_sequences left-pads short inputs and, with its default
        # truncating='pre', keeps the last `sequence_length` items of long
        # ones. The former windowing loop called .append on a numpy array
        # and failed for inputs longer than `sequence_length`.
        encoded_input = pad_sequences([encoded_input], maxlen=sequence_length, padding='pre')
        encoded_input = np.expand_dims(encoded_input, axis=-1)

        silent = verbose == "-1"
        probabilities = model.predict(encoded_input, verbose=None if silent else verbose)

        predicted = {}
        predicted['probabilities'] = probabilities
        predicted['index'] = np.argmax(probabilities)
        predicted['max'] = np.max(probabilities, axis=-1)[0]
        predicted['max_value'] = "{:2.4f}".format(predicted['max'])
        predicted['action'] = label_encoder.inverse_transform(np.ravel(predicted['index']))

        if not silent:
            if verbose is not None or mo.cli_args().get("verbose") is not None:
                print(f"Predicted next action: {predicted['action'][0]} with: {predicted['max_value']}")
            elif not mo.running_in_notebook():
                # Script mode without verbose: print only the action name.
                print(f"{predicted['action'][0]}")
        return (encoded_input, predicted)

    mo.md(
        f"""
        # Predict action 🍃

        Predict action from trained model

        Default value: **{perfspec['settings']['input_str']}**

        For **command-line** it can be provided via **--input** `value` (can use several comma separated)
        """
    )
    return (predict_action,)
|
|
|
|
|
|
# Run the notebook application when this file is executed as a script.
if __name__ == "__main__":
    app.run()
|