import marimo

# Version of marimo that generated this notebook file.
__generated_with = "0.10.17"
# Notebook application object; every cell below registers itself on it.
app = marimo.App(width="medium")
@app.cell(hide_code=True)
def _():
    def app_vars():
        """Return the application metadata (author, name, version, links).

        A new dict is built on every call, so a caller may add keys to the
        result without affecting other callers.
        """
        return dict(
            author_name='Jesús Pérez',
            author_link='mailto:info@jesusperez.pro',
            name='PerfSPEC Learning 🍃',
            train_mode='LSTM',
            date="2025",
            version="1.0",
            repo='https://repo.jesusperez.pro/jesus/perfspec-learning',
            about='https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/about.md',
            intro='https://repo.jesusperez.pro/jesus/perfspec-learning/src/branch/main/intro.md',
            image='https://repo.jesusperez.pro/jesus/perfspec-learning/media/branch/main/imgs/perfSPEC-learning.png',
        )
    return (app_vars,)
@app.cell(hide_code=True)
def title(app_vars):
    import marimo as mo

    notebook_name = 'lib_perfspec.py'

    # Underscore-prefixed names stay private to this cell.
    _app = app_vars()
    _image_url = _app['image']

    # The pieces are built before the final f-string: nesting f-strings that
    # reuse the same quote character only parses on Python 3.12+.
    _info_row = mo.hstack(
        [
            mo.md(f"<small>{notebook_name} | vers: {_app['version']} {_app['date']}</small>"),
            mo.md(f"[{_app['author_name']}]({_app['author_link']})"),
        ]
    )
    _banner = mo.vstack(
        items=[
            mo.md("<br>"),
            mo.md(f"# {_app['name']}"),
            # The ';' belongs inside the style attribute value.
            mo.md(f'<center><img src="{_image_url}" width="300" style="border-radius: 20px;" /></center>'),
        ],
        justify='center',
        gap=0.,
    )

    mo.md(
        f"""
        {_info_row}
        {_banner}

        It has been design as a [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) approach to put `thing` in only one place
        """
    )
    return mo, notebook_name
@app.cell(hide_code=True)
def perfspec_header(mo, perfspec):
    def header(notebook_name):
        """Return the header markup for `notebook_name`.

        The header is an info row (notebook name/version, main menu, author
        link) followed by a centered banner with the application name and
        image, all taken from `perfspec['app']`.
        """
        app_info = perfspec['app']
        image_url = app_info['image']
        # Built outside the returned f-string: nested f-strings that reuse
        # the same quote character only parse on Python 3.12+.
        info_row = mo.hstack(
            [
                mo.md(f"<small>{notebook_name} | vers: {app_info['version']} {app_info['date']}</small>"),
                app_info['main_menu'],
                mo.md(f"[{app_info['author_name']}]({app_info['author_link']})"),
            ]
        )
        banner = mo.vstack(
            items=[
                mo.md("<br>"),
                mo.md(f"# {app_info['name']}"),
                mo.md(f'<center><img src="{image_url}" width="300" style="border-radius: 20px;" /></center>'),
            ],
            justify='center',
            gap=0.,
        )
        return f"""
        {info_row}
        {banner}
        """

    def intro(notebook_name):
        """Return the markdown introduction text for `notebook_name`."""
        notebook_link = perfspec['defaults']['models_dirpath'] / notebook_name
        train_mode = perfspec['app']['train_mode']
        # The link target is closed with ')' only; a stray '"' before it
        # would break the markdown link.
        return f"""
        ## Intro

        [{notebook_name}]({notebook_link}) __notebook__ is created for `trained model PREDICTIONS` with [{train_mode}](https://en.wikipedia.org/wiki/Long_short-term_memory) using [Tensor Keras](https://www.tensorflow.org/guide/keras)

        Settings are defined in: [lib_perfspec.py]({mo.notebook_dir()}/lib_perfspec.py)

        Values can be overwritten by using **command-line** see current values below
        """

    return header, intro
@app.cell(hide_code=True)
def perfspec_vars(app_vars, mo):
    from pathlib import Path

    # `perfspec` holds everything in one dictionary:
    #   defaults - built-in values and directories
    #   app      - application metadata from app_vars()
    #   settings - effective values after command-line overrides (added below)
    perfspec = dict(
        defaults=dict(
            epochs=300,
            train_size=0.8,
            sequence_length=2,
            model_path="perfSPEC_model.keras",
            actions_path="actions-dataset-audit.txt",
            history_path="history.json",
            checkpoint_path="model_at_epoch_{epoch:02d}.keras",
            checkpoint_mode="epochs",  # alternative "weights"
            pred_input="delete_apiservices",
            verbose=None,
            data_dirpath=mo.notebook_dir() / "../../data",
            models_dirpath=mo.notebook_dir() / "../../models",
            checkpoints_dirpath=mo.notebook_dir() / "../../models/checkpoints",
        ),
        app=app_vars(),
    )

    # Underscore-prefixed names stay private to this cell.
    _defaults = perfspec["defaults"]
    _cli = mo.cli_args()

    # Stop early, with the same messages as before, when a required
    # directory is missing.
    for _label, _key in (
        ("Trained models", "models_dirpath"),
        ("Data", "data_dirpath"),
        ("Checkpoints", "checkpoints_dirpath"),
    ):
        if not Path(_defaults[_key]).exists():
            raise SystemExit(f"{_label} path not found: {_defaults[_key]}")

    perfspec["app"]['main_menu'] = mo.nav_menu(
        {
            "#main": "🍃 Main",
            f"{perfspec['app']['intro']}": f"{mo.icon('lucide:shield-check')} Intro",
            f"{perfspec['app']['about']}": f"{mo.icon('lucide:package')} About",
            f"{perfspec['app']['repo']}": f"{mo.icon('lucide:github')} Repo",
        }
    )

    # --sequence drives both sequence_length and input_length.
    _sequence_length = _cli.get("sequence") or _defaults["sequence_length"]

    perfspec["settings"] = dict(
        # Number of epochs
        epochs=_cli.get("epochs") or _defaults["epochs"],
        checkpoint_mode=_cli.get("checkpoint_mode") or _defaults["checkpoint_mode"],
        # Train size (decimal value)
        train_size=_cli.get("train_size") or _defaults["train_size"],
        sequence_length=_sequence_length,
        embedding_dim=50,  # Dimensionality of embedding vectors
        input_length=_sequence_length,
        lstm_units_1=256,  # Depth of the first LSTM layer
        lstm_units_2=128,  # Depth of the second LSTM layer
        dropout_rate=0.2,  # Dropout rate
        batch_size=16,  # Adjusted for smaller datasets
        model_history_filepath=_defaults["models_dirpath"] / _defaults["history_path"],
        # Command-line overrides are wrapped in Path so these entries have
        # the same type whether or not they were overridden.
        model_filepath=Path(
            _cli.get("model_path")
            or _defaults["models_dirpath"] / _defaults["model_path"]
        ),
        checkpoint_filepath=Path(
            _cli.get("checkpoint_path")
            or _defaults["checkpoints_dirpath"] / _defaults["checkpoint_path"]
        ),
        actions_filepath=Path(
            _cli.get("actions_path")
            or _defaults["data_dirpath"] / _defaults["actions_path"]
        ),
        input_str=_cli.get("input") or _defaults["pred_input"],
        main_audit_log=_defaults['data_dirpath'] / "main-audit-logs.log",
        raw_audit_log=_defaults['data_dirpath'] / "raw-audit-logs.log",
        actions_distrib_filepath=_defaults['data_dirpath'] / 'actions_distribution.pdf',
        actions_distrib_format='pdf',
    )

    # A bare `--verbose` flag arrives as an empty string and means level 1.
    if _cli.get("verbose") == "":
        perfspec["settings"]["verbose"] = 1
    else:
        perfspec["settings"]["verbose"] = _cli.get("verbose") or _defaults["verbose"]

    if not perfspec["settings"]["actions_filepath"].exists():
        raise SystemExit(
            f"Actions path not found: {perfspec['settings']['actions_filepath']}"
        )

    mo.md(
        f"""
        # Vars settings 🍃

        **perfspec** includes: `defaults` and `settings` to keep all vars inside one dictionary

        Main configurable variables some of them can be overload from **command-line**

        Be sur `defaults` path exist for:

        | variable | path |
        |:----------|:----------------|
        | data_dirpath | {_defaults['data_dirpath']} |
        | checkpoints_dirpath | {_defaults['checkpoints_dirpath']} |
        | actions_filepath | {perfspec['settings']['actions_filepath']} |

        These path can be changed by using full file path with **command-line** arguments
        """
    )
    return Path, perfspec
@app.cell(hide_code=True)
def perfspec_args(mo, perfspec):
    # `--version` and `--help` arrive as empty strings when passed as bare
    # flags; each prints its text and stops the program.
    if mo.cli_args().get("version") == "":
        # The version is stored under 'app'; 'defaults' has no 'version' key.
        print(f"PerfSPEC Learning: {perfspec['app']['version']}")
        raise SystemExit

    if mo.cli_args().get("help") == "":
        # Underscore-prefixed names stay private to this cell.
        _defaults = perfspec['defaults']
        _help_lines = (
            "PerfSPEC Learning:\n",
            f"To predict sequences use:\n\t\t--input value,value1 (default: {_defaults['pred_input']})",
            f"Epochs to train use:\n\t\t--epochs num-value (default: {_defaults['epochs']})",
            f"For train size use:\n\t\t--train_size decimal-value (default: {_defaults['train_size']})",
            # --sequence overrides sequence_length, so that default is shown.
            f"For sequence size in train model use:\n\t\t--sequence num-value (default: {_defaults['sequence_length']})",
            f"For checkpoint mode (epochs,weights) in train model use:\n\t\t--checkpoint_mode value (default: {_defaults['checkpoint_mode']})",
            f"For model_filepath use:\n\t\t--model_path model_filepath (default: {_defaults['model_path']})",
            f"For checkpoint_filepath use:\n\t\t--checkpoint_path checkpoint_filepath (default: {_defaults['checkpoint_path']})",
            f"For actions_filepath use:\n\t\t--actions_path actions_filepath\n\t\t(default: {perfspec['settings']['actions_filepath']})",
            f"For verbose mode use:\n\t\t--verbose (default: {_defaults['verbose']})",
        )
        for _line in _help_lines:
            print(_line)
        raise SystemExit

    mo.md(
        f"""
        # Arguments 🍃

        Main arguments parsing some of values can be overload from **command-line**
        """
    )
    return
@app.cell(hide_code=True)
def perfspec_cli_ops(mo):
    def out_cli_ops(notebook_name):
        """Return markup listing the command-line options of `notebook_name`.

        Every example uses the same `-- --option` form: the lone `--`
        separates the notebook's own arguments from the interpreter's.
        """
        # Common prefix of every example, up to and including the separator.
        command = f"python {mo.notebook_location()}/{notebook_name} --"
        return (
            f"""
            <h3>Command-Line options</h3>

            <h4>Input value for prediction trained model</h4>

            ```{command} --input `value,value-1` ```

            <h4>Model filepath</h4>

            ```{command} --model_path `model-filepath` ```

            <h4>Checkpoints model filepath</h4>

            ```{command} --checkpoint_path `checkpoint-filepath` ```

            <h4>Actions filepath</h4>

            ```{command} --actions_path `actions-filepath` ```

            <h4>Sequence to train model</h4>

            ```{command} --sequence `num-value` ```

            <h4>Epochs to train model</h4>

            ```{command} --epochs `num-value` ```

            <h4>Train model size</h4>

            ```{command} --train_size `decimal-value` ```

            <h4>Train model checkpoint modes</h4>

            ```{command} --checkpoint_mode `value` ```

            <h4>Verbose</h4>

            ```{command} --verbose ```
            """
        )

    mo.md(
        f"""
        # Command-line options 🍃
        """
    )
    return (out_cli_ops,)
@app.cell(hide_code=True)
def perfspec_out_settings(mo, perfspec):
    def out_settings(notebook_name):
        """Return two markdown tables: the defaults and the train-model settings.

        `notebook_name` is accepted for signature parity with the other
        output helpers; the tables do not depend on it.
        """
        defaults = perfspec['defaults']
        settings = perfspec['settings']
        # A raw '|' inside a cell would split the table row, so the two
        # checkpoint modes are written comma-separated.
        return (
            f"""
            <h4>Defaults</h4>

            | variable | value | descripción |
            |:----------|:----|:------------|
            | models dir path | {defaults['models_dirpath']} | models path |
            | model file | **{defaults['model_path']}** | trained model filepath |
            | checkpoints model dir path | {defaults['checkpoints_dirpath']} | checkpoint models path |
            | checkpoint model file | **{defaults['checkpoint_path']}** | checkpoint trained model filepath |
            | data_path | {defaults['data_dirpath']} | data path |
            | actions file | **{defaults['actions_path']}** | actions filepath |
            | history file | **{defaults['history_path']}** | history filepath |
            | input | **{defaults['pred_input']}** | input value for prediction |
            | verbose | {defaults['verbose']} | show detail info |

            <h4>Train model settings</h4>

            | variable | value | descripción |
            |:----------|:----|:------------|
            | epochs | **{defaults['epochs']}** | or change in `comman-line` <br> with **--epochs** `num-value` |
            | train_size | **{defaults['train_size']}** | or change in `comman-line` <br> with **--train_size** `decimal-value` |
            | sequence_length | **{defaults['sequence_length']}** | Length of sequencs <br> or change in `comman-line` <br> with **--sequence** `num-value` |
            | checkpoint_mode | **{defaults['checkpoint_mode']}** | Checkpoint_mode (epochs,weights) <br> or change in `comman-line` <br> with **--checkpoint_mode** `value` |
            | input_length | {defaults['sequence_length']} | Length of input sequencs |
            | lstm_units_1 | {settings['lstm_units_1']} | Depth of the first LSTM layer |
            | lstm_units_2 | {settings['lstm_units_2']} | Depth of the second LSTM layer |
            | dropout_rate | {settings['dropout_rate']} | Dropout rate |
            | batch_size | {settings['batch_size']} | Adjusted for smaller datasets |
            """
        )

    mo.md(
        f"""
        # Default values 🍃
        """
    )
    return (out_settings,)
@app.cell(hide_code=True)
def perfspec_load_actions(mo):
    def load_actions(actions_path, verbose):
        """Read the actions file and fit a label encoder on its contents.

        Args:
            actions_path: File with one action per line.
            verbose: When not None, the loaded path is printed.

        Returns:
            Tuple `(actions, unique_actions, label_encoder, encoded_actions)`:
            every line in file order (right-stripped), the distinct actions
            in first-seen order, the fitted `LabelEncoder`, and the encoded
            form of `actions`. If the file does not exist, `actions` and
            `unique_actions` are empty.
        """
        from pathlib import Path
        from sklearn.preprocessing import LabelEncoder

        actions = []
        actions_file = Path(actions_path)
        if actions_file.exists():
            if verbose is not None:
                print(f"Actions loaded from: {actions_path}")
            with actions_file.open('r', encoding="utf-8") as file:
                actions = [line.rstrip() for line in file]

        # dict.fromkeys de-duplicates in one pass and keeps first-seen order.
        unique_actions = list(dict.fromkeys(actions))

        label_encoder = LabelEncoder()
        encoded_actions = label_encoder.fit_transform(actions)
        return (actions, unique_actions, label_encoder, encoded_actions)

    mo.md(
        f"""
        # Load actions and encoders 🍃
        """
    )
    return (load_actions,)
@app.cell(hide_code=True)
def perfspec_input_sequence(mo, perfspec):
    def get_input_sequence(input_str, unique_actions):
        """Split `input_str` on commas and keep only the known actions.

        Args:
            input_str: Comma-separated action names. When empty, the
                configured `perfspec['settings']['input_str']` is used.
            unique_actions: Collection of valid action names.

        Returns:
            List of the requested actions found in `unique_actions`, in
            input order. Unknown names are dropped, and reported when
            verbose is set in the settings or on the command line.
        """
        # Fall back to the configured input and then parse it as usual, so
        # the function always returns a list.
        if not input_str:
            input_str = perfspec['settings']['input_str']

        report_unknown = (
            perfspec['settings']['verbose'] is not None
            or mo.cli_args().get("verbose") is not None
        )

        input_sequence = []
        for action in input_str.split(","):
            if action in unique_actions:
                input_sequence.append(action)
            elif report_unknown:
                print(f"Input not exist: {action}")
        return input_sequence

    mo.md(
        f"""
        # Input sequence 🍃

        Control **input sequence** to be included in __actions data__ in the model
        """
    )
    return (get_input_sequence,)
@app.cell(hide_code=True)
def perfspec_load_model_from_path(mo, perfspec):
    def load_model_from_path(model_path, verbose):
        """Load the trained Keras model stored at `model_path`.

        Args:
            model_path: Model file location, as `str` or `Path`.
            verbose: When not None, the loaded path is printed.

        Returns:
            The object returned by `keras.models.load_model`.

        Raises:
            FileNotFoundError: If `model_path` does not exist.
        """
        from pathlib import Path
        from keras.models import load_model
        # NOTE(review): these names are not referenced below; the import is
        # kept in case importing train_perfspec is needed for its side
        # effects when loading the model - confirm before removing.
        from train_perfspec import (
            PrecisionMetric,
            RecallMetric,
            F1ScoreMetric
        )

        # Set via options
        custom_objects = {
            # "PrecisionMetric": PrecisionMetric,
            # "RecallMetric": RecallMetric,
            # "F1ScoreMetric": F1ScoreMetric,
        }

        # Accept plain strings as well as Path objects.
        model_file = Path(model_path)
        if not model_file.exists():
            # Fail with a clear error instead of reaching `return model`
            # with `model` never assigned.
            raise FileNotFoundError(f"Trained model not found: {model_file}")

        model = load_model(
            model_file,
            custom_objects=custom_objects
        )
        if verbose is not None:
            print(f"Trained model loaded from: {model_path}")
        return model

    mo.md(
        f"""
        ## Load model 🍃

        Load trained model from file **model_path**

        Default path: {perfspec['settings']['model_filepath']}
        """
    )
    return (load_model_from_path,)
@app.cell(hide_code=True)
def perfspec_predict(mo, perfspec):
    def predict_action(model, sequence_length, input_sequence, label_encoder, verbose):
        """Predict the next action for `input_sequence` with a trained model.

        Args:
            model: Object exposing `predict(x, verbose=...)`.
            sequence_length: Number of encoded actions fed to the model.
            input_sequence: Action names; each must be known to
                `label_encoder`.
            label_encoder: Fitted encoder that maps action names to indices
                and back.
            verbose: "-1" silences all output; None prints only the action
                name when not running in a notebook; any other value prints
                the action with its probability.

        Returns:
            Tuple `(encoded_input, predicted)`. `predicted` is a dict with
            keys 'probabilities', 'index', 'max', 'max_value' and 'action'.
        """
        import numpy as np
        from tensorflow.keras.preprocessing.sequence import pad_sequences

        # Encode with the fitted label_encoder so the indices are the same
        # ones `inverse_transform` decodes below; an encoder fitted on the
        # input alone would number the actions independently.
        encoded_input = label_encoder.transform(input_sequence)
        # Pad the input sequence to the expected sequence length
        encoded_input = pad_sequences([encoded_input], maxlen=sequence_length, padding='pre')
        encoded_input = np.expand_dims(encoded_input, axis=-1)

        _verbose = verbose if verbose != "-1" else None
        probabilities = model.predict(encoded_input, verbose=_verbose)
        best_probability = np.max(probabilities, axis=-1)[0]
        best_index = np.argmax(probabilities)
        predicted = {
            'probabilities': probabilities,
            'index': best_index,
            'max': best_probability,
            'max_value': "{:2.4f}".format(best_probability),
            'action': label_encoder.inverse_transform(np.ravel(best_index)),
        }

        if verbose != "-1":
            if verbose is not None or mo.cli_args().get("verbose") is not None:
                print(f"Predicted next action: {predicted['action'][0]} with: {predicted['max_value']}")
            elif not mo.running_in_notebook():
                # Script mode without verbose: print only the action name.
                print(f"{predicted['action'][0]}")
        return (encoded_input, predicted)

    mo.md(
        f"""
        # Predict action 🍃

        Predict action from trained model

        Default value: **{perfspec['settings']['input_str']}**

        For **command-line** it can be provided via **--input** `value` (can use several comma separated)
        """
    )
    return (predict_action,)
# Run the notebook application when the file is executed as a script.
if __name__ == "__main__":
    app.run()