10 changed files with 16583 additions and 153 deletions
--- a/.gitignore
+++ b/.gitignore
@ -7,4 +7,3 @@ workspace.code-workspace
 .VSCodeCounter
 wrks
 tmp
-__pycache__
--- a/about.md
+++ b/about.md
@ -1,57 +0,0 @@
-# PerfSPEC Learning Phase
-
-Based on the [PerfSPEC: Performance Profiling-based Proactive Security Policy Enforcement for Containers](https://ieeexplore.ieee.org/document/10577533) document presented in [1], this repository contains the source files used to generate and process data.
-
-[PerfSPEC document](PerfSPEC.pdf)
-
-[Presentación in Spanish](presentacion.pdf)
-
-<div style="margin: auto">
- <a target="_blank" href="perfspec-learning/src/branch/main/presentacion.pdf"><img src="imgs/prefSPEC-learning.png" width="800"></a>
-</div>
-
-__PerfSPEC__ has three phases:
-
- Ranking
- Learning
- Runtime
-
-This repository focuses on the __Learning__ phase, with attention to:
-
- Event logs info load and process
- Predictive learning model 
-
-> Note: The __event data collection__ in `raw-audit-logs.log.gz` is considered realistic and representative enough to simulate
-administrative operations.
-
-## Files
-
- `raw-audit-logs.log` contains raw Kubernetes audit logs collected using the `audit-policy.yaml` audit policy.
-
-Tools are distributed in directories:
-
- [Collect](collect)
- [Process](process)
- [Learning](learning)
-
-As some tasks can be run in [Python](https://python.org) or [Rust](https://www.rust-lang.org/), there are directories for each programming language inside the task directories.  
-
-Each `task/programming-language` may have a __data__ directory where processing output files are generated.
-
-### Collect data
-
-If you wish to [collect](collect) your own dataset, there are several source files that might help:
-
- `collect/collect.py` is a script to trigger the installation and uninstallation of public Helm repositories.
- `collect/helm-charts.json` is a backup of Helm charts used at the time of the collection.
-
-### Process data 
-
-
-### Learning
-
-
-
-## Reference
-
-[1]: [H. Kermabon-Bobinnec et al., "PerfSPEC: Performance Profiling-based Proactive Security Policy Enforcement for Containers," in IEEE Transactions on Dependable and Secure Computing, doi: 10.1109/TDSC.2024.3420712.](https://ieeexplore.ieee.org/document/10577533)
--- a/data/actions_distribution.pdf
+++ b/data/actions_distribution.pdf
--- a/data/audit-logs.csv
+++ b/data/audit-logs.csv
--- a/models_sample.tar.xz
+++ b/models_sample.tar.xz
--- a/data_sample.tar.xz
+++ b/data_sample.tar.xz
--- a/intro.md
+++ b/intro.md
@ -1,57 +0,0 @@
-# PerfSPEC Learning Phase  INTRO
-
-Based on the [PerfSPEC: Performance Profiling-based Proactive Security Policy Enforcement for Containers](https://ieeexplore.ieee.org/document/10577533) document presented in [1], this repository contains the source files used to generate and process data.
-
-[PerfSPEC document](PerfSPEC.pdf)
-
-[Presentación in Spanish](presentacion.pdf)
-
-<div style="margin: auto">
- <a target="_blank" href="perfspec-learning/src/branch/main/presentacion.pdf"><img src="imgs/prefSPEC-learning.png" width="800"></a>
-</div>
-
-__PerfSPEC__ has three phases:
-
- Ranking
- Learning
- Runtime
-
-This repository focuses on the __Learning__ phase, with attention to:
-
- Event logs info load and process
- Predictive learning model 
-
-> Note: The __event data collection__ in `raw-audit-logs.log.gz` is considered realistic and representative enough to simulate
-administrative operations.
-
-## Files
-
- `raw-audit-logs.log` contains raw Kubernetes audit logs collected using the `audit-policy.yaml` audit policy.
-
-Tools are distributed in directories:
-
- [Collect](collect)
- [Process](process)
- [Learning](learning)
-
-As some tasks can be run in [Python](https://python.org) or [Rust](https://www.rust-lang.org/), there are directories for each programming language inside the task directories.  
-
-Each `task/programming-language` may have a __data__ directory where processing output files are generated.
-
-### Collect data
-
-If you wish to [collect](collect) your own dataset, there are several source files that might help:
-
- `collect/collect.py` is a script to trigger the installation and uninstallation of public Helm repositories.
- `collect/helm-charts.json` is a backup of Helm charts used at the time of the collection.
-
-### Process data 
-
-
-### Learning
-
-
-
-## Reference
-
-[1]: [H. Kermabon-Bobinnec et al., "PerfSPEC: Performance Profiling-based Proactive Security Policy Enforcement for Containers," in IEEE Transactions on Dependable and Secure Computing, doi: 10.1109/TDSC.2024.3420712.](https://ieeexplore.ieee.org/document/10577533)
--- a/learning/python/lib_perfspec.py
+++ b/learning/python/lib_perfspec.py
@ -1,6 +1,6 @@
 import marimo

-__generated_with = "0.10.17"
+__generated_with = "0.10.16"
 app = marimo.App(width="medium")


@ -82,7 +82,7 @@ def perfspec_vars(app_vars, mo):

    perfspec = dict(
      defaults = dict(
-            epochs=300,
+            epochs=400,
            train_size=0.8,
            sequence_length=2,
            model_path="perfSPEC_model.keras",
--- a/learning/python/train_perfspec.py
+++ b/learning/python/train_perfspec.py
@ -1,15 +1,6 @@
-# /// script
-# requires-python = ">=3.13"
-# dependencies = [
-#     "keras==3.8.0",
-#     "marimo",
-#     "numpy==2.2.2",
-# ]
-# ///
-
 import marimo

-__generated_with = "0.10.17"
+__generated_with = "0.10.16"
 app = marimo.App(width="medium")


@ -467,21 +458,15 @@ def f1_score_metric(PrecisionMetric, RecallMetric, keras, mo, tf):


@app.cell(hide_code=True)
-def custom_validation_metrics(mo, tf):
+def custom_validation_metrics(X_val, mo, tf, y_val):
    #Custom callback to compute metrics on validation data
    class CustomValidationMetrics(tf.keras.callbacks.Callback):
-        def __init__(self, X_val, y_val):
-            super().__init__()  # Initialize the parent class
-            self.X_val = X_val
-            self.y_val = y_val
-
        def on_epoch_end(self, epoch, logs=None):
-            val_predictions = self.model.predict(self.X_val, verbose=0)
+            val_predictions = self.model.predict(X_val, verbose=0)
            val_predictions = (val_predictions > 0.5).astype(int)  # Binarize predictions

-            # Compute precision, recall, and f1-score
-            precision = tf.keras.metrics.Precision()(self.y_val, val_predictions)
-            recall = tf.keras.metrics.Recall()(self.y_val, val_predictions)
+            precision = tf.keras.metrics.Precision()(y_val, val_predictions)
+            recall = tf.keras.metrics.Recall()(y_val, val_predictions)
            f1_score = 2 * (precision * recall) / (precision + recall + 1e-7)

            print(f"\nEpoch {epoch + 1} Validation Metrics - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1_score:.4f}")
@ -558,7 +543,7 @@ def show_train_model_shape(mo, perfspec, prepare_train, verbose):


@app.cell(hide_code=True)
-def make_model(CustomValidationMetrics, mo, np, perfspec):
+def make_model(mo, np, perfspec):
    #  Define the LSTM model
    def make_model(X=[],y=[],label_encoder=[], encoded_actions=[]):
        if len(X) == 0 or len(y) == 0:
@ -584,13 +569,13 @@ def make_model(CustomValidationMetrics, mo, np, perfspec):
        perfspec['vars']['model'] = Sequential(
            [
                #Embedding(input_dim=vocab_size, output_dim=embedding_dim),
-                Input(shape=(perfspec['settings']['sequence_length'], 1)),
                LSTM(
                    perfspec['settings']['lstm_units_1'],
                    return_sequences=True,
                    recurrent_dropout=perfspec['settings']['dropout_rate'],
-                    input_shape=(perfspec['settings']['sequence_length'], 1),
-                ),            
+                    #input_shape = (2,vocab_size),
+                ),
+                Input(shape=(perfspec['settings']['sequence_length'], 1)),
                LSTM(
                    perfspec['settings']['lstm_units_2'],
                    return_sequences=False,
@ -637,12 +622,11 @@ def make_model(CustomValidationMetrics, mo, np, perfspec):

        # Callbacks
        early_stopping = EarlyStopping(
-            monitor="val_loss", patience=10, restore_best_weights=True
+            monitor="val_loss", patience=5, restore_best_weights=True
        )
        lr_reduction = ReduceLROnPlateau(
-            monitor="val_loss", patience=8, factor=0.8, min_lr=0.0001
+            monitor="val_loss", patience=3, factor=0.5, min_lr=0.0001
        )
-        custom_metrics_callback = CustomValidationMetrics(X, y)
        if perfspec['settings']['checkpoint_mode'] == "weights":
            # Save only the weights of the model instead of the full model.
            checkpoint = ModelCheckpoint(
@ -661,9 +645,8 @@ def make_model(CustomValidationMetrics, mo, np, perfspec):
                verbose=1                  # Print messages when saving
            )

-        callbacks=[early_stopping,lr_reduction]
-        callbacks=[early_stopping,lr_reduction]
-        callbacks.append(custom_metrics_callback)
+        callbacks=[early_stopping,lr_reduction]  #,CustomValidationMetrics] 
+        callbacks=[]  #,CustomValidationMetrics] 
        if checkpoint != None:
            callbacks.append(checkpoint)

@ -693,7 +676,7 @@ def make_model(CustomValidationMetrics, mo, np, perfspec):

        This is where **model** is creates and **fit**

-        Saved in `perfspec['vars']` as `model` and `history`
+        Saved in `perfspec['vars'] as `model` and `history`
        """
    )
    return (make_model,)
@ -808,11 +791,10 @@ def perfspec_save_model(Path, mo, perfspec):
 def perfspec_plot_history(Path, mo):
    def plot_history(perfspec):
        import json
-        from keras.src.callbacks import History
        if 'vars' not in perfspec:
            return None
        if perfspec['vars']['history'] != None:
-            if isinstance(perfspec['vars']['history'], History):
+            if 'history' in perfspec['vars']['history']:
                _model_history = perfspec['vars']['history'].history
            else:
                _model_history = perfspec['vars']['history']
@ -1011,9 +993,8 @@ def perfspec_evaluate_model(Path, mo, np, prepare_train):

    def history_info(perfspec):
        import json
-        from keras.src.callbacks import History
        if perfspec['vars']['history'] != None:
-            if isinstance(perfspec['vars']['history'], History):
+            if 'history' in perfspec['vars']['history']:
                model_history = perfspec['vars']['history'].history
            else:
                model_history = perfspec['vars']['history']
@ -1026,7 +1007,7 @@ def perfspec_evaluate_model(Path, mo, np, prepare_train):
                    model_history = json.load(history_file)
        if model_history != None:
            from prettytable import PrettyTable
-            train_loss = model_history['loss']
+            rain_loss = model_history['loss']
            val_loss = model_history['val_loss']
            train_acc = model_history['accuracy']
            val_acc = model_history['val_accuracy']
--- a/raw-audit-logs.log.xz
+++ b/raw-audit-logs.log.xz