Compare commits

..

No commits in common. "18cf59ce70967f2f83f652f04c5256e21118d6c5" and "867404c1f3d62b9a224afd17d74e4478ef5eb543" have entirely different histories.

10 changed files with 16583 additions and 153 deletions

1
.gitignore vendored
View File

@ -7,4 +7,3 @@ workspace.code-workspace
.VSCodeCounter
wrks
tmp
__pycache__

View File

@ -1,57 +0,0 @@
# PerfSPEC Learning Phase
Based on the [PerfSPEC: Performance Profiling-based Proactive Security Policy Enforcement for Containers](https://ieeexplore.ieee.org/document/10577533) paper presented in [1], this repository contains the source files used to generate and process data.
[PerfSPEC document](PerfSPEC.pdf)
[Presentación in Spanish](presentacion.pdf)
<div style="margin: auto">
<a target="_blank" href="perfspec-learning/src/branch/main/presentacion.pdf"><img src="imgs/prefSPEC-learning.png" width="800"></a>
</div>
__PerfSPEC__ has three phases:
- Ranking
- Learning
- Runtime
This repository focuses on the __Learning__ phase, with attention to:
- Loading and processing event log data
- Predictive learning model
> Note: The __event data collection__ in `raw-audit-logs.log.gz` is assumed to be realistic and representative enough to simulate
administrative operations.
## Files
- `raw-audit-logs.log` contains raw Kubernetes audit logs collected using the `audit-policy.yaml` audit policy.
Tools are distributed in directories:
- [Collect](collect)
- [Process](process)
- [Learning](learning)
As some tasks can be run in [Python](https://python.org) or [Rust](https://www.rust-lang.org/), there is a directory for each programming language inside each task directory.
Each `task/programming-language` may have a __data__ directory where processing output files are generated.
### Collect data
If you wish to [collect](collect) your own dataset, there are several source files that might help:
- `collect/collect.py` is a script to trigger the installation and uninstallation of public Helm repositories.
- `collect/helm-charts.json` is a backup of Helm charts used at the time of the collection.
### Process data
### Learning
## Reference
[1]: [H. Kermabon-Bobinnec et al., "PerfSPEC: Performance Profiling-based Proactive Security Policy Enforcement for Containers," in IEEE Transactions on Dependable and Secure Computing, doi: 10.1109/TDSC.2024.3420712.](https://ieeexplore.ieee.org/document/10577533)

Binary file not shown.

16564
data/audit-logs.csv Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -1,57 +0,0 @@
# PerfSPEC Learning Phase INTRO
Based on the [PerfSPEC: Performance Profiling-based Proactive Security Policy Enforcement for Containers](https://ieeexplore.ieee.org/document/10577533) paper presented in [1], this repository contains the source files used to generate and process data.
[PerfSPEC document](PerfSPEC.pdf)
[Presentación in Spanish](presentacion.pdf)
<div style="margin: auto">
<a target="_blank" href="perfspec-learning/src/branch/main/presentacion.pdf"><img src="imgs/prefSPEC-learning.png" width="800"></a>
</div>
__PerfSPEC__ has three phases:
- Ranking
- Learning
- Runtime
This repository focuses on the __Learning__ phase, with attention to:
- Loading and processing event log data
- Predictive learning model
> Note: The __event data collection__ in `raw-audit-logs.log.gz` is assumed to be realistic and representative enough to simulate
administrative operations.
## Files
- `raw-audit-logs.log` contains raw Kubernetes audit logs collected using the `audit-policy.yaml` audit policy.
Tools are distributed in directories:
- [Collect](collect)
- [Process](process)
- [Learning](learning)
As some tasks can be run in [Python](https://python.org) or [Rust](https://www.rust-lang.org/), there is a directory for each programming language inside each task directory.
Each `task/programming-language` may have a __data__ directory where processing output files are generated.
### Collect data
If you wish to [collect](collect) your own dataset, there are several source files that might help:
- `collect/collect.py` is a script to trigger the installation and uninstallation of public Helm repositories.
- `collect/helm-charts.json` is a backup of Helm charts used at the time of the collection.
### Process data
### Learning
## Reference
[1]: [H. Kermabon-Bobinnec et al., "PerfSPEC: Performance Profiling-based Proactive Security Policy Enforcement for Containers," in IEEE Transactions on Dependable and Secure Computing, doi: 10.1109/TDSC.2024.3420712.](https://ieeexplore.ieee.org/document/10577533)

View File

@ -1,6 +1,6 @@
import marimo
__generated_with = "0.10.17"
__generated_with = "0.10.16"
app = marimo.App(width="medium")
@ -82,7 +82,7 @@ def perfspec_vars(app_vars, mo):
perfspec = dict(
defaults = dict(
epochs=300,
epochs=400,
train_size=0.8,
sequence_length=2,
model_path="perfSPEC_model.keras",

View File

@ -1,15 +1,6 @@
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "keras==3.8.0",
# "marimo",
# "numpy==2.2.2",
# ]
# ///
import marimo
__generated_with = "0.10.17"
__generated_with = "0.10.16"
app = marimo.App(width="medium")
@ -467,21 +458,15 @@ def f1_score_metric(PrecisionMetric, RecallMetric, keras, mo, tf):
@app.cell(hide_code=True)
def custom_validation_metrics(mo, tf):
def custom_validation_metrics(X_val, mo, tf, y_val):
#Custom callback to compute metrics on validation data
class CustomValidationMetrics(tf.keras.callbacks.Callback):
def __init__(self, X_val, y_val):
super().__init__() # Initialize the parent class
self.X_val = X_val
self.y_val = y_val
def on_epoch_end(self, epoch, logs=None):
val_predictions = self.model.predict(self.X_val, verbose=0)
val_predictions = self.model.predict(X_val, verbose=0)
val_predictions = (val_predictions > 0.5).astype(int) # Binarize predictions
# Compute precision, recall, and f1-score
precision = tf.keras.metrics.Precision()(self.y_val, val_predictions)
recall = tf.keras.metrics.Recall()(self.y_val, val_predictions)
precision = tf.keras.metrics.Precision()(y_val, val_predictions)
recall = tf.keras.metrics.Recall()(y_val, val_predictions)
f1_score = 2 * (precision * recall) / (precision + recall + 1e-7)
print(f"\nEpoch {epoch + 1} Validation Metrics - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1_score:.4f}")
@ -558,7 +543,7 @@ def show_train_model_shape(mo, perfspec, prepare_train, verbose):
@app.cell(hide_code=True)
def make_model(CustomValidationMetrics, mo, np, perfspec):
def make_model(mo, np, perfspec):
# Define the LSTM model
def make_model(X=[],y=[],label_encoder=[], encoded_actions=[]):
if len(X) == 0 or len(y) == 0:
@ -584,13 +569,13 @@ def make_model(CustomValidationMetrics, mo, np, perfspec):
perfspec['vars']['model'] = Sequential(
[
#Embedding(input_dim=vocab_size, output_dim=embedding_dim),
Input(shape=(perfspec['settings']['sequence_length'], 1)),
LSTM(
perfspec['settings']['lstm_units_1'],
return_sequences=True,
recurrent_dropout=perfspec['settings']['dropout_rate'],
input_shape=(perfspec['settings']['sequence_length'], 1),
),
#input_shape = (2,vocab_size),
),
Input(shape=(perfspec['settings']['sequence_length'], 1)),
LSTM(
perfspec['settings']['lstm_units_2'],
return_sequences=False,
@ -637,12 +622,11 @@ def make_model(CustomValidationMetrics, mo, np, perfspec):
# Callbacks
early_stopping = EarlyStopping(
monitor="val_loss", patience=10, restore_best_weights=True
monitor="val_loss", patience=5, restore_best_weights=True
)
lr_reduction = ReduceLROnPlateau(
monitor="val_loss", patience=8, factor=0.8, min_lr=0.0001
monitor="val_loss", patience=3, factor=0.5, min_lr=0.0001
)
custom_metrics_callback = CustomValidationMetrics(X, y)
if perfspec['settings']['checkpoint_mode'] == "weights":
# Save only the weights of the model instead of the full model.
checkpoint = ModelCheckpoint(
@ -661,9 +645,8 @@ def make_model(CustomValidationMetrics, mo, np, perfspec):
verbose=1 # Print messages when saving
)
callbacks=[early_stopping,lr_reduction]
callbacks=[early_stopping,lr_reduction]
callbacks.append(custom_metrics_callback)
callbacks=[early_stopping,lr_reduction] #,CustomValidationMetrics]
callbacks=[] #,CustomValidationMetrics]
if checkpoint != None:
callbacks.append(checkpoint)
@ -693,7 +676,7 @@ def make_model(CustomValidationMetrics, mo, np, perfspec):
This is where **model** is creates and **fit**
Saved in `perfspec['vars']` as `model` and `history`
Saved in `perfspec['vars'] as `model` and `history`
"""
)
return (make_model,)
@ -808,11 +791,10 @@ def perfspec_save_model(Path, mo, perfspec):
def perfspec_plot_history(Path, mo):
def plot_history(perfspec):
import json
from keras.src.callbacks import History
if 'vars' not in perfspec:
return None
if perfspec['vars']['history'] != None:
if isinstance(perfspec['vars']['history'], History):
if 'history' in perfspec['vars']['history']:
_model_history = perfspec['vars']['history'].history
else:
_model_history = perfspec['vars']['history']
@ -1011,9 +993,8 @@ def perfspec_evaluate_model(Path, mo, np, prepare_train):
def history_info(perfspec):
import json
from keras.src.callbacks import History
if perfspec['vars']['history'] != None:
if isinstance(perfspec['vars']['history'], History):
if 'history' in perfspec['vars']['history']:
model_history = perfspec['vars']['history'].history
else:
model_history = perfspec['vars']['history']
@ -1026,7 +1007,7 @@ def perfspec_evaluate_model(Path, mo, np, prepare_train):
model_history = json.load(history_file)
if model_history != None:
from prettytable import PrettyTable
train_loss = model_history['loss']
rain_loss = model_history['loss']
val_loss = model_history['val_loss']
train_acc = model_history['accuracy']
val_acc = model_history['val_accuracy']

Binary file not shown.