471 lines
78 KiB
Plaintext
471 lines
78 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "c91abe80-8d3c-42f2-a3d0-c74ad99f23b0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sequence_length = 2 # window length: number of consecutive events in each input sample"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "1be328c3-d3b7-4558-84b7-1a4131f58168",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
|
|
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
|
|
"\n",
|
|
"# Load the audit log: one event name per line, kept in file order.\n",
|
|
"with open('events-dataset-audit.txt', 'r') as file:\n",
|
|
"    events = [line.rstrip() for line in file]\n",
|
|
"\n",
|
|
"# Distinct events in order of first appearance. dict.fromkeys preserves\n",
|
|
"# insertion order and avoids the quadratic `event not in list` scan.\n",
|
|
"unique_events = list(dict.fromkeys(events))\n",
|
|
"\n",
|
|
"# Initialize the tokenizer\n",
|
|
"tokenizer = Tokenizer(num_words=10000)  # Limit vocabulary size to 10,000 words\n",
|
|
"tokenizer.fit_on_texts(unique_events)\n",
|
|
"\n",
|
|
"# Convert data to sequences of integers\n",
|
|
"X_train_seq = tokenizer.texts_to_sequences(unique_events)\n",
|
|
"\n",
|
|
"# Pad sequences to a fixed length (e.g., 10 words)\n",
|
|
"X_train_padded = pad_sequences(X_train_seq, maxlen=10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "07059547-7de6-4987-b421-980411fa6f96",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X = []\n",
|
|
"y = []\n",
|
|
"# Slide the window over the full chronological event stream. The de-duplicated\n",
|
|
"# unique_events list only defines the vocabulary; windowing over it would drop\n",
|
|
"# every repeated event and with it the real event-to-event transitions.\n",
|
|
"encoded_events = events\n",
|
|
"for i in range(len(encoded_events) - sequence_length):\n",
|
|
"    X.append(encoded_events[i:i + sequence_length])  # input sequence\n",
|
|
"    y.append(encoded_events[i + sequence_length])  # target event (the one right after the window)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "dfb9b92f-75ec-4994-982c-d3dd35807a33",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"vocab_size = len(unique_events)\n",
|
|
"\n",
|
|
"# Column of each event inside a one-hot vector (its position in unique_events).\n",
|
|
"event_to_index = {event: index for index, event in enumerate(unique_events)}\n",
|
|
"\n",
|
|
"# Convert X to one-hot encoding, shape (samples, sequence_length, vocab_size).\n",
|
|
"X_one_hot = np.zeros((len(X), sequence_length, vocab_size))\n",
|
|
"for i, sequence in enumerate(X):\n",
|
|
"    for j, event in enumerate(sequence):\n",
|
|
"        # Set only this event's column; assigning to X_one_hot[i, j] would\n",
|
|
"        # fill the whole vocabulary axis with ones.\n",
|
|
"        X_one_hot[i, j, event_to_index[event]] = 1\n",
|
|
"\n",
|
|
"# Convert y to one-hot encoding, shape (samples, vocab_size).\n",
|
|
"y_one_hot = np.zeros((len(y), vocab_size))\n",
|
|
"for i, event in enumerate(y):\n",
|
|
"    y_one_hot[i, event_to_index[event]] = 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "8a8baab3-9a9c-4b0f-a27a-403ceee39e39",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"X_train shape: (144, 2, 146)\n",
|
|
"y_train shape: (144, 146)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"# Report the dimensions of the encoded input and target arrays.\n",
|
|
"for label, array in ((\"X_train shape:\", X_one_hot), (\"y_train shape:\", y_one_hot)):\n",
|
|
"    print(label, array.shape)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "74548eef-86b7-4050-8fb9-24fd28213c1d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/jesusperezlorenzo/anaconda3/lib/python3.12/site-packages/keras/src/layers/rnn/rnn.py:200: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
|
|
" super().__init__(**kwargs)\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"sequential\"</span>\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1mModel: \"sequential\"\u001b[0m\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
|
|
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃\n",
|
|
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
|
|
"│ lstm (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">2</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">412,672</span> │\n",
|
|
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
|
|
"│ lstm_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">197,120</span> │\n",
|
|
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
|
|
"│ dense (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">146</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">18,834</span> │\n",
|
|
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
|
|
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
|
|
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
|
|
"│ lstm (\u001b[38;5;33mLSTM\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m412,672\u001b[0m │\n",
|
|
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
|
|
"│ lstm_1 (\u001b[38;5;33mLSTM\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m197,120\u001b[0m │\n",
|
|
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
|
|
"│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m146\u001b[0m) │ \u001b[38;5;34m18,834\u001b[0m │\n",
|
|
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">628,626</span> (2.40 MB)\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m628,626\u001b[0m (2.40 MB)\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">628,626</span> (2.40 MB)\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m628,626\u001b[0m (2.40 MB)\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
|
|
"</pre>\n"
|
|
],
|
|
"text/plain": [
|
|
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Epoch 1/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step - accuracy: 0.0000e+00 - loss: 727.8144 - val_accuracy: 0.0000e+00 - val_loss: 727.9026\n",
|
|
"Epoch 2/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 0.0000e+00 - loss: 727.9030 - val_accuracy: 0.0000e+00 - val_loss: 728.2988\n",
|
|
"Epoch 3/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.0000e+00 - loss: 728.2980 - val_accuracy: 0.0000e+00 - val_loss: 729.1073\n",
|
|
"Epoch 4/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 729.1066 - val_accuracy: 0.0000e+00 - val_loss: 730.4880\n",
|
|
"Epoch 5/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 730.4852 - val_accuracy: 0.0000e+00 - val_loss: 732.5898\n",
|
|
"Epoch 6/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.0000e+00 - loss: 732.5815 - val_accuracy: 0.0000e+00 - val_loss: 735.4796\n",
|
|
"Epoch 7/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 48ms/step - accuracy: 0.0000e+00 - loss: 735.4669 - val_accuracy: 0.0000e+00 - val_loss: 739.1130\n",
|
|
"Epoch 8/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 0.0000e+00 - loss: 739.0916 - val_accuracy: 0.0000e+00 - val_loss: 743.3353\n",
|
|
"Epoch 9/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 48ms/step - accuracy: 0.0000e+00 - loss: 743.3066 - val_accuracy: 0.0000e+00 - val_loss: 747.9219\n",
|
|
"Epoch 10/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 747.9171 - val_accuracy: 0.0000e+00 - val_loss: 752.6348\n",
|
|
"Epoch 11/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 752.6017 - val_accuracy: 0.0000e+00 - val_loss: 757.2569\n",
|
|
"Epoch 12/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.0000e+00 - loss: 757.2258 - val_accuracy: 0.0000e+00 - val_loss: 761.6079\n",
|
|
"Epoch 13/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 761.6020 - val_accuracy: 0.0000e+00 - val_loss: 765.5532\n",
|
|
"Epoch 14/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.0000e+00 - loss: 765.4924 - val_accuracy: 0.0000e+00 - val_loss: 769.0084\n",
|
|
"Epoch 15/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.0000e+00 - loss: 768.9764 - val_accuracy: 0.0000e+00 - val_loss: 771.9425\n",
|
|
"Epoch 16/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 771.9097 - val_accuracy: 0.0000e+00 - val_loss: 774.3696\n",
|
|
"Epoch 17/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 774.3486 - val_accuracy: 0.0000e+00 - val_loss: 776.3380\n",
|
|
"Epoch 18/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 35ms/step - accuracy: 0.0000e+00 - loss: 776.3245 - val_accuracy: 0.0000e+00 - val_loss: 777.9149\n",
|
|
"Epoch 19/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 45ms/step - accuracy: 0.0000e+00 - loss: 777.8726 - val_accuracy: 0.0000e+00 - val_loss: 779.1755\n",
|
|
"Epoch 20/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 779.1675 - val_accuracy: 0.0000e+00 - val_loss: 780.1923\n",
|
|
"Epoch 21/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step - accuracy: 0.0000e+00 - loss: 780.2054 - val_accuracy: 0.0000e+00 - val_loss: 781.0284\n",
|
|
"Epoch 22/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 49ms/step - accuracy: 0.0000e+00 - loss: 781.0122 - val_accuracy: 0.0000e+00 - val_loss: 781.7344\n",
|
|
"Epoch 23/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 0.0000e+00 - loss: 781.7183 - val_accuracy: 0.0000e+00 - val_loss: 782.3456\n",
|
|
"Epoch 24/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 782.3322 - val_accuracy: 0.0000e+00 - val_loss: 782.8824\n",
|
|
"Epoch 25/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 0.0000e+00 - loss: 782.8856 - val_accuracy: 0.0000e+00 - val_loss: 783.3542\n",
|
|
"Epoch 26/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.0000e+00 - loss: 783.3599 - val_accuracy: 0.0000e+00 - val_loss: 783.7628\n",
|
|
"Epoch 27/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 783.7398 - val_accuracy: 0.0000e+00 - val_loss: 784.1066\n",
|
|
"Epoch 28/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 0.0000e+00 - loss: 784.0988 - val_accuracy: 0.0000e+00 - val_loss: 784.3851\n",
|
|
"Epoch 29/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.0000e+00 - loss: 784.3781 - val_accuracy: 0.0000e+00 - val_loss: 784.6022\n",
|
|
"Epoch 30/30\n",
|
|
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.0000e+00 - loss: 784.5906 - val_accuracy: 0.0000e+00 - val_loss: 784.7664\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"from tensorflow.keras.models import Sequential\n",
|
|
"from tensorflow.keras.layers import Input, LSTM, Dense\n",
|
|
"from tensorflow.keras.optimizers import Adam\n",
|
|
"\n",
|
|
"# Parameters\n",
|
|
"input_length = sequence_length  # Window length; matches the second axis of X_one_hot\n",
|
|
"vocab_size = len(unique_events)  # Number of unique events in the vocabulary\n",
|
|
"lstm_units_1 = 256  # Units in the first LSTM layer\n",
|
|
"lstm_units_2 = 128  # Units in the second LSTM layer\n",
|
|
"dropout_rate = 0.2  # Recurrent dropout rate\n",
|
|
"batch_size = 256  # Batch size for training\n",
|
|
"epochs = 30  # Number of epochs\n",
|
|
"\n",
|
|
"# Model architecture: two stacked LSTMs followed by a softmax over the vocabulary.\n",
|
|
"# The input shape is declared with an explicit Input layer because passing\n",
|
|
"# input_shape to the first LSTM raises a Keras UserWarning.\n",
|
|
"model = Sequential([\n",
|
|
"    Input(shape=(input_length, vocab_size)),\n",
|
|
"    LSTM(lstm_units_1, return_sequences=True, recurrent_dropout=dropout_rate),\n",
|
|
"    LSTM(lstm_units_2, return_sequences=False, recurrent_dropout=dropout_rate),\n",
|
|
"    Dense(vocab_size, activation='softmax')  # One probability per candidate next event\n",
|
|
"])\n",
|
|
"# Compile the model\n",
|
|
"model.compile(\n",
|
|
"    optimizer=Adam(),  # Adam optimizer with default parameters\n",
|
|
"    loss='categorical_crossentropy',  # Targets are one-hot vectors\n",
|
|
"    metrics=['accuracy']\n",
|
|
")\n",
|
|
"# Print the model summary\n",
|
|
"model.summary()\n",
|
|
"# Train on the one-hot windows; 20% of the samples are held out for validation.\n",
|
|
"X_train = X_one_hot\n",
|
|
"y_train = y_one_hot\n",
|
|
"history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "36862d14-4bdc-44f2-a7f0-3f09693ffddf",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Assuming `history` is the result of model.fit()\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"\n",
|
|
"# Plot the per-epoch training and validation curves of one metric.\n",
|
|
"#   history: object returned by model.fit()\n",
|
|
"#   metric:  key in history.history; the validation series is read from 'val_' + metric\n",
|
|
"#   title:   figure title\n",
|
|
"#   ylabel:  label of the y axis\n",
|
|
"def plot_history_metric(history, metric, title, ylabel):\n",
|
|
"    plt.plot(history.history[metric])\n",
|
|
"    plt.plot(history.history['val_' + metric])\n",
|
|
"    plt.title(title)\n",
|
|
"    plt.xlabel('Epochs')\n",
|
|
"    plt.ylabel(ylabel)\n",
|
|
"    # The 'val_*' series is validation data, not a separate test set.\n",
|
|
"    plt.legend(['Train', 'Validation'], loc='upper left')\n",
|
|
"    plt.show()\n",
|
|
"\n",
|
|
"\n",
|
|
"plot_history_metric(history, 'accuracy', 'Model Accuracy', 'Accuracy')\n",
|
|
"plot_history_metric(history, 'loss', 'Model Loss', 'Loss')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 308,
|
|
"id": "1736c65b-a9ab-45ee-8e62-6dee44efda34",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#from tensorflow.keras.preprocessing import sequence\n",
|
|
"#review_sequence = X_one_hot[0]\n",
|
|
"#prediction = model.predict(review_sequence)\n",
|
|
"#print (y_train[2])\n",
|
|
"#print (X[5])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 434,
|
|
"id": "0d17d5da-a357-4b7d-968e-f415f2aa7168",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X_test = [['update_mutatingwebhookconfigurations', 'patch_nodes']]\n",
|
|
"y_test = ['update_mutatingwebhookconfigurations', 'patch_nodes']\n",
|
|
"\n",
|
|
"import numpy as np\n",
|
|
"sequence_length = 2  # window length\n",
|
|
"\n",
|
|
"test_size = len(X_test)\n",
|
|
"\n",
|
|
"# The encoded vectors must span the training vocabulary (unique_events), not the\n",
|
|
"# number of test samples; otherwise they do not fit the model's input and output.\n",
|
|
"test_vocab_size = len(unique_events)\n",
|
|
"test_event_to_index = {event: index for index, event in enumerate(unique_events)}\n",
|
|
"\n",
|
|
"# Convert X to one-hot encoding, shape (samples, sequence_length, vocabulary size).\n",
|
|
"# An event that is missing from unique_events raises KeyError.\n",
|
|
"X_test_one_hot = np.zeros((test_size, sequence_length, test_vocab_size))\n",
|
|
"for i, sequence in enumerate(X_test):\n",
|
|
"    for j, event in enumerate(sequence):\n",
|
|
"        X_test_one_hot[i, j, test_event_to_index[event]] = 1\n",
|
|
"\n",
|
|
"# Convert y to one-hot encoding, shape (labels, vocabulary size).\n",
|
|
"# NOTE(review): y_test holds two labels for a single X_test window; evaluating\n",
|
|
"# the model needs exactly one target per window - confirm the intended target.\n",
|
|
"y_test_one_hot = np.zeros((len(y_test), test_vocab_size))\n",
|
|
"for i, event in enumerate(y_test):\n",
|
|
"    y_test_one_hot[i, test_event_to_index[event]] = 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 394,
|
|
"id": "27831a1f-5e0e-4ac0-830d-6f13feec711a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Assuming you have a trained model and a test dataset (X_test, y_test)\n",
|
|
"#loss, accuracy = model.evaluate(X_test_one_hot, y_test_one_hot)\n",
|
|
"\n",
|
|
"#print(f\"Test Loss: {loss}\")\n",
|
|
"#print(f\"Test Accuracy: {accuracy}\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 414,
|
|
"id": "f72d7129-29d4-4ff1-951b-73556499fcc9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#y_pred = model.predict(X_test_one_hot)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 426,
|
|
"id": "320dfc2c-6937-45d1-9404-ad6ce82d5e35",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from keras.models import load_model\n",
|
|
"\n",
|
|
"# Write the trained network to disk.\n",
|
|
"model.save('perfSPEC_model.keras')\n",
|
|
"\n",
|
|
"# Read it back; loaded_model can then be used for predictions or evaluation.\n",
|
|
"loaded_model = load_model('perfSPEC_model.keras')\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|