chore: add learning and process path with python content

This commit is contained in:
Jesús Pérez Lorenzo 2025-01-20 07:55:53 +00:00
parent 01987b061f
commit 3cfbb87202
6 changed files with 30668 additions and 94287 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,656 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "bc478b25-756f-4686-8995-a079add279b4",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0a7982a2-a348-43c7-93b8-774605276a60",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>kind</th>\n",
" <th>apiVersion</th>\n",
" <th>level</th>\n",
" <th>auditID</th>\n",
" <th>stage</th>\n",
" <th>requestURI</th>\n",
" <th>verb</th>\n",
" <th>sourceIPs</th>\n",
" <th>userAgent</th>\n",
" <th>requestReceivedTimestamp</th>\n",
" <th>...</th>\n",
" <th>annotations_k8s.io/removed-release</th>\n",
" <th>annotations_pod-security.kubernetes.io/enforce-policy</th>\n",
" <th>annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_5</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_5</th>\n",
" <th>annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_7</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_7</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_18</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_16</th>\n",
" <th>annotations</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_15</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>4fed3bf5-d1a4-4bf3-a137-bb9ff5380486</td>\n",
" <td>ResponseStarted</td>\n",
" <td>/api/v1/nodes?allowWatchBookmarks=true&amp;fieldSe...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.128513Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>a1b86d5d-6f8f-4058-9710-19f5cc40a68a</td>\n",
" <td>ResponseStarted</td>\n",
" <td>/api/v1/namespaces/kube-flannel/configmaps?all...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.129781Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>a1b86d5d-6f8f-4058-9710-19f5cc40a68a</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/api/v1/namespaces/kube-flannel/configmaps?all...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.129781Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c</td>\n",
" <td>ResponseStarted</td>\n",
" <td>/api/v1/namespaces/kube-system/configmaps?allo...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.130396Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/api/v1/namespaces/kube-system/configmaps?allo...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.130396Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94282</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>ee239268-d276-431e-897b-c9e4ae2ec4a2</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/readyz</td>\n",
" <td>get</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-probe/1.23</td>\n",
" <td>2023-01-27T19:03:15.516370Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94283</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>c4027d84-5a11-46f5-808b-cde4c52e68a2</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>get</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-scheduler/v1.23.15 (linux/amd64) kubernet...</td>\n",
" <td>2023-01-27T19:03:15.630828Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94284</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>ccd5ea66-1586-4453-8634-1428d88795f3</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>get</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-controller-manager/v1.23.15 (linux/amd64)...</td>\n",
" <td>2023-01-27T19:03:15.644742Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94285</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>42728a6e-4b34-406e-89e4-fad679bde27b</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>update</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-scheduler/v1.23.15 (linux/amd64) kubernet...</td>\n",
" <td>2023-01-27T19:03:15.644221Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94286</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>db1fa802-3c2b-43f3-ad5d-cb09ac400074</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>update</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-controller-manager/v1.23.15 (linux/amd64)...</td>\n",
" <td>2023-01-27T19:03:15.653453Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>94287 rows × 43 columns</p>\n",
"</div>"
],
"text/plain": [
" kind apiVersion level auditID \\\n",
"0 Event audit.k8s.io/v1 Metadata 4fed3bf5-d1a4-4bf3-a137-bb9ff5380486 \n",
"1 Event audit.k8s.io/v1 Metadata a1b86d5d-6f8f-4058-9710-19f5cc40a68a \n",
"2 Event audit.k8s.io/v1 Metadata a1b86d5d-6f8f-4058-9710-19f5cc40a68a \n",
"3 Event audit.k8s.io/v1 Metadata 3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c \n",
"4 Event audit.k8s.io/v1 Metadata 3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c \n",
"... ... ... ... ... \n",
"94282 Event audit.k8s.io/v1 Metadata ee239268-d276-431e-897b-c9e4ae2ec4a2 \n",
"94283 Event audit.k8s.io/v1 Metadata c4027d84-5a11-46f5-808b-cde4c52e68a2 \n",
"94284 Event audit.k8s.io/v1 Metadata ccd5ea66-1586-4453-8634-1428d88795f3 \n",
"94285 Event audit.k8s.io/v1 Metadata 42728a6e-4b34-406e-89e4-fad679bde27b \n",
"94286 Event audit.k8s.io/v1 Metadata db1fa802-3c2b-43f3-ad5d-cb09ac400074 \n",
"\n",
" stage requestURI \\\n",
"0 ResponseStarted /api/v1/nodes?allowWatchBookmarks=true&fieldSe... \n",
"1 ResponseStarted /api/v1/namespaces/kube-flannel/configmaps?all... \n",
"2 ResponseComplete /api/v1/namespaces/kube-flannel/configmaps?all... \n",
"3 ResponseStarted /api/v1/namespaces/kube-system/configmaps?allo... \n",
"4 ResponseComplete /api/v1/namespaces/kube-system/configmaps?allo... \n",
"... ... ... \n",
"94282 ResponseComplete /readyz \n",
"94283 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"94284 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"94285 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"94286 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"\n",
" verb sourceIPs \\\n",
"0 watch [192.168.60.11] \n",
"1 watch [192.168.60.11] \n",
"2 watch [192.168.60.11] \n",
"3 watch [192.168.60.11] \n",
"4 watch [192.168.60.11] \n",
"... ... ... \n",
"94282 get [192.168.60.10] \n",
"94283 get [192.168.60.10] \n",
"94284 get [192.168.60.10] \n",
"94285 update [192.168.60.10] \n",
"94286 update [192.168.60.10] \n",
"\n",
" userAgent \\\n",
"0 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"1 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"2 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"3 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"4 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"... ... \n",
"94282 kube-probe/1.23 \n",
"94283 kube-scheduler/v1.23.15 (linux/amd64) kubernet... \n",
"94284 kube-controller-manager/v1.23.15 (linux/amd64)... \n",
"94285 kube-scheduler/v1.23.15 (linux/amd64) kubernet... \n",
"94286 kube-controller-manager/v1.23.15 (linux/amd64)... \n",
"\n",
" requestReceivedTimestamp ... annotations_k8s.io/removed-release \\\n",
"0 2023-01-27T18:28:08.128513Z ... NaN \n",
"1 2023-01-27T18:28:08.129781Z ... NaN \n",
"2 2023-01-27T18:28:08.129781Z ... NaN \n",
"3 2023-01-27T18:28:08.130396Z ... NaN \n",
"4 2023-01-27T18:28:08.130396Z ... NaN \n",
"... ... ... ... \n",
"94282 2023-01-27T19:03:15.516370Z ... NaN \n",
"94283 2023-01-27T19:03:15.630828Z ... NaN \n",
"94284 2023-01-27T19:03:15.644742Z ... NaN \n",
"94285 2023-01-27T19:03:15.644221Z ... NaN \n",
"94286 2023-01-27T19:03:15.653453Z ... NaN \n",
"\n",
" annotations_pod-security.kubernetes.io/enforce-policy \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_5 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_5 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_7 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_7 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_18 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_16 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_15 \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
"[94287 rows x 43 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the raw Kubernetes audit log (one JSON event per line) and flatten it.\n",
"file_path = \"../../raw-audit-logs.log\"\n",
"\n",
"# Number of log lines parsed per chunk; lower it if memory is tight.\n",
"chunk_size = 1000\n",
"\n",
"# Parsed chunks, concatenated into a single DataFrame below.\n",
"chunks = []\n",
"\n",
"try:\n",
"    # With lines=True and chunksize set, read_json returns an iterator of\n",
"    # DataFrames; the context manager closes the file when iteration ends.\n",
"    with pd.read_json(file_path, lines=True, chunksize=chunk_size, encoding_errors='ignore') as reader:\n",
"        for chunk in reader:\n",
"            chunks.append(chunk)\n",
"except ValueError as e:\n",
"    # Best effort: keep the chunks parsed before the malformed line, but say\n",
"    # how much was loaded so a truncated DataFrame is not mistaken for a full one.\n",
"    print(f\"Error while parsing JSON: {e}\")\n",
"    print(f\"Continuing with the {len(chunks)} chunk(s) parsed before the error\")\n",
"\n",
"# pd.concat([]) fails with an unrelated 'No objects to concatenate' message,\n",
"# so report the real cause when nothing could be parsed.\n",
"if not chunks:\n",
"    raise ValueError(f\"No audit events could be parsed from {file_path}\")\n",
"\n",
"df = pd.concat(chunks, ignore_index=True)\n",
"\n",
"# Flatten nested dict columns into '<parent>_<child>' columns\n",
"# (e.g. the 'annotations' dict becomes 'annotations_<key>').\n",
"df_normalized = pd.json_normalize(df.to_dict(orient='records'), sep='_')\n",
"\n",
"df_normalized"
]
},
{
"cell_type": "code",
"execution_count": 224,
"id": "c11f7c76-4453-4829-879e-7ac7f1faf541",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2163"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the event table from the pre-extracted audit-log CSV.\n",
"# Column names are supplied here, so pandas treats every line of the file as data.\n",
"audit_columns = ['hostIP', 'Timestamp', 'username', 'verb', 'resourceType', 'subresource', 'ObjectName', 'ObjectName1', 'requestURI', 'auditID', 'stage', 'statusCode', 'UID', 'ReferenceUID', 'UID1']\n",
"auditLogRaw = pd.read_csv(r\"data/audit-logs.csv\", names=audit_columns)\n",
"\n",
"# Verbs and resource types excluded from the event dataset.\n",
"ignored_verbs = [\"get\", \"watch\", \"list\"]\n",
"ignored_resources = [\"events\", \"leases\", \"replicationcontrollers\"]\n",
"\n",
"# De-duplicate on the full row (all 15 columns) before any column is discarded.\n",
"auditLogDF = auditLogRaw.drop_duplicates(ignore_index=True)\n",
"\n",
"# Rows with a missing verb/resourceType are kept, as with the original '!=' tests.\n",
"# .copy() makes the filtered frame independent so the column assignments below\n",
"# do not write into a slice of the de-duplicated frame.\n",
"auditLogDF = auditLogDF[\n",
"    ~auditLogDF[\"verb\"].isin(ignored_verbs)\n",
"    & ~auditLogDF[\"resourceType\"].isin(ignored_resources)\n",
"].copy()\n",
"\n",
"auditLogDF[\"event_type\"] = auditLogDF[\"verb\"] + \"_\" + auditLogDF[\"resourceType\"]\n",
"\n",
"# ObjectName falls back to ObjectName1 when missing.\n",
"auditLogDF[\"ObjectName\"] = auditLogDF[\"ObjectName\"].combine_first(auditLogDF[\"ObjectName1\"])\n",
"\n",
"# UID precedence: ReferenceUID first, then UID, then UID1.\n",
"auditLogDF[\"UID\"] = auditLogDF[\"ReferenceUID\"].fillna(auditLogDF[\"UID\"].fillna(auditLogDF[\"UID1\"]))\n",
"\n",
"# Keep only the columns used downstream, selected by name instead of by position.\n",
"auditLogDF = auditLogDF[[\"verb\", \"resourceType\", \"ObjectName\", \"UID\", \"event_type\"]]\n",
"\n",
"# Number of events left after filtering (shown as the cell result).\n",
"len(auditLogDF)"
]
},
{
"cell_type": "code",
"execution_count": 227,
"id": "6dce47db-06fb-4dca-8eda-056118f7459b",
"metadata": {},
"outputs": [],
"source": [
"# Write one event_type per line to the events dataset.\n",
"# The file is opened once, not once per row. Append mode is kept, so re-running\n",
"# this cell adds the same events again; delete the file first for a clean dataset.\n",
"with open(\"data/events-dataset-audit.txt\", \"a\") as event_file:\n",
"    event_file.writelines(\"%s\\n\" % event_type for event_type in auditLogDF[\"event_type\"])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff