clean: process files

This commit is contained in:
Jesús Pérez Lorenzo 2025-01-27 00:58:24 +00:00
parent d5d5cfa0e5
commit 0912192a97
3 changed files with 0 additions and 23709 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,656 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "bc478b25-756f-4686-8995-a079add279b4",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0a7982a2-a348-43c7-93b8-774605276a60",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>kind</th>\n",
" <th>apiVersion</th>\n",
" <th>level</th>\n",
" <th>auditID</th>\n",
" <th>stage</th>\n",
" <th>requestURI</th>\n",
" <th>verb</th>\n",
" <th>sourceIPs</th>\n",
" <th>userAgent</th>\n",
" <th>requestReceivedTimestamp</th>\n",
" <th>...</th>\n",
" <th>annotations_k8s.io/removed-release</th>\n",
" <th>annotations_pod-security.kubernetes.io/enforce-policy</th>\n",
" <th>annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_5</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_5</th>\n",
" <th>annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_7</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_7</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_18</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_16</th>\n",
" <th>annotations</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_15</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>4fed3bf5-d1a4-4bf3-a137-bb9ff5380486</td>\n",
" <td>ResponseStarted</td>\n",
" <td>/api/v1/nodes?allowWatchBookmarks=true&amp;fieldSe...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.128513Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>a1b86d5d-6f8f-4058-9710-19f5cc40a68a</td>\n",
" <td>ResponseStarted</td>\n",
" <td>/api/v1/namespaces/kube-flannel/configmaps?all...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.129781Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>a1b86d5d-6f8f-4058-9710-19f5cc40a68a</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/api/v1/namespaces/kube-flannel/configmaps?all...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.129781Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c</td>\n",
" <td>ResponseStarted</td>\n",
" <td>/api/v1/namespaces/kube-system/configmaps?allo...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.130396Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/api/v1/namespaces/kube-system/configmaps?allo...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.130396Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94282</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>ee239268-d276-431e-897b-c9e4ae2ec4a2</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/readyz</td>\n",
" <td>get</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-probe/1.23</td>\n",
" <td>2023-01-27T19:03:15.516370Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94283</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>c4027d84-5a11-46f5-808b-cde4c52e68a2</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>get</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-scheduler/v1.23.15 (linux/amd64) kubernet...</td>\n",
" <td>2023-01-27T19:03:15.630828Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94284</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>ccd5ea66-1586-4453-8634-1428d88795f3</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>get</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-controller-manager/v1.23.15 (linux/amd64)...</td>\n",
" <td>2023-01-27T19:03:15.644742Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94285</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>42728a6e-4b34-406e-89e4-fad679bde27b</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>update</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-scheduler/v1.23.15 (linux/amd64) kubernet...</td>\n",
" <td>2023-01-27T19:03:15.644221Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94286</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>db1fa802-3c2b-43f3-ad5d-cb09ac400074</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>update</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-controller-manager/v1.23.15 (linux/amd64)...</td>\n",
" <td>2023-01-27T19:03:15.653453Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>94287 rows × 43 columns</p>\n",
"</div>"
],
"text/plain": [
" kind apiVersion level auditID \\\n",
"0 Event audit.k8s.io/v1 Metadata 4fed3bf5-d1a4-4bf3-a137-bb9ff5380486 \n",
"1 Event audit.k8s.io/v1 Metadata a1b86d5d-6f8f-4058-9710-19f5cc40a68a \n",
"2 Event audit.k8s.io/v1 Metadata a1b86d5d-6f8f-4058-9710-19f5cc40a68a \n",
"3 Event audit.k8s.io/v1 Metadata 3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c \n",
"4 Event audit.k8s.io/v1 Metadata 3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c \n",
"... ... ... ... ... \n",
"94282 Event audit.k8s.io/v1 Metadata ee239268-d276-431e-897b-c9e4ae2ec4a2 \n",
"94283 Event audit.k8s.io/v1 Metadata c4027d84-5a11-46f5-808b-cde4c52e68a2 \n",
"94284 Event audit.k8s.io/v1 Metadata ccd5ea66-1586-4453-8634-1428d88795f3 \n",
"94285 Event audit.k8s.io/v1 Metadata 42728a6e-4b34-406e-89e4-fad679bde27b \n",
"94286 Event audit.k8s.io/v1 Metadata db1fa802-3c2b-43f3-ad5d-cb09ac400074 \n",
"\n",
" stage requestURI \\\n",
"0 ResponseStarted /api/v1/nodes?allowWatchBookmarks=true&fieldSe... \n",
"1 ResponseStarted /api/v1/namespaces/kube-flannel/configmaps?all... \n",
"2 ResponseComplete /api/v1/namespaces/kube-flannel/configmaps?all... \n",
"3 ResponseStarted /api/v1/namespaces/kube-system/configmaps?allo... \n",
"4 ResponseComplete /api/v1/namespaces/kube-system/configmaps?allo... \n",
"... ... ... \n",
"94282 ResponseComplete /readyz \n",
"94283 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"94284 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"94285 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"94286 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"\n",
" verb sourceIPs \\\n",
"0 watch [192.168.60.11] \n",
"1 watch [192.168.60.11] \n",
"2 watch [192.168.60.11] \n",
"3 watch [192.168.60.11] \n",
"4 watch [192.168.60.11] \n",
"... ... ... \n",
"94282 get [192.168.60.10] \n",
"94283 get [192.168.60.10] \n",
"94284 get [192.168.60.10] \n",
"94285 update [192.168.60.10] \n",
"94286 update [192.168.60.10] \n",
"\n",
" userAgent \\\n",
"0 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"1 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"2 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"3 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"4 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"... ... \n",
"94282 kube-probe/1.23 \n",
"94283 kube-scheduler/v1.23.15 (linux/amd64) kubernet... \n",
"94284 kube-controller-manager/v1.23.15 (linux/amd64)... \n",
"94285 kube-scheduler/v1.23.15 (linux/amd64) kubernet... \n",
"94286 kube-controller-manager/v1.23.15 (linux/amd64)... \n",
"\n",
" requestReceivedTimestamp ... annotations_k8s.io/removed-release \\\n",
"0 2023-01-27T18:28:08.128513Z ... NaN \n",
"1 2023-01-27T18:28:08.129781Z ... NaN \n",
"2 2023-01-27T18:28:08.129781Z ... NaN \n",
"3 2023-01-27T18:28:08.130396Z ... NaN \n",
"4 2023-01-27T18:28:08.130396Z ... NaN \n",
"... ... ... ... \n",
"94282 2023-01-27T19:03:15.516370Z ... NaN \n",
"94283 2023-01-27T19:03:15.630828Z ... NaN \n",
"94284 2023-01-27T19:03:15.644742Z ... NaN \n",
"94285 2023-01-27T19:03:15.644221Z ... NaN \n",
"94286 2023-01-27T19:03:15.653453Z ... NaN \n",
"\n",
" annotations_pod-security.kubernetes.io/enforce-policy \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_5 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_5 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_7 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_7 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_18 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_16 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_15 \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
"[94287 rows x 43 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"file_path = \"../../raw-audit-logs.log\"\n",
"\n",
"# Set the chunk size (number of rows to process at a time)\n",
"chunk_size = 1000 # Adjust based on your available memory\n",
"\n",
"# Create an empty list to hold the chunks\n",
"chunks = []\n",
"\n",
"# Iterate over the file in chunks\n",
"try:\n",
" for chunk in pd.read_json(file_path, lines=True, chunksize=chunk_size, encoding_errors='ignore'):\n",
" # Append each chunk to the list\n",
" chunks.append(chunk)\n",
"except ValueError as e:\n",
" print(f\"Error while parsing JSON: {e}\")\n",
"\n",
"# Combine all chunks into a single DataFrame\n",
"df = pd.concat(chunks, ignore_index=True)\n",
"\n",
"# Display the first few rows of the DataFrame\n",
"#print(df.head())\n",
"#df['user_parsed'] = df['user'].apply(json.loads)\n",
"#df_exploded = df.explode('user')\n",
"\n",
"df_normalized = pd.json_normalize(df.to_dict(orient='records'),\n",
"# record_path=['details'],\n",
"# meta=['timestamp', 'event', ['user', 'id'], ['user', 'name'], ['user', 'email']],\n",
" sep='_'\n",
")\n",
"\n",
"#df.head()\n",
"#df_normalized.info()\n",
"df_normalized.iloc[0:2]\n",
"df_normalized"
]
},
{
"cell_type": "code",
"execution_count": 224,
"id": "c11f7c76-4453-4829-879e-7ac7f1faf541",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2163"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"auditLogDF = pd.read_csv(r\"data/audit-logs.csv\", names=['hostIP', 'Timestamp', 'username', 'verb', 'resourceType', 'subresource', 'ObjectName', 'ObjectName1', 'requestURI', 'auditID', 'stage', 'statusCode', 'UID', 'ReferenceUID', 'UID1'])\n",
"\n",
"#print(auditLogDF)\n",
"\n",
"#print(auditLogDF.count)\n",
"auditLogDF = auditLogDF.drop_duplicates(ignore_index=True)\n",
"\n",
"auditLogDF = auditLogDF.drop(auditLogDF.columns[[0, 2, 5, 8, 9, 10, 11]], axis=1)\n",
"auditLogDF = auditLogDF[\n",
" (auditLogDF['verb'] != \"get\") &\n",
" (auditLogDF['verb'] != \"watch\") &\n",
" (auditLogDF['verb'] != \"list\") &\n",
" (auditLogDF['resourceType'] != \"events\") &\n",
" (auditLogDF['resourceType'] != \"leases\")\n",
" ]\n",
"\n",
"#print(auditLogDF.count)\n",
"auditLogDF = auditLogDF[auditLogDF.resourceType != \"replicationcontrollers\"]\n",
"\n",
"auditLogDF[\"event_type\"] = auditLogDF[\"verb\"] + \"_\" + auditLogDF[\"resourceType\"]\n",
"#auditLogDF = auditLogDF[auditLogDF.event_type != \"delete_service\"]\n",
"#auditLogDF = auditLogDF[auditLogDF.event_type != \"create namespaces\"]\n",
"auditLogDF['ObjectName'] = auditLogDF['ObjectName'].combine_first(auditLogDF['ObjectName1'])\n",
"auditLogDF[\"UID\"] = auditLogDF[\"UID\"].fillna(auditLogDF['UID1'])\n",
"auditLogDF = auditLogDF.drop(auditLogDF.columns[[4, 7]], axis=1)\n",
"auditLogDF[\"UID\"] = auditLogDF[\"ReferenceUID\"].fillna(auditLogDF['UID'])\n",
"#auditLogDF = auditLogDF.sort_values([\"UID\", \"Timestamp\"], ascending = (False, True))\n",
"auditLogDF = auditLogDF.drop(auditLogDF.columns[[0, 5]], axis=1)\n",
"#auditLogDF = auditLogDF.drop_duplicates()\n",
"\n",
"#print(auditLogDF.to_string())\n",
"#print(auditLogDF.count)\n",
"#print(auditLogDF)\n",
"\n",
"#len(auditLogDF)"
]
},
{
"cell_type": "code",
"execution_count": 227,
"id": "6dce47db-06fb-4dca-8eda-056118f7459b",
"metadata": {},
"outputs": [],
"source": [
"# Append one event_type per line to the dataset file. The file is opened once\n",
"# for the whole frame instead of once per row.\n",
"# NOTE(review): append mode means re-running this cell duplicates the events\n",
"# already written - confirm whether \"w\" is intended.\n",
"with open(\"data/events-dataset-audit.txt\", \"a\") as event_file:\n",
"    event_file.writelines(f\"{event_type}\\n\" for event_type in auditLogDF['event_type'])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}