perfspec-learning/process/python/process.ipynb
2025-01-20 07:55:53 +00:00

657 lines
30 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "bc478b25-756f-4686-8995-a079add279b4",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0a7982a2-a348-43c7-93b8-774605276a60",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>kind</th>\n",
" <th>apiVersion</th>\n",
" <th>level</th>\n",
" <th>auditID</th>\n",
" <th>stage</th>\n",
" <th>requestURI</th>\n",
" <th>verb</th>\n",
" <th>sourceIPs</th>\n",
" <th>userAgent</th>\n",
" <th>requestReceivedTimestamp</th>\n",
" <th>...</th>\n",
" <th>annotations_k8s.io/removed-release</th>\n",
" <th>annotations_pod-security.kubernetes.io/enforce-policy</th>\n",
" <th>annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_5</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_5</th>\n",
" <th>annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_7</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_7</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_18</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_16</th>\n",
" <th>annotations</th>\n",
" <th>annotations_mutation.webhook.admission.k8s.io/round_0_index_15</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>4fed3bf5-d1a4-4bf3-a137-bb9ff5380486</td>\n",
" <td>ResponseStarted</td>\n",
" <td>/api/v1/nodes?allowWatchBookmarks=true&amp;fieldSe...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.128513Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>a1b86d5d-6f8f-4058-9710-19f5cc40a68a</td>\n",
" <td>ResponseStarted</td>\n",
" <td>/api/v1/namespaces/kube-flannel/configmaps?all...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.129781Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>a1b86d5d-6f8f-4058-9710-19f5cc40a68a</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/api/v1/namespaces/kube-flannel/configmaps?all...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.129781Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c</td>\n",
" <td>ResponseStarted</td>\n",
" <td>/api/v1/namespaces/kube-system/configmaps?allo...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.130396Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/api/v1/namespaces/kube-system/configmaps?allo...</td>\n",
" <td>watch</td>\n",
" <td>[192.168.60.11]</td>\n",
" <td>kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc</td>\n",
" <td>2023-01-27T18:28:08.130396Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94282</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>ee239268-d276-431e-897b-c9e4ae2ec4a2</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/readyz</td>\n",
" <td>get</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-probe/1.23</td>\n",
" <td>2023-01-27T19:03:15.516370Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94283</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>c4027d84-5a11-46f5-808b-cde4c52e68a2</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>get</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-scheduler/v1.23.15 (linux/amd64) kubernet...</td>\n",
" <td>2023-01-27T19:03:15.630828Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94284</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>ccd5ea66-1586-4453-8634-1428d88795f3</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>get</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-controller-manager/v1.23.15 (linux/amd64)...</td>\n",
" <td>2023-01-27T19:03:15.644742Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94285</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>42728a6e-4b34-406e-89e4-fad679bde27b</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>update</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-scheduler/v1.23.15 (linux/amd64) kubernet...</td>\n",
" <td>2023-01-27T19:03:15.644221Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94286</th>\n",
" <td>Event</td>\n",
" <td>audit.k8s.io/v1</td>\n",
" <td>Metadata</td>\n",
" <td>db1fa802-3c2b-43f3-ad5d-cb09ac400074</td>\n",
" <td>ResponseComplete</td>\n",
" <td>/apis/coordination.k8s.io/v1/namespaces/kube-s...</td>\n",
" <td>update</td>\n",
" <td>[192.168.60.10]</td>\n",
" <td>kube-controller-manager/v1.23.15 (linux/amd64)...</td>\n",
" <td>2023-01-27T19:03:15.653453Z</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>94287 rows × 43 columns</p>\n",
"</div>"
],
"text/plain": [
" kind apiVersion level auditID \\\n",
"0 Event audit.k8s.io/v1 Metadata 4fed3bf5-d1a4-4bf3-a137-bb9ff5380486 \n",
"1 Event audit.k8s.io/v1 Metadata a1b86d5d-6f8f-4058-9710-19f5cc40a68a \n",
"2 Event audit.k8s.io/v1 Metadata a1b86d5d-6f8f-4058-9710-19f5cc40a68a \n",
"3 Event audit.k8s.io/v1 Metadata 3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c \n",
"4 Event audit.k8s.io/v1 Metadata 3ba9cc6d-7d10-40c5-8ec7-da60a9b3627c \n",
"... ... ... ... ... \n",
"94282 Event audit.k8s.io/v1 Metadata ee239268-d276-431e-897b-c9e4ae2ec4a2 \n",
"94283 Event audit.k8s.io/v1 Metadata c4027d84-5a11-46f5-808b-cde4c52e68a2 \n",
"94284 Event audit.k8s.io/v1 Metadata ccd5ea66-1586-4453-8634-1428d88795f3 \n",
"94285 Event audit.k8s.io/v1 Metadata 42728a6e-4b34-406e-89e4-fad679bde27b \n",
"94286 Event audit.k8s.io/v1 Metadata db1fa802-3c2b-43f3-ad5d-cb09ac400074 \n",
"\n",
" stage requestURI \\\n",
"0 ResponseStarted /api/v1/nodes?allowWatchBookmarks=true&fieldSe... \n",
"1 ResponseStarted /api/v1/namespaces/kube-flannel/configmaps?all... \n",
"2 ResponseComplete /api/v1/namespaces/kube-flannel/configmaps?all... \n",
"3 ResponseStarted /api/v1/namespaces/kube-system/configmaps?allo... \n",
"4 ResponseComplete /api/v1/namespaces/kube-system/configmaps?allo... \n",
"... ... ... \n",
"94282 ResponseComplete /readyz \n",
"94283 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"94284 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"94285 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"94286 ResponseComplete /apis/coordination.k8s.io/v1/namespaces/kube-s... \n",
"\n",
" verb sourceIPs \\\n",
"0 watch [192.168.60.11] \n",
"1 watch [192.168.60.11] \n",
"2 watch [192.168.60.11] \n",
"3 watch [192.168.60.11] \n",
"4 watch [192.168.60.11] \n",
"... ... ... \n",
"94282 get [192.168.60.10] \n",
"94283 get [192.168.60.10] \n",
"94284 get [192.168.60.10] \n",
"94285 update [192.168.60.10] \n",
"94286 update [192.168.60.10] \n",
"\n",
" userAgent \\\n",
"0 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"1 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"2 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"3 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"4 kubelet/v1.23.14 (linux/amd64) kubernetes/3321ffc \n",
"... ... \n",
"94282 kube-probe/1.23 \n",
"94283 kube-scheduler/v1.23.15 (linux/amd64) kubernet... \n",
"94284 kube-controller-manager/v1.23.15 (linux/amd64)... \n",
"94285 kube-scheduler/v1.23.15 (linux/amd64) kubernet... \n",
"94286 kube-controller-manager/v1.23.15 (linux/amd64)... \n",
"\n",
" requestReceivedTimestamp ... annotations_k8s.io/removed-release \\\n",
"0 2023-01-27T18:28:08.128513Z ... NaN \n",
"1 2023-01-27T18:28:08.129781Z ... NaN \n",
"2 2023-01-27T18:28:08.129781Z ... NaN \n",
"3 2023-01-27T18:28:08.130396Z ... NaN \n",
"4 2023-01-27T18:28:08.130396Z ... NaN \n",
"... ... ... ... \n",
"94282 2023-01-27T19:03:15.516370Z ... NaN \n",
"94283 2023-01-27T19:03:15.630828Z ... NaN \n",
"94284 2023-01-27T19:03:15.644742Z ... NaN \n",
"94285 2023-01-27T19:03:15.644221Z ... NaN \n",
"94286 2023-01-27T19:03:15.653453Z ... NaN \n",
"\n",
" annotations_pod-security.kubernetes.io/enforce-policy \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_5 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_5 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_failed-open.mutation.webhook.admission.k8s.io/round_0_index_7 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_7 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_18 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_16 \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
" annotations_mutation.webhook.admission.k8s.io/round_0_index_15 \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"94282 NaN \n",
"94283 NaN \n",
"94284 NaN \n",
"94285 NaN \n",
"94286 NaN \n",
"\n",
"[94287 rows x 43 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the raw audit log (one JSON document per line) and flatten nested fields.\n",
"file_path = \"../../raw-audit-logs.log\"\n",
"\n",
"# Number of log lines parsed per chunk; adjust based on available memory.\n",
"chunk_size = 1000\n",
"\n",
"# Parsed chunks, concatenated into a single DataFrame below.\n",
"chunks = []\n",
"\n",
"# Best-effort load: a parsing error stops the read, but the chunks parsed so\n",
"# far are kept. The reader is used as a context manager so the underlying\n",
"# file handle is closed even when parsing fails part-way through.\n",
"try:\n",
"    with pd.read_json(\n",
"        file_path,\n",
"        lines=True,\n",
"        chunksize=chunk_size,\n",
"        encoding_errors='ignore',\n",
"    ) as reader:\n",
"        for chunk in reader:\n",
"            chunks.append(chunk)\n",
"except ValueError as e:\n",
"    print(f\"Error while parsing JSON: {e}\")\n",
"    print(f\"Continuing with {len(chunks)} chunk(s) parsed before the error.\")\n",
"\n",
"# pd.concat raises an opaque \"No objects to concatenate\" on an empty list,\n",
"# so fail here with a message that names the actual problem.\n",
"if not chunks:\n",
"    raise ValueError(f\"No audit events could be parsed from {file_path}\")\n",
"\n",
"# Combine all chunks into a single DataFrame with a fresh 0..n-1 index.\n",
"df = pd.concat(chunks, ignore_index=True)\n",
"\n",
"# Expand nested dict values into flat columns named <parent>_<child>.\n",
"df_normalized = pd.json_normalize(df.to_dict(orient='records'), sep='_')\n",
"\n",
"df_normalized"
]
},
{
"cell_type": "code",
"execution_count": 224,
"id": "c11f7c76-4453-4829-879e-7ac7f1faf541",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2163"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# Build the event table from the extracted audit CSV. The resulting frame has\n",
"# the columns verb, resourceType, ObjectName, UID and event_type.\n",
"auditLogDF = pd.read_csv(\n",
"    r\"data/audit-logs.csv\",\n",
"    names=[\n",
"        'hostIP', 'Timestamp', 'username', 'verb', 'resourceType',\n",
"        'subresource', 'ObjectName', 'ObjectName1', 'requestURI', 'auditID',\n",
"        'stage', 'statusCode', 'UID', 'ReferenceUID', 'UID1',\n",
"    ],\n",
")\n",
"\n",
"# De-duplicate on the complete rows, before any column is discarded.\n",
"auditLogDF = auditLogDF.drop_duplicates(ignore_index=True)\n",
"\n",
"# Columns are dropped by name rather than by position so the selection stays\n",
"# correct if the column list above is ever reordered or extended.\n",
"auditLogDF = auditLogDF.drop(\n",
"    columns=['hostIP', 'username', 'subresource', 'requestURI', 'auditID', 'stage', 'statusCode']\n",
")\n",
"\n",
"# Discard get/watch/list requests and events/leases/replicationcontrollers\n",
"# resources. Rows with a missing verb or resourceType are kept, as before.\n",
"# .copy() makes the result an independent frame so the column assignments\n",
"# below do not trigger SettingWithCopyWarning.\n",
"ignored_verbs = ['get', 'watch', 'list']\n",
"ignored_resources = ['events', 'leases', 'replicationcontrollers']\n",
"auditLogDF = auditLogDF[\n",
"    ~auditLogDF['verb'].isin(ignored_verbs)\n",
"    & ~auditLogDF['resourceType'].isin(ignored_resources)\n",
"].copy()\n",
"\n",
"auditLogDF[\"event_type\"] = auditLogDF[\"verb\"] + \"_\" + auditLogDF[\"resourceType\"]\n",
"# Optional extra filters that were tried and left disabled:\n",
"#auditLogDF = auditLogDF[auditLogDF.event_type != \"delete_service\"]\n",
"#auditLogDF = auditLogDF[auditLogDF.event_type != \"create namespaces\"]\n",
"\n",
"# Coalesce the alternative columns: ObjectName falls back to ObjectName1;\n",
"# UID takes ReferenceUID when present, otherwise UID, otherwise UID1.\n",
"auditLogDF['ObjectName'] = auditLogDF['ObjectName'].combine_first(auditLogDF['ObjectName1'])\n",
"auditLogDF[\"UID\"] = auditLogDF[\"ReferenceUID\"].fillna(\n",
"    auditLogDF[\"UID\"].fillna(auditLogDF['UID1'])\n",
")\n",
"\n",
"# The helper columns have been folded into ObjectName / UID; Timestamp is\n",
"# not carried into the final table.\n",
"auditLogDF = auditLogDF.drop(columns=['ObjectName1', 'UID1', 'Timestamp', 'ReferenceUID'])"
]
},
{
"cell_type": "code",
"execution_count": 227,
"id": "6dce47db-06fb-4dca-8eda-056118f7459b",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Append every event_type of auditLogDF, one per line, to the dataset file.\n",
"# These two lists are not used anywhere else in this notebook.\n",
"event_seq = []\n",
"event_sub_seq = []\n",
"\n",
"# Open the file once for the whole export instead of once per row.\n",
"# Append mode is kept, so re-running this cell adds the events again.\n",
"with open(\"data/events-dataset-audit.txt\", \"a\") as event_file:\n",
"    for event_type in auditLogDF['event_type']:\n",
"        event_file.write(\"%s\\n\" % event_type)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}