---
# Horizontal Pod Autoscaler for RAG Service
# Keeps the provisioning-rag Deployment between 3 and 10 replicas, driven by
# average CPU and memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: provisioning-rag-hpa
  namespace: provisioning-rag
  labels:
    app: provisioning-rag
    component: autoscaling
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: provisioning-rag
  minReplicas: 3
  maxReplicas: 10
  metrics:
    # Utilization targets are percentages of the containers' resource requests.
    # NOTE(review): the target Deployment is not in this file - confirm it sets
    # CPU and memory requests, otherwise these metrics cannot be computed.
    # Scale based on CPU usage
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Scale based on memory usage
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleDown:
      # Act on the highest recommendation of the last 5 minutes to avoid flapping.
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 1
          periodSeconds: 120
      # Min: apply whichever policy permits the smaller change (slow scale-down).
      selectPolicy: Min
    scaleUp:
      # No stabilization window: react to load immediately.
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 2
          periodSeconds: 30
      # Max: apply whichever policy permits the larger change (fast scale-up).
      selectPolicy: Max

---
# Ingress for RAG Service
# Publishes the service on https://rag.example.com through the nginx ingress
# class, with a certificate issued by the letsencrypt-prod ClusterIssuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: provisioning-rag-ingress
  namespace: provisioning-rag
  labels:
    app: provisioning-rag
    component: ingress
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # Deliberately no rewrite-target annotation: "rewrite-target: /" without
    # capture groups rewrites every request URI to "/", so the backend would
    # never see the original path.
    # 100 requests per minute per client IP. ingress-nginx has no "rate-limit" /
    # "rate-limit-window" annotations; limit-rpm is the supported equivalent.
    nginx.ingress.kubernetes.io/limit-rpm: "100"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
    # Proxy timeouts are in seconds.
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "30"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "30"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "30"
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - rag.example.com
      secretName: rag-tls-cert
  rules:
    - host: rag.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: provisioning-rag
                port:
                  number: 9090
          # The metrics port (8888) is intentionally not routed here: the
          # provisioning-rag NetworkPolicy admits the ingress controller on
          # port 9090 only, and metrics should not be reachable from the
          # public host. Prometheus scrapes port 8888 from inside the cluster.

---
# NetworkPolicy for RAG Service (security)
# Restricts both directions of traffic for pods labelled app=provisioning-rag.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: provisioning-rag-netpol
  namespace: provisioning-rag
  labels:
    app: provisioning-rag
    component: networking
spec:
  podSelector:
    matchLabels:
      app: provisioning-rag
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Allow from Ingress Controller
    # kubernetes.io/metadata.name is set automatically on every namespace, so
    # this does not depend on someone labelling the namespace by hand.
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
      ports:
        - protocol: TCP
          port: 9090
    # Allow from Prometheus
    # NOTE(review): a bare podSelector only matches pods in this namespace
    # (provisioning-rag). If Prometheus runs elsewhere, add a namespaceSelector
    # to this peer - confirm where Prometheus is deployed.
    - from:
        - podSelector:
            matchLabels:
              app: prometheus
      ports:
        - protocol: TCP
          port: 8888
    # Allow from other RAG pods
    - from:
        - podSelector:
            matchLabels:
              app: provisioning-rag
      ports:
        - protocol: TCP
          port: 9090
  egress:
    # Allow DNS (TCP as well as UDP: resolvers fall back to TCP for truncated
    # or large responses)
    - to:
        - namespaceSelector: {}
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    # Allow to SurrealDB
    - to:
        - podSelector:
            matchLabels:
              app: surrealdb
      ports:
        - protocol: TCP
          port: 8000
    # Allow to external HTTPS (for OpenAI, etc.) and external HTTP
    # namespaceSelector: {} only matches in-cluster pods; reaching hosts outside
    # the cluster requires an ipBlock peer. The selector is kept so in-cluster
    # destinations on these ports stay reachable as before.
    # NOTE(review): 0.0.0.0/0 is IPv4 only and also covers node/metadata
    # addresses - consider adding "except" ranges or an IPv6 block as needed.
    - to:
        - namespaceSelector: {}
        - ipBlock:
            cidr: 0.0.0.0/0
      ports:
        - protocol: TCP
          port: 443
        - protocol: TCP
          port: 80

---
# SurrealDB NetworkPolicy
# Only the RAG service may connect to SurrealDB; SurrealDB itself may only
# perform DNS lookups.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: surrealdb-netpol
  namespace: provisioning-rag
  labels:
    app: surrealdb
    component: networking
spec:
  podSelector:
    matchLabels:
      app: surrealdb
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Allow from RAG service
    - from:
        - podSelector:
            matchLabels:
              app: provisioning-rag
      ports:
        - protocol: TCP
          port: 8000
  egress:
    # Allow DNS (TCP as well as UDP: resolvers fall back to TCP for truncated
    # or large responses)
    - to:
        - namespaceSelector: {}
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53