# Source path: prvng_platform/crates/rag/k8s/06-hpa-ingress.yaml
# Kubernetes manifests for the RAG service: HorizontalPodAutoscaler, Ingress,
# and NetworkPolicies (RAG pods and SurrealDB pods).

---
# HorizontalPodAutoscaler for the RAG service.
# Keeps the provisioning-rag Deployment between 3 and 10 replicas, scaling on
# average CPU and memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: provisioning-rag-hpa
  namespace: provisioning-rag
  labels:
    app: provisioning-rag
    component: autoscaling
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: provisioning-rag
  minReplicas: 3
  maxReplicas: 10
  metrics:
    # CPU: target 70% average utilization across pods.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory: target 80% average utilization across pods.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    # Scale up immediately (no stabilization window). Per 30s period the
    # larger of "double the replicas" or "add 2 pods" is permitted.
    scaleUp:
      stabilizationWindowSeconds: 0
      selectPolicy: Max
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 2
          periodSeconds: 30
    # Scale down cautiously: wait out a 300s stabilization window, then apply
    # whichever policy removes the fewest pods (selectPolicy: Min).
    scaleDown:
      stabilizationWindowSeconds: 300
      selectPolicy: Min
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 1
          periodSeconds: 120
---
# Ingress for the RAG service.
# Terminates TLS for rag.example.com (certificate requested through the
# letsencrypt-prod cert-manager ClusterIssuer, stored in rag-tls-cert) and
# routes every path to the provisioning-rag Service on port 9090.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: provisioning-rag-ingress
  namespace: provisioning-rag
  labels:
    app: provisioning-rag
    component: ingress
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # Rate limiting: ingress-nginx does not define "rate-limit" or
    # "rate-limit-window" annotations, so that pair was silently ignored.
    # limit-rpm expresses the intended 100 requests per 60 seconds.
    nginx.ingress.kubernetes.io/limit-rpm: "100"
    # No rewrite-target: a literal "/" target without regex capture groups
    # rewrites matched sub-paths to "/", so requests would not reach the
    # backend path they asked for.
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
    # Upstream connect/send/read timeouts, in seconds.
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "30"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "30"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "30"
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - rag.example.com
      secretName: rag-tls-cert
  rules:
    - host: rag.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: provisioning-rag
                port:
                  number: 9090
          # The /metrics route (Service port 8888) is deliberately not
          # published here: the provisioning-rag NetworkPolicy admits the
          # ingress controller on port 9090 only, so the route could not
          # work, and it would expose metrics on a public hostname.
---
# NetworkPolicy for RAG service pods (app: provisioning-rag).
# Both Ingress and Egress are listed in policyTypes, so any traffic not
# matched by a rule below is denied for the selected pods.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: provisioning-rag-netpol
  namespace: provisioning-rag
  labels:
    app: provisioning-rag
    component: networking
spec:
  podSelector:
    matchLabels:
      app: provisioning-rag
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Ingress controller -> service port 9090.
    # Two peers (OR-ed): the original manually applied "name" label, plus the
    # kubernetes.io/metadata.name label that Kubernetes sets on every
    # namespace automatically, so the rule works without extra labelling.
    - from:
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
      ports:
        - protocol: TCP
          port: 9090
    # Prometheus -> port 8888.
    # NOTE(review): a podSelector without a namespaceSelector matches only
    # pods in this namespace; if Prometheus runs in another namespace this
    # rule needs a namespaceSelector on the same peer - confirm.
    - from:
        - podSelector:
            matchLabels:
              app: prometheus
      ports:
        - protocol: TCP
          port: 8888
    # Other RAG pods -> service port 9090.
    - from:
        - podSelector:
            matchLabels:
              app: provisioning-rag
      ports:
        - protocol: TCP
          port: 9090
  egress:
    # DNS to any namespace. TCP is allowed alongside UDP because resolvers
    # fall back to TCP for truncated or large responses.
    - to:
        - namespaceSelector: {}
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    # SurrealDB pods in this namespace.
    - to:
        - podSelector:
            matchLabels:
              app: surrealdb
      ports:
        - protocol: TCP
          port: 8000
    # HTTPS to in-cluster pods and to external endpoints (OpenAI, etc.).
    # namespaceSelector: {} only matches pods inside the cluster; the ipBlock
    # peer is what actually permits destinations outside it.
    - to:
        - namespaceSelector: {}
        - ipBlock:
            cidr: 0.0.0.0/0
      ports:
        - protocol: TCP
          port: 443
    # HTTP to in-cluster pods and to external endpoints (same reasoning).
    - to:
        - namespaceSelector: {}
        - ipBlock:
            cidr: 0.0.0.0/0
      ports:
        - protocol: TCP
          port: 80
---
# NetworkPolicy for SurrealDB pods (app: surrealdb).
# Both Ingress and Egress are listed in policyTypes, so the database accepts
# connections only from RAG pods and may only initiate DNS lookups.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: surrealdb-netpol
  namespace: provisioning-rag
  labels:
    app: surrealdb
    component: networking
spec:
  podSelector:
    matchLabels:
      app: surrealdb
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # RAG service pods (same namespace) -> SurrealDB port 8000.
    - from:
        - podSelector:
            matchLabels:
              app: provisioning-rag
      ports:
        - protocol: TCP
          port: 8000
  egress:
    # DNS to any namespace. TCP is allowed alongside UDP because resolvers
    # fall back to TCP for truncated or large responses.
    - to:
        - namespaceSelector: {}
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53