204 lines
4.5 KiB
YAML
204 lines
4.5 KiB
YAML
|
|
---
# HorizontalPodAutoscaler for the provisioning-rag Deployment.
# Holds the replica count between 3 and 10, driven by the average CPU and
# memory utilization metrics declared below.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: provisioning-rag-hpa
  namespace: provisioning-rag
  labels:
    app: provisioning-rag
    component: autoscaling
spec:
  # Workload whose replica count this autoscaler manages.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: provisioning-rag
  minReplicas: 3
  maxReplicas: 10
  metrics:
    # CPU: aim for 70% average utilization.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory: aim for 80% average utilization.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    # Scale up immediately (no stabilization window). Per 30s period, either
    # double the replica count or add 2 pods; selectPolicy Max applies
    # whichever policy allows the larger change.
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 2
          periodSeconds: 30
      selectPolicy: Max
    # Scale down conservatively: 300s stabilization window, then at most 50%
    # per 60s or 1 pod per 120s; selectPolicy Min applies whichever policy
    # allows the smaller change.
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 1
          periodSeconds: 120
      selectPolicy: Min
---
# Ingress for RAG Service
# Terminates TLS for rag.example.com (certificate requested through the
# letsencrypt-prod cert-manager ClusterIssuer) and routes traffic to the
# provisioning-rag Service via the nginx ingress class.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: provisioning-rag-ingress
  namespace: provisioning-rag
  labels:
    app: provisioning-rag
    component: ingress
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # No rewrite-target: with plain (non-regex) paths, "rewrite-target: /"
    # rewrites every request URI to "/", so sub-paths such as /metrics would
    # never reach the backend unchanged. Paths are forwarded as-is instead.
    #
    # Rate limit: 100 requests per minute per client IP. ingress-nginx has no
    # "rate-limit" / "rate-limit-window" annotations (they are ignored);
    # limit-rpm is the supported equivalent of "100 per 60s".
    nginx.ingress.kubernetes.io/limit-rpm: "100"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
    # Proxy timeouts, in seconds.
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "30"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "30"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "30"
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - rag.example.com
      secretName: rag-tls-cert
  rules:
    - host: rag.example.com
      http:
        paths:
          # Main service traffic.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: provisioning-rag
                port:
                  number: 9090
          # NOTE(review): this publishes the metrics port on the public host,
          # while provisioning-rag-netpol in this file only admits the ingress
          # controller on port 9090. Confirm whether /metrics should be
          # reachable through the Ingress at all.
          - path: /metrics
            pathType: Prefix
            backend:
              service:
                name: provisioning-rag
                port:
                  number: 8888
---
# NetworkPolicy for RAG Service (security)
# Restricts both ingress and egress for pods labelled app=provisioning-rag;
# only the traffic listed below is allowed.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: provisioning-rag-netpol
  namespace: provisioning-rag
  labels:
    app: provisioning-rag
    component: networking
spec:
  podSelector:
    matchLabels:
      app: provisioning-rag
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Allow from Ingress Controller.
    # Two peers (OR-ed): the hand-applied "name" label that was matched
    # originally, plus the label Kubernetes sets automatically on every
    # namespace, so the rule also works when "name" was never added.
    # Assumes the controller namespace is called ingress-nginx - confirm.
    - from:
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
      ports:
        - protocol: TCP
          port: 9090
    # Allow from Prometheus.
    # NOTE(review): a bare podSelector only matches pods in this namespace
    # (provisioning-rag). If Prometheus runs in another namespace, add a
    # namespaceSelector to this peer.
    - from:
        - podSelector:
            matchLabels:
              app: prometheus
      ports:
        - protocol: TCP
          port: 8888
    # Allow from other RAG pods
    - from:
        - podSelector:
            matchLabels:
              app: provisioning-rag
      ports:
        - protocol: TCP
          port: 9090
  egress:
    # Allow DNS. TCP is needed as well as UDP: resolvers fall back to TCP
    # when a response does not fit in a UDP datagram.
    - to:
        - namespaceSelector: {}
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    # Allow to SurrealDB
    - to:
        - podSelector:
            matchLabels:
              app: surrealdb
      ports:
        - protocol: TCP
          port: 8000
    # Allow to external HTTPS (for OpenAI, etc.).
    # "namespaceSelector: {}" matches only pods inside the cluster, never
    # external addresses, so an ipBlock peer is required for real external
    # egress. The in-cluster peer is kept to preserve existing behaviour.
    # IPv4 only; add a "::/0" ipBlock if the cluster is dual-stack.
    - to:
        - namespaceSelector: {}
        - ipBlock:
            cidr: 0.0.0.0/0
      ports:
        - protocol: TCP
          port: 443
    # Allow to external HTTP (same peer reasoning as the HTTPS rule).
    - to:
        - namespaceSelector: {}
        - ipBlock:
            cidr: 0.0.0.0/0
      ports:
        - protocol: TCP
          port: 80
---
# SurrealDB NetworkPolicy
# Locks down pods labelled app=surrealdb: inbound only from the RAG service
# on TCP 8000, outbound only DNS.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: surrealdb-netpol
  namespace: provisioning-rag
  labels:
    app: surrealdb
    component: networking
spec:
  podSelector:
    matchLabels:
      app: surrealdb
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Allow from RAG service
    - from:
        - podSelector:
            matchLabels:
              app: provisioning-rag
      ports:
        - protocol: TCP
          port: 8000
  egress:
    # Allow DNS. TCP is needed as well as UDP: resolvers fall back to TCP
    # when a response does not fit in a UDP datagram.
    - to:
        - namespaceSelector: {}
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53