provisioning/docs/book/user/troubleshooting-guide.html

1083 lines
41 KiB
HTML
Raw Permalink Normal View History

<!DOCTYPE HTML>
<html lang="en" class="ayu sidebar-visible" dir="ltr">
<head>
<!-- Book generated using mdBook -->
<meta charset="UTF-8">
<title>Troubleshooting Guide - Provisioning Platform Documentation</title>
<!-- Custom HTML head -->
<meta name="description" content="Complete documentation for the Provisioning Platform - Infrastructure automation with Nushell, KCL, and Rust">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff">
<link rel="icon" href="../favicon.svg">
<link rel="shortcut icon" href="../favicon.png">
<link rel="stylesheet" href="../css/variables.css">
<link rel="stylesheet" href="../css/general.css">
<link rel="stylesheet" href="../css/chrome.css">
<link rel="stylesheet" href="../css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
<link rel="stylesheet" href="../fonts/fonts.css">
<!-- Highlight.js Stylesheets -->
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
<!-- Custom theme stylesheets -->
<!-- Provide site root and default themes to javascript -->
<script>
// Relative path from this page to the book root (this page sits one directory below it).
const path_to_root = "../";
// Theme names used when no theme is stored: one for a light and one for a dark
// OS colour-scheme preference (selected by the theme bootstrap script in <body>).
const default_light_theme = "ayu";
const default_dark_theme = "navy";
</script>
<!-- Start loading toc.js asap -->
<script src="../toc.js"></script>
</head>
<body>
<div id="mdbook-help-container">
<div id="mdbook-help-popup">
<h2 class="mdbook-help-title">Keyboard shortcuts</h2>
<div>
<p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p>
<p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p>
<p>Press <kbd>?</kbd> to show this help</p>
<p>Press <kbd>Esc</kbd> to hide this help</p>
</div>
</div>
</div>
<div id="body-container">
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script>
// Strip a wrapping pair of double quotes from the stored theme and sidebar
// values. Each key is handled independently: a missing key (getItem returns
// null) is skipped instead of throwing, so an absent theme no longer prevents
// the sidebar value from being repaired.
try {
    for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
        const stored = localStorage.getItem(key);
        // Require at least two characters so a lone '"' is not mistaken for a
        // quoted value and sliced down to an empty string.
        if (stored !== null && stored.length >= 2 && stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, -1));
        }
    }
} catch (e) { } // best effort: if storage access throws, leave the values untouched
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script>
// Resolve the theme to apply: a stored choice wins, otherwise follow the OS
// colour-scheme preference.
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? default_dark_theme : default_light_theme;
let theme;
// If reading localStorage throws, theme simply stays undefined.
try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
// Treat an empty string like a missing value: classList.add('') throws, which
// would abort this script before the "js" class below is added.
if (theme === null || theme === undefined || theme === '') { theme = default_theme; }
const html = document.documentElement;
// Swap the theme class hard-coded on <html> for the resolved one.
html.classList.remove('ayu');
html.classList.add(theme);
// Adds the "js" class to <html> (presumably so stylesheets can detect that scripts ran).
html.classList.add("js");
</script>
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
<!-- Hide / unhide sidebar before it is displayed -->
<script>
// Work out the initial sidebar state before the sidebar is rendered.
// Viewports narrower than 1080px always start hidden; wider ones use the
// stored preference and fall back to 'visible'.
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
let sidebar = 'hidden';
if (document.body.clientWidth >= 1080) {
    let storedState = null;
    // If reading localStorage throws, storedState stays null and the default applies.
    try { storedState = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
    sidebar = storedState || 'visible';
}
// Mirror the state on the checkbox and on the <html> class list.
sidebar_toggle.checked = (sidebar === 'visible');
html.classList.remove('sidebar-visible');
html.classList.add(`sidebar-${sidebar}`);
</script>
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<!-- populated by js -->
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
<!-- Table of contents shown when JavaScript is disabled; the title gives the frame an accessible name -->
<noscript>
<iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe>
</noscript>
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
<div class="sidebar-resize-indicator"></div>
</div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar-hover-placeholder"></div>
<div id="menu-bar" class="menu-bar sticky">
<div class="left-buttons">
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
<i class="fa fa-bars"></i>
</label>
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
<i class="fa fa-paint-brush"></i>
</button>
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
</ul>
<button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar">
<i class="fa fa-search"></i>
</button>
</div>
<h1 class="menu-title">Provisioning Platform Documentation</h1>
<div class="right-buttons">
<a href="../print.html" title="Print this book" aria-label="Print this book">
<i id="print-button" class="fa fa-print"></i>
</a>
<a href="https://github.com/provisioning/provisioning-platform" title="Git repository" aria-label="Git repository">
<i id="git-repository-button" class="fa fa-github"></i>
</a>
<a href="https://github.com/provisioning/provisioning-platform/edit/main/provisioning/docs/src/user/troubleshooting-guide.md" title="Suggest an edit" aria-label="Suggest an edit">
<i id="git-edit-button" class="fa fa-edit"></i>
</a>
</div>
</div>
<div id="search-wrapper" class="hidden">
<!-- Search form: aria-label names the field for assistive technology, since a placeholder alone is not a reliable label -->
<form id="searchbar-outer" class="searchbar-outer">
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-label="Search this book" aria-controls="searchresults-outer" aria-describedby="searchresults-header">
</form>
<div id="searchresults-outer" class="searchresults-outer hidden">
<div id="searchresults-header" class="searchresults-header"></div>
<ul id="searchresults">
</ul>
</div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script>
{
    // Block scope keeps these helper bindings out of the shared global scope.
    const isSidebarVisible = sidebar === 'visible';
    // Reflect the initial sidebar state on the toggle and on the sidebar itself.
    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', isSidebarVisible);
    document.getElementById('sidebar').setAttribute('aria-hidden', !isSidebarVisible);
    // Links inside a hidden sidebar are taken out of the tab order.
    const linkTabIndex = isSidebarVisible ? 0 : -1;
    for (const link of document.querySelectorAll('#sidebar a')) {
        link.setAttribute('tabIndex', linkTabIndex);
    }
}
</script>
<div id="content" class="content">
<main>
<h1 id="troubleshooting-guide"><a class="header" href="#troubleshooting-guide">Troubleshooting Guide</a></h1>
<p>This comprehensive troubleshooting guide helps you diagnose and resolve common issues with Infrastructure Automation.</p>
<h2 id="what-youll-learn"><a class="header" href="#what-youll-learn">What You’ll Learn</a></h2>
<ul>
<li>Common issues and their solutions</li>
<li>Diagnostic commands and techniques</li>
<li>Error message interpretation</li>
<li>Performance optimization</li>
<li>Recovery procedures</li>
<li>Prevention strategies</li>
</ul>
<h2 id="general-troubleshooting-approach"><a class="header" href="#general-troubleshooting-approach">General Troubleshooting Approach</a></h2>
<h3 id="1-identify-the-problem"><a class="header" href="#1-identify-the-problem">1. Identify the Problem</a></h3>
<pre><code class="language-bash"># Check overall system status
provisioning env
provisioning validate config
# Check specific component status
provisioning show servers --infra my-infra
provisioning taskserv list --infra my-infra --installed
</code></pre>
<h3 id="2-gather-information"><a class="header" href="#2-gather-information">2. Gather Information</a></h3>
<pre><code class="language-bash"># Enable debug mode for detailed output
provisioning --debug &lt;command&gt;
# Check logs and errors
provisioning show logs --infra my-infra
</code></pre>
<h3 id="3-use-diagnostic-commands"><a class="header" href="#3-use-diagnostic-commands">3. Use Diagnostic Commands</a></h3>
<pre><code class="language-bash"># Validate configuration
provisioning validate config --detailed
# Test connectivity
provisioning provider test aws
provisioning network test --infra my-infra
</code></pre>
<h2 id="installation-and-setup-issues"><a class="header" href="#installation-and-setup-issues">Installation and Setup Issues</a></h2>
<h3 id="issue-installation-fails"><a class="header" href="#issue-installation-fails">Issue: Installation Fails</a></h3>
<p><strong>Symptoms:</strong></p>
<ul>
<li>Installation script errors</li>
<li>Missing dependencies</li>
<li>Permission denied errors</li>
</ul>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check system requirements
uname -a
df -h
whoami
# Check permissions
ls -la /usr/local/
sudo -l
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="permission-issues"><a class="header" href="#permission-issues">Permission Issues</a></h4>
<pre><code class="language-bash"># Run installer with sudo
sudo ./install-provisioning
# Or install to user directory
./install-provisioning --prefix=$HOME/provisioning
export PATH="$HOME/provisioning/bin:$PATH"
</code></pre>
<h4 id="missing-dependencies"><a class="header" href="#missing-dependencies">Missing Dependencies</a></h4>
<pre><code class="language-bash"># Ubuntu/Debian
sudo apt update
sudo apt install -y curl wget tar build-essential
# RHEL/CentOS
sudo dnf install -y curl wget tar gcc make
</code></pre>
<h4 id="architecture-issues"><a class="header" href="#architecture-issues">Architecture Issues</a></h4>
<pre><code class="language-bash"># Check architecture
uname -m
# Download correct architecture package
# x86_64: Intel/AMD 64-bit
# arm64: ARM 64-bit (Apple Silicon)
wget https://releases.example.com/provisioning-linux-x86_64.tar.gz
</code></pre>
<h3 id="issue-command-not-found"><a class="header" href="#issue-command-not-found">Issue: Command Not Found</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>bash: provisioning: command not found
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check if provisioning is installed
which provisioning
ls -la /usr/local/bin/provisioning
# Check PATH
echo $PATH
</code></pre>
<p><strong>Solutions:</strong></p>
<pre><code class="language-bash"># Add to PATH
export PATH="/usr/local/bin:$PATH"
# Make permanent (add to shell profile)
echo 'export PATH="/usr/local/bin:$PATH"' &gt;&gt; ~/.bashrc
source ~/.bashrc
# Create symlink if missing
sudo ln -sf /usr/local/provisioning/core/nulib/provisioning /usr/local/bin/provisioning
</code></pre>
<h3 id="issue-nushell-plugin-errors"><a class="header" href="#issue-nushell-plugin-errors">Issue: Nushell Plugin Errors</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Plugin not found: nu_plugin_kcl
Plugin registration failed
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check Nushell version
nu --version
# Check KCL installation (required for nu_plugin_kcl)
kcl version
# Check plugin registration
nu -c "version | get installed_plugins"
</code></pre>
<p><strong>Solutions:</strong></p>
<pre><code class="language-bash"># Install KCL CLI (required for nu_plugin_kcl)
# Download from: https://github.com/kcl-lang/cli/releases
# Re-register plugins
nu -c "plugin add /usr/local/provisioning/plugins/nu_plugin_kcl"
nu -c "plugin add /usr/local/provisioning/plugins/nu_plugin_tera"
# Restart Nushell after plugin registration
</code></pre>
<h2 id="configuration-issues"><a class="header" href="#configuration-issues">Configuration Issues</a></h2>
<h3 id="issue-configuration-not-found"><a class="header" href="#issue-configuration-not-found">Issue: Configuration Not Found</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Configuration file not found
Failed to load configuration
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check configuration file locations
provisioning env | grep config
# Check if files exist
ls -la ~/.config/provisioning/
ls -la /usr/local/provisioning/config.defaults.toml
</code></pre>
<p><strong>Solutions:</strong></p>
<pre><code class="language-bash"># Initialize user configuration
provisioning init config
# Create missing directories
mkdir -p ~/.config/provisioning
# Copy template
cp /usr/local/provisioning/config-examples/config.user.toml ~/.config/provisioning/config.toml
# Verify configuration
provisioning validate config
</code></pre>
<h3 id="issue-configuration-validation-errors"><a class="header" href="#issue-configuration-validation-errors">Issue: Configuration Validation Errors</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Configuration validation failed
Invalid configuration value
Missing required field
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Detailed validation
provisioning validate config --detailed
# Check specific sections
provisioning config show --section paths
provisioning config show --section providers
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="path-configuration-issues"><a class="header" href="#path-configuration-issues">Path Configuration Issues</a></h4>
<pre><code class="language-bash"># Check base path exists
ls -la /path/to/provisioning
# Update configuration
nano ~/.config/provisioning/config.toml
# Fix paths section
[paths]
base = "/correct/path/to/provisioning"
</code></pre>
<h4 id="provider-configuration-issues"><a class="header" href="#provider-configuration-issues">Provider Configuration Issues</a></h4>
<pre><code class="language-bash"># Test provider connectivity
provisioning provider test aws
# Check credentials
aws configure list # For AWS
upcloud-cli config # For UpCloud
# Update provider configuration
[providers.aws]
interface = "CLI" # or "API"
</code></pre>
<h3 id="issue-interpolation-failures"><a class="header" href="#issue-interpolation-failures">Issue: Interpolation Failures</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Interpolation pattern not resolved: {{env.VARIABLE}}
Template rendering failed
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Test interpolation
provisioning validate interpolation test
# Check environment variables
env | grep VARIABLE
# Debug interpolation
provisioning --debug validate interpolation validate
</code></pre>
<p><strong>Solutions:</strong></p>
<pre><code class="language-bash"># Set missing environment variables
export MISSING_VARIABLE="value"
# Use fallback values in configuration
config_value = "{{env.VARIABLE || 'default_value'}}"
# Check interpolation syntax
# Correct: {{env.HOME}}
# Incorrect: ${HOME} or $HOME
</code></pre>
<h2 id="server-management-issues"><a class="header" href="#server-management-issues">Server Management Issues</a></h2>
<h3 id="issue-server-creation-fails"><a class="header" href="#issue-server-creation-fails">Issue: Server Creation Fails</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Failed to create server
Provider API error
Insufficient quota
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check provider status
provisioning provider status aws
# Test connectivity
ping api.provider.com
curl -I https://api.provider.com
# Check quota
provisioning provider quota --infra my-infra
# Debug server creation
provisioning --debug server create web-01 --infra my-infra --check
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="api-authentication-issues"><a class="header" href="#api-authentication-issues">API Authentication Issues</a></h4>
<pre><code class="language-bash"># AWS
aws configure list
aws sts get-caller-identity
# UpCloud
upcloud-cli account show
# Update credentials
aws configure # For AWS
export UPCLOUD_USERNAME="your-username"
export UPCLOUD_PASSWORD="your-password"
</code></pre>
<h4 id="quotalimit-issues"><a class="header" href="#quotalimit-issues">Quota/Limit Issues</a></h4>
<pre><code class="language-bash"># Check current usage
provisioning show costs --infra my-infra
# Request quota increase from provider
# Or reduce resource requirements
# Use smaller instance types
# Reduce number of servers
</code></pre>
<h4 id="networkconnectivity-issues"><a class="header" href="#networkconnectivity-issues">Network/Connectivity Issues</a></h4>
<pre><code class="language-bash"># Test network connectivity
curl -v https://api.aws.amazon.com
curl -v https://api.upcloud.com
# Check DNS resolution
nslookup api.aws.amazon.com
# Check firewall rules
# Ensure outbound HTTPS (port 443) is allowed
</code></pre>
<h3 id="issue-ssh-access-fails"><a class="header" href="#issue-ssh-access-fails">Issue: SSH Access Fails</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Connection refused
Permission denied
Host key verification failed
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check server status
provisioning server list --infra my-infra
# Test SSH manually
ssh -v user@server-ip
# Check SSH configuration
provisioning show servers web-01 --infra my-infra
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="connection-issues"><a class="header" href="#connection-issues">Connection Issues</a></h4>
<pre><code class="language-bash"># Wait for server to be fully ready
provisioning server list --infra my-infra --status
# Check security groups/firewall
# Ensure SSH (port 22) is allowed
# Use correct IP address
provisioning show servers web-01 --infra my-infra | grep ip
</code></pre>
<h4 id="authentication-issues"><a class="header" href="#authentication-issues">Authentication Issues</a></h4>
<pre><code class="language-bash"># Check SSH key
ls -la ~/.ssh/
ssh-add -l
# Generate new key if needed
ssh-keygen -t ed25519 -f ~/.ssh/provisioning_key
# Use specific key
provisioning server ssh web-01 --key ~/.ssh/provisioning_key --infra my-infra
</code></pre>
<h4 id="host-key-issues"><a class="header" href="#host-key-issues">Host Key Issues</a></h4>
<pre><code class="language-bash"># Remove old host key
ssh-keygen -R server-ip
# Accept new host key
ssh -o StrictHostKeyChecking=accept-new user@server-ip
</code></pre>
<h2 id="task-service-issues"><a class="header" href="#task-service-issues">Task Service Issues</a></h2>
<h3 id="issue-service-installation-fails"><a class="header" href="#issue-service-installation-fails">Issue: Service Installation Fails</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Service installation failed
Package not found
Dependency conflicts
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check service prerequisites
provisioning taskserv check kubernetes --infra my-infra
# Debug installation
provisioning --debug taskserv create kubernetes --infra my-infra --check
# Check server resources
provisioning server ssh web-01 --command "free -h &amp;&amp; df -h" --infra my-infra
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="resource-issues"><a class="header" href="#resource-issues">Resource Issues</a></h4>
<pre><code class="language-bash"># Check available resources
provisioning server ssh web-01 --command "
echo 'Memory:' &amp;&amp; free -h
echo 'Disk:' &amp;&amp; df -h
echo 'CPU:' &amp;&amp; nproc
" --infra my-infra
# Upgrade server if needed
provisioning server resize web-01 --plan larger-plan --infra my-infra
</code></pre>
<h4 id="package-repository-issues"><a class="header" href="#package-repository-issues">Package Repository Issues</a></h4>
<pre><code class="language-bash"># Update package lists
provisioning server ssh web-01 --command "
sudo apt update &amp;&amp; sudo apt upgrade -y
" --infra my-infra
# Check repository connectivity
provisioning server ssh web-01 --command "
curl -I https://download.docker.com/linux/ubuntu/
" --infra my-infra
</code></pre>
<h4 id="dependency-issues"><a class="header" href="#dependency-issues">Dependency Issues</a></h4>
<pre><code class="language-bash"># Install missing dependencies
provisioning taskserv create containerd --infra my-infra
# Then install dependent service
provisioning taskserv create kubernetes --infra my-infra
</code></pre>
<h3 id="issue-service-not-running"><a class="header" href="#issue-service-not-running">Issue: Service Not Running</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Service status: failed
Service not responding
Health check failures
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check service status
provisioning taskserv status kubernetes --infra my-infra
# Check service logs
provisioning taskserv logs kubernetes --infra my-infra
# SSH and check manually
provisioning server ssh web-01 --command "
sudo systemctl status kubernetes
sudo journalctl -u kubernetes --no-pager -n 50
" --infra my-infra
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="configuration-issues-1"><a class="header" href="#configuration-issues-1">Configuration Issues</a></h4>
<pre><code class="language-bash"># Reconfigure service
provisioning taskserv configure kubernetes --infra my-infra
# Reset to defaults
provisioning taskserv reset kubernetes --infra my-infra
</code></pre>
<h4 id="port-conflicts"><a class="header" href="#port-conflicts">Port Conflicts</a></h4>
<pre><code class="language-bash"># Check port usage
provisioning server ssh web-01 --command "
sudo netstat -tulpn | grep :6443
sudo ss -tulpn | grep :6443
" --infra my-infra
# Change port configuration or stop conflicting service
</code></pre>
<h4 id="permission-issues-1"><a class="header" href="#permission-issues-1">Permission Issues</a></h4>
<pre><code class="language-bash"># Fix permissions
provisioning server ssh web-01 --command "
sudo chown -R kubernetes:kubernetes /var/lib/kubernetes
sudo chmod 600 /etc/kubernetes/admin.conf
" --infra my-infra
</code></pre>
<h2 id="cluster-management-issues"><a class="header" href="#cluster-management-issues">Cluster Management Issues</a></h2>
<h3 id="issue-cluster-deployment-fails"><a class="header" href="#issue-cluster-deployment-fails">Issue: Cluster Deployment Fails</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Cluster deployment failed
Pod creation errors
Service unavailable
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check cluster status
provisioning cluster status web-cluster --infra my-infra
# Check Kubernetes cluster
provisioning server ssh master-01 --command "
kubectl get nodes
kubectl get pods --all-namespaces
" --infra my-infra
# Check cluster logs
provisioning cluster logs web-cluster --infra my-infra
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="node-issues"><a class="header" href="#node-issues">Node Issues</a></h4>
<pre><code class="language-bash"># Check node status
provisioning server ssh master-01 --command "
kubectl describe nodes
" --infra my-infra
# Drain and remove problematic nodes
provisioning server ssh master-01 --command "
kubectl drain worker-01 --ignore-daemonsets
kubectl delete node worker-01
" --infra my-infra
# Rejoin node
provisioning taskserv configure kubernetes --infra my-infra --servers worker-01
</code></pre>
<h4 id="resource-constraints"><a class="header" href="#resource-constraints">Resource Constraints</a></h4>
<pre><code class="language-bash"># Check resource usage
provisioning server ssh master-01 --command "
kubectl top nodes
kubectl top pods --all-namespaces
" --infra my-infra
# Scale down or add more nodes
provisioning cluster scale web-cluster --replicas 3 --infra my-infra
provisioning server create worker-04 --infra my-infra
</code></pre>
<h4 id="network-issues"><a class="header" href="#network-issues">Network Issues</a></h4>
<pre><code class="language-bash"># Check network plugin
provisioning server ssh master-01 --command "
kubectl get pods -n kube-system | grep cilium
" --infra my-infra
# Restart network plugin
provisioning taskserv restart cilium --infra my-infra
</code></pre>
<h2 id="performance-issues"><a class="header" href="#performance-issues">Performance Issues</a></h2>
<h3 id="issue-slow-operations"><a class="header" href="#issue-slow-operations">Issue: Slow Operations</a></h3>
<p><strong>Symptoms:</strong></p>
<ul>
<li>Commands take very long to complete</li>
<li>Timeouts during operations</li>
<li>High CPU/memory usage</li>
</ul>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check system resources
top
htop
free -h
df -h
# Check network latency
ping api.aws.amazon.com
traceroute api.aws.amazon.com
# Profile command execution
time provisioning server list --infra my-infra
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="local-system-issues"><a class="header" href="#local-system-issues">Local System Issues</a></h4>
<pre><code class="language-bash"># Close unnecessary applications
# Upgrade system resources
# Use SSD storage if available
# Increase timeout values
export PROVISIONING_TIMEOUT=600 # 10 minutes
</code></pre>
<h4 id="network-issues-1"><a class="header" href="#network-issues-1">Network Issues</a></h4>
<pre><code class="language-bash"># Use region closer to your location
[providers.aws]
region = "us-west-1" # Closer region
# Enable connection pooling/caching
[cache]
enabled = true
</code></pre>
<h4 id="large-infrastructure-issues"><a class="header" href="#large-infrastructure-issues">Large Infrastructure Issues</a></h4>
<pre><code class="language-bash"># Use parallel operations
provisioning server create --infra my-infra --parallel 4
# Filter results
provisioning server list --infra my-infra --filter "status == 'running'"
</code></pre>
<h3 id="issue-high-memory-usage"><a class="header" href="#issue-high-memory-usage">Issue: High Memory Usage</a></h3>
<p><strong>Symptoms:</strong></p>
<ul>
<li>System becomes unresponsive</li>
<li>Out of memory errors</li>
<li>Swap usage high</li>
</ul>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check memory usage
free -h
ps aux --sort=-%mem | head
# Check for memory leaks
valgrind provisioning server list --infra my-infra
</code></pre>
<p><strong>Solutions:</strong></p>
<pre><code class="language-bash"># Increase system memory
# Close other applications
# Use streaming operations for large datasets
# Enable garbage collection
export PROVISIONING_GC_ENABLED=true
# Reduce concurrent operations
export PROVISIONING_MAX_PARALLEL=2
</code></pre>
<h2 id="network-and-connectivity-issues"><a class="header" href="#network-and-connectivity-issues">Network and Connectivity Issues</a></h2>
<h3 id="issue-api-connectivity-problems"><a class="header" href="#issue-api-connectivity-problems">Issue: API Connectivity Problems</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Connection timeout
DNS resolution failed
SSL certificate errors
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Test basic connectivity
ping 8.8.8.8
curl -I https://api.aws.amazon.com
nslookup api.upcloud.com
# Check SSL certificates
openssl s_client -connect api.aws.amazon.com:443 -servername api.aws.amazon.com
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="dns-issues"><a class="header" href="#dns-issues">DNS Issues</a></h4>
<pre><code class="language-bash"># Use alternative DNS (this overwrites /etc/resolv.conf; back it up first)
echo 'nameserver 8.8.8.8' | sudo tee /etc/resolv.conf
# Clear DNS cache
sudo systemctl restart systemd-resolved # Ubuntu
sudo dscacheutil -flushcache # macOS
</code></pre>
<h4 id="proxyfirewall-issues"><a class="header" href="#proxyfirewall-issues">Proxy/Firewall Issues</a></h4>
<pre><code class="language-bash"># Configure proxy if needed
export HTTP_PROXY=http://proxy.company.com:9090
export HTTPS_PROXY=http://proxy.company.com:9090
# Check firewall rules
sudo ufw status # Ubuntu
sudo firewall-cmd --list-all # RHEL/CentOS
</code></pre>
<h4 id="certificate-issues"><a class="header" href="#certificate-issues">Certificate Issues</a></h4>
<pre><code class="language-bash"># Update CA certificates
sudo apt update &amp;&amp; sudo apt install ca-certificates # Ubuntu
brew install ca-certificates # macOS
# Skip SSL verification (temporary and insecure: use only to diagnose, then unset)
export PROVISIONING_SKIP_SSL_VERIFY=true
</code></pre>
<h2 id="security-and-encryption-issues"><a class="header" href="#security-and-encryption-issues">Security and Encryption Issues</a></h2>
<h3 id="issue-sops-decryption-fails"><a class="header" href="#issue-sops-decryption-fails">Issue: SOPS Decryption Fails</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>SOPS decryption failed
Age key not found
Invalid key format
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check SOPS configuration
provisioning sops config
# Test SOPS manually
sops -d encrypted-file.k
# Check Age keys
ls -la ~/.config/sops/age/keys.txt
age-keygen -y ~/.config/sops/age/keys.txt
</code></pre>
<p><strong>Solutions:</strong></p>
<h4 id="missing-keys"><a class="header" href="#missing-keys">Missing Keys</a></h4>
<pre><code class="language-bash"># Generate new Age key (make sure the key directory exists first)
mkdir -p ~/.config/sops/age
age-keygen -o ~/.config/sops/age/keys.txt
# Update SOPS configuration
provisioning sops config --key-file ~/.config/sops/age/keys.txt
</code></pre>
<h4 id="key-permissions"><a class="header" href="#key-permissions">Key Permissions</a></h4>
<pre><code class="language-bash"># Fix key file permissions
chmod 600 ~/.config/sops/age/keys.txt
chown $(whoami) ~/.config/sops/age/keys.txt
</code></pre>
<h4 id="configuration-issues-2"><a class="header" href="#configuration-issues-2">Configuration Issues</a></h4>
<pre><code class="language-toml"># Update SOPS configuration in ~/.config/provisioning/config.toml
[sops]
use_sops = true
key_search_paths = [
"~/.config/sops/age/keys.txt",
"/path/to/your/key.txt"
]
</code></pre>
<h3 id="issue-access-denied-errors"><a class="header" href="#issue-access-denied-errors">Issue: Access Denied Errors</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>Permission denied
Access denied
Insufficient privileges
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check user permissions
id
groups
# Check file permissions
ls -la ~/.config/provisioning/
ls -la /usr/local/provisioning/
# Test with sudo
sudo provisioning env
</code></pre>
<p><strong>Solutions:</strong></p>
<pre><code class="language-bash"># Fix file ownership (use the primary group; it is not always named after the user, e.g. on macOS)
sudo chown -R "$(id -un):$(id -gn)" ~/.config/provisioning/
# Fix permissions (capital X adds execute only to directories and files that are already executable)
chmod -R u=rwX,go=rX ~/.config/provisioning/
chmod 600 ~/.config/provisioning/config.toml
# Add user to required groups
sudo usermod -a -G docker $(whoami) # For Docker access
</code></pre>
<h2 id="data-and-storage-issues"><a class="header" href="#data-and-storage-issues">Data and Storage Issues</a></h2>
<h3 id="issue-disk-space-problems"><a class="header" href="#issue-disk-space-problems">Issue: Disk Space Problems</a></h3>
<p><strong>Symptoms:</strong></p>
<pre><code>No space left on device
Write failed
Disk full
</code></pre>
<p><strong>Diagnosis:</strong></p>
<pre><code class="language-bash"># Check disk usage
df -h
du -sh ~/.config/provisioning/
du -sh /usr/local/provisioning/
# Find large files
find /usr/local/provisioning -type f -size +100M
</code></pre>
<p><strong>Solutions:</strong></p>
<pre><code class="language-bash"># Clean up cache files
rm -rf ~/.config/provisioning/cache/*
rm -rf /usr/local/provisioning/.cache/*
# Clean up logs
find /usr/local/provisioning -name "*.log" -mtime +30 -delete
# Clean up temporary files
rm -rf /tmp/provisioning-*
# Compress old backups
gzip ~/.config/provisioning/backups/*.yaml
</code></pre>
<h2 id="recovery-procedures"><a class="header" href="#recovery-procedures">Recovery Procedures</a></h2>
<h3 id="configuration-recovery"><a class="header" href="#configuration-recovery">Configuration Recovery</a></h3>
<pre><code class="language-bash"># Restore from backup
provisioning config restore --backup latest
# Reset to defaults
provisioning config reset
# Recreate configuration
provisioning init config --force
</code></pre>
<h3 id="infrastructure-recovery"><a class="header" href="#infrastructure-recovery">Infrastructure Recovery</a></h3>
<pre><code class="language-bash"># Check infrastructure status
provisioning show servers --infra my-infra
# Recover failed servers
provisioning server create failed-server --infra my-infra
# Restore from backup
provisioning restore --backup latest --infra my-infra
</code></pre>
<h3 id="service-recovery"><a class="header" href="#service-recovery">Service Recovery</a></h3>
<pre><code class="language-bash"># Restart failed services
provisioning taskserv restart kubernetes --infra my-infra
# Reinstall corrupted services
provisioning taskserv delete kubernetes --infra my-infra
provisioning taskserv create kubernetes --infra my-infra
</code></pre>
<h2 id="prevention-strategies"><a class="header" href="#prevention-strategies">Prevention Strategies</a></h2>
<h3 id="regular-maintenance"><a class="header" href="#regular-maintenance">Regular Maintenance</a></h3>
<pre><code class="language-bash">#!/bin/bash
# Weekly maintenance script
# Update system
provisioning update --check
# Validate configuration
provisioning validate config
# Check for service updates
provisioning taskserv check-updates
# Clean up old files
provisioning cleanup --older-than 30d
# Create backup
provisioning backup create --name "weekly-$(date +%Y%m%d)"
</code></pre>
<h3 id="monitoring-setup"><a class="header" href="#monitoring-setup">Monitoring Setup</a></h3>
<pre><code class="language-bash"># Set up health monitoring
# These are crontab entries, not a shell script: add them with `crontab -e`
# Check system health every hour
0 * * * * /usr/local/bin/provisioning health check || echo "Health check failed" | mail -s "Provisioning Alert" admin@company.com
# Weekly cost reports
0 9 * * 1 /usr/local/bin/provisioning show costs --all | mail -s "Weekly Cost Report" finance@company.com
</code></pre>
<h3 id="best-practices"><a class="header" href="#best-practices">Best Practices</a></h3>
<ol>
<li>
<p><strong>Configuration Management</strong></p>
<ul>
<li>Version control all configuration files</li>
<li>Use check mode before applying changes</li>
<li>Regular validation and testing</li>
</ul>
</li>
<li>
<p><strong>Security</strong></p>
<ul>
<li>Regular key rotation</li>
<li>Principle of least privilege</li>
<li>Audit logs review</li>
</ul>
</li>
<li>
<p><strong>Backup Strategy</strong></p>
<ul>
<li>Automated daily backups</li>
<li>Test restore procedures</li>
<li>Off-site backup storage</li>
</ul>
</li>
<li>
<p><strong>Documentation</strong></p>
<ul>
<li>Document custom configurations</li>
<li>Keep troubleshooting logs</li>
<li>Share knowledge with team</li>
</ul>
</li>
</ol>
<h2 id="getting-additional-help"><a class="header" href="#getting-additional-help">Getting Additional Help</a></h2>
<h3 id="debug-information-collection"><a class="header" href="#debug-information-collection">Debug Information Collection</a></h3>
<pre><code class="language-bash">#!/bin/bash
# Collect debug information
echo "Collecting provisioning debug information..."
mkdir -p /tmp/provisioning-debug
cd /tmp/provisioning-debug
# System information
uname -a &gt; system-info.txt
free -h &gt;&gt; system-info.txt
df -h &gt;&gt; system-info.txt
# Provisioning information
provisioning --version &gt; provisioning-info.txt
provisioning env &gt;&gt; provisioning-info.txt
provisioning validate config --detailed &gt; config-validation.txt 2&gt;&amp;1
# Configuration files (may contain credentials: review user-config.toml before sharing the archive)
cp ~/.config/provisioning/config.toml user-config.toml 2&gt;/dev/null || echo "No user config" &gt; user-config.toml
# Logs
provisioning show logs &gt; system-logs.txt 2&gt;&amp;1
# Create archive
cd /tmp
archive="/tmp/provisioning-debug-$(date +%Y%m%d_%H%M%S).tar.gz"
tar czf "$archive" provisioning-debug/
echo "Debug information collected in: $archive"
</code></pre>
<h3 id="support-channels"><a class="header" href="#support-channels">Support Channels</a></h3>
<ol>
<li>
<p><strong>Built-in Help</strong></p>
<pre><code class="language-bash">provisioning help
provisioning help &lt;command&gt;
</code></pre>
</li>
<li>
<p><strong>Documentation</strong></p>
<ul>
<li>User guides in <code>docs/user/</code></li>
<li>CLI reference: <code>docs/user/cli-reference.md</code></li>
<li>Configuration guide: <code>docs/user/configuration.md</code></li>
</ul>
</li>
<li>
<p><strong>Community Resources</strong></p>
<ul>
<li>Project repository issues</li>
<li>Community forums</li>
<li>Documentation wiki</li>
</ul>
</li>
<li>
<p><strong>Enterprise Support</strong></p>
<ul>
<li>Professional services</li>
<li>Priority support</li>
<li>Custom development</li>
</ul>
</li>
</ol>
<p>Remember: When reporting issues, always include the debug information collected above and specific error messages.</p>
</main>
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="../user/test-environment-usage.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../user/AUTHENTICATION_LAYER_GUIDE.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a rel="prev" href="../user/test-environment-usage.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../user/AUTHENTICATION_LAYER_GUIDE.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<!-- Livereload script (if served using the cli tool) -->
<script>
// Livereload client: opens a WebSocket to "/__livereload" on the host that
// served this page and reloads the page when the server sends "reload".

// Use a secure WebSocket when the page itself was loaded over HTTPS.
const wsProtocol = (location.protocol === 'https:') ? 'wss:' : 'ws:';
const wsAddress = `${wsProtocol}//${location.host}/__livereload`;
const socket = new WebSocket(wsAddress);

socket.onmessage = (event) => {
    // Ignore every message other than the reload signal.
    if (event.data !== "reload") {
        return;
    }
    // Close the connection before reloading the page.
    socket.close();
    location.reload();
};

// Close the socket when the page is about to be unloaded.
window.onbeforeunload = () => {
    socket.close();
};
</script>
<script>
// Sets a global flag on window; presumably read by book.js (loaded below)
// to enable copy buttons on code blocks. TODO confirm against book.js.
window.playground_copyable = true;
</script>
<script src="../elasticlunr.min.js"></script>
<script src="../mark.min.js"></script>
<script src="../searcher.js"></script>
<script src="../clipboard.min.js"></script>
<script src="../highlight.js"></script>
<script src="../book.js"></script>
<!-- Custom JS scripts -->
</div>
</body>
</html>