TypeDialog/scripts/generate_sbom.py

283 lines
8.9 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""Generate Software Bill of Materials (SBOM) in multiple formats.
Generates:
- LICENSE.md - Detailed dependency attribution
- DEPENDENCIES.md - Organized dependency tree (linked from LICENSE.md; not written by this script)
- SBOM.spdx.json - SPDX 2.3 format
- SBOM.cyclonedx.json - CycloneDX 1.4 format
"""
import json
import subprocess
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path
def get_workspace_root():
    """Return the workspace root: the parent of the directory holding this script.

    The script path is resolved to an absolute path before walking upwards.
    Without that, a relative ``__file__`` such as ``generate_sbom.py`` has
    ``.`` as both its parent and its grandparent, so the "root" would
    silently become the current working directory.

    Returns:
        pathlib.Path: Absolute path of the workspace root.
    """
    script_dir = Path(__file__).resolve().parent
    return script_dir.parent
def run_cargo_license():
    """Return the dependency data printed by ``cargo license --json``.

    The command runs in the current working directory.

    Returns:
        The decoded JSON document from cargo-license's stdout.

    Exits:
        Prints a message to stderr and calls ``sys.exit(1)`` when cargo
        cannot be started, when the command exits with a non-zero status,
        or when its output is not valid JSON.
    """
    command = ["cargo", "license", "--json"]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except OSError as e:
        # Raised (e.g. FileNotFoundError) when the cargo executable itself
        # cannot be launched; report it like every other failure here
        # instead of surfacing a raw traceback.
        print(f"Error running cargo license: could not start cargo ({e})", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"Error running cargo license: {e}", file=sys.stderr)
        # The output was captured, so forward cargo's own diagnostics;
        # otherwise the reason for the failure is lost.
        if e.stderr and e.stderr.strip():
            print(e.stderr.strip(), file=sys.stderr)
        sys.exit(1)

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error parsing cargo license output: {e}", file=sys.stderr)
        sys.exit(1)
def _package_bullets(pkgs):
    """Return one ``- name version`` Markdown bullet per package, sorted by name."""
    return [
        f"- {pkg['name']} {pkg['version']}"
        for pkg in sorted(pkgs, key=lambda p: p["name"])
    ]


def _license_section(heading, pkgs):
    """Return ``heading (count)``, one bullet line per package, then a blank line."""
    lines = [f"{heading} ({len(pkgs)})\n"]
    lines.extend(f"{bullet}\n" for bullet in _package_bullets(pkgs))
    lines.append("\n")
    return "".join(lines)


def generate_license_md(licenses, workspace_root):
    """Write LICENSE.md, listing every dependency grouped by license string.

    Sections are emitted in a fixed order: "Apache-2.0", "MIT",
    "Apache-2.0 OR MIT" (laid out three packages per row), "MIT OR Unlicense",
    then every remaining license string under "Other Licenses". A summary
    with counts and the generation time closes the file.

    Args:
        licenses: List of package dicts with ``name`` and ``version`` keys and
            an optional ``license`` key. A missing, null or empty license is
            grouped under "Unknown".
        workspace_root: ``pathlib.Path`` of the directory to write into.

    Returns:
        pathlib.Path: Path of the written LICENSE.md.
    """
    by_license = defaultdict(list)
    for pkg in licenses:
        # `.get("license", "Unknown")` is not enough: when the key is present
        # with a null value it returns None, which later breaks sorting of
        # the license names.
        by_license[pkg.get("license") or "Unknown"].append(pkg)

    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        "# TypeDialog License\n"
        "## Project License\n"
        "TypeDialog is licensed under the **MIT License**.\n"
        "See [LICENSE](LICENSE) file for the full MIT license text.\n"
        "---\n"
        "## Dependencies\n"
        "This project includes the following dependencies under their respective licenses:\n"
    ]

    for lic, heading in (
        ("Apache-2.0", "### Apache-2.0 Only"),
        ("MIT", "### MIT Only"),
    ):
        if lic in by_license:
            parts.append(_license_section(heading, by_license[lic]))

    # Dual-licensed packages are usually the largest group, so they are packed
    # three to a row. Joining each row avoids the dangling " | " that a
    # per-item separator leaves when the count is not a multiple of three.
    dual = by_license.get("Apache-2.0 OR MIT")
    if dual:
        bullets = _package_bullets(dual)
        parts.append(f"### Apache-2.0 OR MIT ({len(dual)})\n\n")
        parts.append(
            "Most dependencies use dual licensing between Apache-2.0 and MIT.\n\n"
        )
        parts.extend(
            " | ".join(bullets[start:start + 3]) + "\n"
            for start in range(0, len(bullets), 3)
        )
        parts.append("\n")

    if "MIT OR Unlicense" in by_license:
        parts.append(
            _license_section("### MIT OR Unlicense", by_license["MIT OR Unlicense"])
        )

    named_sections = ("Apache-2.0", "MIT", "Apache-2.0 OR MIT", "MIT OR Unlicense")
    other = {
        lic: pkgs for lic, pkgs in by_license.items() if lic not in named_sections
    }
    if other:
        parts.append("### Other Licenses\n\n")
        for lic, pkgs in sorted(other.items()):
            parts.append(_license_section(f"**{lic}**", pkgs))

    parts.append(
        f"""---
## Summary
- **Project License**: MIT
- **Total Dependencies**: {len(licenses)}
- **Unique License Types**: {len(by_license)} different combinations
- **Primary License Pattern**: Apache-2.0 OR MIT (most dependencies)
### Compliance
All dependencies are compatible with the MIT license under:
- Permissive licenses (MIT, Apache-2.0, BSD-3-Clause, MPL-2.0, Zlib)
- Weak copyleft (LGPL-2.1-or-later, MPL-2.0)
- Public domain (Unlicense, Unicode-3.0)
### Generated
- Date: {datetime.now().isoformat()}
- Tool: cargo-license
See [DEPENDENCIES.md](DEPENDENCIES.md) for the complete dependency tree.
"""
    )

    output_file = workspace_root / "LICENSE.md"
    # Explicit encoding so the result does not depend on the platform locale.
    output_file.write_text("".join(parts), encoding="utf-8")
    return output_file
def generate_sbom_spdx(licenses, workspace_root):
    """Write SBOM.spdx.json, an SPDX 2.3 document for the project and its dependencies.

    The document holds one package entry for typedialog itself followed by
    one entry per dependency, numbered ``SPDXRef-dependency-1..N`` in input
    order.

    Args:
        licenses: List of package dicts with ``name`` and ``version`` keys and
            optional ``license`` / ``repository`` keys. Missing, null or empty
            values of the optional keys are written as ``NOASSERTION``.
        workspace_root: ``pathlib.Path`` of the directory to write into.

    Returns:
        pathlib.Path: Path of the written SBOM.spdx.json.
    """
    # Local import: the file only imports `datetime` from this module.
    from datetime import timezone

    # SPDX timestamps are UTC in the form YYYY-MM-DDThh:mm:ssZ. Appending "Z"
    # to a naive local isoformat() mislabels local time as UTC and also
    # includes microseconds.
    now = datetime.now(timezone.utc)
    project_url = "https://github.com/anthropics/typedialog"

    packages = [
        {
            "SPDXID": "SPDXRef-typedialog",
            "name": "typedialog",
            # SPDX JSON names the package version field "versionInfo".
            "versionInfo": "0.1.0",
            "downloadLocation": project_url,
            "homepage": project_url,
            "licenseDeclared": "MIT",
            "licenseConcluded": "MIT",
            "filesAnalyzed": False,
        }
    ]

    for index, pkg in enumerate(licenses, 1):
        # `or` also covers keys that are present but null, which
        # `.get(key, default)` would pass through as JSON null.
        license_expr = pkg.get("license") or "NOASSERTION"
        external_refs = []
        if pkg["name"]:
            external_refs.append(
                {
                    "referenceCategory": "PACKAGE-MANAGER",
                    # The locator is a package URL, so the type is "purl".
                    "referenceType": "purl",
                    "referenceLocator": f"pkg:cargo/{pkg['name']}@{pkg['version']}",
                }
            )
        packages.append(
            {
                "SPDXID": f"SPDXRef-dependency-{index}",
                "name": pkg["name"],
                "versionInfo": pkg["version"],
                "downloadLocation": pkg.get("repository") or "NOASSERTION",
                "licenseDeclared": license_expr,
                "licenseConcluded": license_expr,
                "filesAnalyzed": False,
                "externalRefs": external_refs,
            }
        )

    spdx = {
        "SPDXID": "SPDXRef-DOCUMENT",
        "spdxVersion": "SPDX-2.3",
        "creationInfo": {
            "created": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "creators": ["Tool: cargo-license"],
        },
        "name": "typedialog",
        "dataLicense": "CC0-1.0",
        "documentNamespace": f"{project_url}/sbom-{now.strftime('%Y%m%d')}",
        "packages": packages,
    }

    output_file = workspace_root / "SBOM.spdx.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(spdx, f, indent=2)
    return output_file
def generate_sbom_cyclonedx(licenses, workspace_root):
    """Write SBOM.cyclonedx.json, a CycloneDX 1.4 BOM for the project and its dependencies.

    The project is described in ``metadata.component``; every dependency
    becomes a ``library`` component with a cargo package URL.

    Args:
        licenses: List of package dicts with ``name`` and ``version`` keys and
            optional ``license`` / ``repository`` keys. A missing, null or
            empty license is recorded as "Unknown"; a missing repository is
            simply omitted.
        workspace_root: ``pathlib.Path`` of the directory to write into.

    Returns:
        pathlib.Path: Path of the written SBOM.cyclonedx.json.
    """
    # Local import: the file only imports `datetime` from this module.
    from datetime import timezone

    project_url = "https://github.com/anthropics/typedialog"
    # Appending "Z" to a naive local isoformat() mislabels local time as UTC;
    # take the time in UTC so the suffix is true.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    components = []
    for pkg in licenses:
        component = {
            "type": "library",
            "name": pkg["name"],
            "version": pkg["version"],
            "purl": f"pkg:cargo/{pkg['name']}@{pkg['version']}",
            # `or` also covers a license key that is present but null.
            "licenses": [{"license": {"name": pkg.get("license") or "Unknown"}}],
        }
        repository = pkg.get("repository")
        if repository:
            # CycloneDX components have no "homepage" property; links belong
            # in externalReferences. Omit the entry rather than emit an
            # empty URL.
            component["externalReferences"] = [{"type": "vcs", "url": repository}]
        components.append(component)

    cyclone = {
        "bomFormat": "CycloneDX",
        "specVersion": "1.4",
        "version": 1,
        "metadata": {
            "timestamp": timestamp,
            "tools": [{"vendor": "cargo", "name": "cargo-license", "version": "1.0"}],
            "component": {
                "type": "application",
                "name": "typedialog",
                "version": "0.1.0",
                "externalReferences": [
                    {"type": "website", "url": project_url},
                    {"type": "vcs", "url": project_url},
                ],
                "licenses": [{"license": {"name": "MIT"}}],
            },
        },
        "components": components,
    }

    output_file = workspace_root / "SBOM.cyclonedx.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(cyclone, f, indent=2)
    return output_file
def main():
    """Fetch the dependency list and write every output file.

    Runs the LICENSE.md, SPDX and CycloneDX generators in that order against
    the workspace root, reporting progress and a short summary on stderr.
    """

    def log(message):
        # All progress output goes to stderr.
        print(message, file=sys.stderr)

    root = get_workspace_root()

    log("📦 Fetching dependency licenses...")
    packages = run_cargo_license()
    log(f" Found {len(packages)} dependencies")

    # (progress banner, generator) pairs, executed in order.
    steps = (
        ("📝 Generating LICENSE.md...", generate_license_md),
        ("📄 Generating SBOM.spdx.json...", generate_sbom_spdx),
        ("📄 Generating SBOM.cyclonedx.json...", generate_sbom_cyclonedx),
    )
    for banner, generate in steps:
        log(banner)
        written = generate(packages, root)
        log(f"{written.name}")

    log("\n✅ SBOM generation complete")
    combinations = {entry.get("license", "Unknown") for entry in packages}
    log(f" - License combinations: {len(combinations)}")
    log(f" - Generated: {datetime.now().isoformat()}")
# Script entry point: generate the files only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()