Skip to content

Commit 24d97fc

Browse files
feat: Replace hardcoded SPDX enum with spdx-licenses gem for dynamic validation (#44)
The license/spdx annotation previously used a hardcoded enum of ~20 licenses, requiring manual updates whenever a new SPDX ID appeared. It now uses the spdx-licenses gem, which provides the full canonical list of 500+ identifiers.

- Add spdx-licenses gem dependency
- Remove hardcoded enum from license/spdx annotation
- Replace KNOWN_SPDX set with SpdxLicenses.exist? calls in LicenseAnalyzer
- Add validator: option to Annotation class for custom validation lambdas
- Add SPDX_VALIDATOR on TechnologyArtifact to validate license values at lint time
- Add BUSL-1.1 as source-available category for risk assessment

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 2535374 commit 24d97fc

9 files changed

Lines changed: 147 additions & 13 deletions

File tree

.rubocop.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ Metrics/BlockLength:
8181
- "*.gemspec"
8282

8383
Metrics/ParameterLists:
84-
Max: 10
84+
Max: 12
8585
Exclude:
8686
- "test/**/*"
8787

Gemfile.lock

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ PATH
1212
rouge (~> 4.7.0)
1313
sinatra (~> 4.0)
1414
sinatra-contrib (~> 4.0)
15+
spdx-licenses (~> 1.0)
1516
thor (~> 1.0)
1617
tty-markdown (~> 0.7)
1718

@@ -206,6 +207,7 @@ GEM
206207
rack-protection (= 4.2.1)
207208
sinatra (= 4.2.1)
208209
tilt (~> 2.0)
210+
spdx-licenses (1.4.0)
209211
steep (1.10.0)
210212
activesupport (>= 5.1)
211213
concurrent-ruby (>= 1.1.10)
@@ -356,6 +358,7 @@ CHECKSUMS
356358
simplecov_json_formatter (0.1.4) sha256=529418fbe8de1713ac2b2d612aa3daa56d316975d307244399fa4838c601b428
357359
sinatra (4.2.1) sha256=b7aeb9b11d046b552972ade834f1f9be98b185fa8444480688e3627625377080
358360
sinatra-contrib (4.2.1) sha256=10d091c944d268aa910c618ea40a3c3ebe0533e6e32990d84af92235a3d26b4a
361+
spdx-licenses (1.4.0) sha256=953820f3714b5f998b56a2a663ebeac51667a4017392ad53cd30709e8fa4f67d
359362
steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
360363
stringio (3.2.0) sha256=c37cb2e58b4ffbd33fe5cd948c05934af997b36e0b6ca6fdf43afa234cf222e1
361364
strings (0.2.1) sha256=933293b3c95cf85b81eb44b3cf673e3087661ba739bbadfeadf442083158d6fb

archsight.gemspec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ Gem::Specification.new do |spec|
5151
spec.add_dependency "rouge", "~> 4.7.0"
5252
spec.add_dependency "sinatra", "~> 4.0"
5353
spec.add_dependency "sinatra-contrib", "~> 4.0"
54+
spec.add_dependency "spdx-licenses", "~> 1.0"
5455
spec.add_dependency "thor", "~> 1.0"
5556
spec.add_dependency "tty-markdown", "~> 0.7"
5657
end

lib/archsight/annotations/annotation.rb

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def initialize(key, options = {})
1212
@explicit_title = options[:title]
1313
@filter = options[:filter]
1414
@enum = options[:enum]
15+
@validator = options[:validator]
1516
@sidebar = options.fetch(:sidebar, true)
1617
@list = options.fetch(:list, false)
1718
@editor = options.fetch(:editor, true)
@@ -54,7 +55,7 @@ def list_display?
5455
end
5556

5657
def has_validation?
57-
@enum || @type.is_a?(Class)
58+
@enum || @validator || @type.is_a?(Class)
5859
end
5960

6061
# === Value Methods (for instance values) ===
@@ -85,6 +86,7 @@ def validate(value)
8586
return errors if value.nil?
8687

8788
validate_enum(value, errors)
89+
validate_custom(value, errors) if errors.empty?
8890
validate_type(value, errors) if errors.empty?
8991
validate_code(value, errors) if errors.empty?
9092

@@ -147,6 +149,16 @@ def validate_enum(value, errors)
147149
end
148150
end
149151

152+
def validate_custom(value, errors)
153+
return unless @validator
154+
155+
values = list? ? value.to_s.split(",").map(&:strip) : [value.to_s]
156+
values.each do |v|
157+
message = @validator.call(v) # steep:ignore
158+
errors << message if message
159+
end
160+
end
161+
150162
def validate_type(value, errors)
151163
return unless @type.is_a?(Class)
152164

lib/archsight/import/license_analyzer.rb

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
require "open3"
44
require "json"
5+
require "spdx-licenses"
56
require "archsight/import"
67

78
# License detection and dependency license scanning for repositories
@@ -32,6 +33,7 @@ class Archsight::Import::LicenseAnalyzer
3233
{ id: "Unlicense", re: /\bThis is free and unencumbered software\b/mi },
3334
{ id: "CC0-1.0", re: /Creative Commons.*CC0|CC0 1\.0 Universal/mi },
3435
{ id: "BSL-1.0", re: /Boost Software License/mi },
36+
{ id: "BUSL-1.1", re: /Business Source License.*1\.1/mi },
3537
{ id: "EUPL-1.2", re: /European Union Public Licen[cs]e.*1\.2/mi }
3638
].freeze
3739

@@ -40,6 +42,7 @@ class Archsight::Import::LicenseAnalyzer
4042
"permissive" => %w[Apache-2.0 MIT BSD-3-Clause BSD-2-Clause ISC Unlicense CC0-1.0 BSL-1.0 0BSD Ruby],
4143
"copyleft" => %w[GPL-3.0 GPL-2.0 AGPL-3.0],
4244
"weak-copyleft" => %w[LGPL-3.0 LGPL-2.1 MPL-2.0 EUPL-1.2 CDDL-1.0],
45+
"source-available" => %w[BUSL-1.1],
4346
"proprietary" => %w[proprietary]
4447
}.freeze
4548

@@ -56,10 +59,8 @@ class Archsight::Import::LicenseAnalyzer
5659
\(c\)\s
5760
/xi
5861

59-
# Known SPDX IDs for dual-license splitting
60-
KNOWN_SPDX = Set.new(
61-
CATEGORIES.values.flatten + %w[NOASSERTION unknown]
62-
).freeze
62+
# Custom non-SPDX values we accept
63+
CUSTOM_LICENSE_VALUES = Set.new(%w[NOASSERTION proprietary unknown]).freeze
6364

6465
# License file names to search (in order of priority)
6566
LICENSE_FILES = %w[
@@ -289,7 +290,7 @@ def normalize_spdx(raw)
289290
parts = cleaned.split(%r{\s*/\s*|\s+OR\s+}i)
290291
parts.each do |part|
291292
normalized = normalize_spdx_single(part.strip)
292-
return normalized if KNOWN_SPDX.include?(normalized)
293+
return normalized if known_spdx?(normalized)
293294
end
294295
end
295296

@@ -322,6 +323,11 @@ def normalize_spdx_single(cleaned)
322323
end
323324
end
324325

326+
# Check if a value is a known SPDX ID or one of our custom values
327+
def known_spdx?(value)
328+
CUSTOM_LICENSE_VALUES.include?(value) || SpdxLicenses.exist?(value)
329+
end
330+
325331
# Categorize a license SPDX identifier
326332
def categorize(spdx)
327333
CATEGORY_LOOKUP[spdx] || "unknown"
@@ -633,13 +639,15 @@ def assess_risk(license_names, total)
633639

634640
strong_copyleft = CATEGORIES["copyleft"]
635641
weak_copyleft = CATEGORIES["weak-copyleft"]
642+
source_available = CATEGORIES["source-available"]
636643

637644
has_strong = license_names.any? { |l| strong_copyleft.include?(l) }
638645
has_weak = license_names.any? { |l| weak_copyleft.include?(l) }
646+
has_source_available = license_names.any? { |l| source_available.include?(l) }
639647
unknown_count = license_names.count { |l| l == "unknown" }
640648
many_unknown = unknown_count.positive? && (unknown_count.to_f / license_names.size) > 0.5
641649

642-
if has_strong || many_unknown
650+
if has_strong || many_unknown || has_source_available
643651
"copyleft"
644652
elsif has_weak
645653
"weak-copyleft"

lib/archsight/resources/base.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ def self.relations
2525

2626
# Define an annotation using the Annotation class
2727
def self.annotation(key, description: nil, filter: nil, title: nil, format: nil, enum: nil, sidebar: true,
28-
type: nil, list: false, editor: true)
28+
type: nil, list: false, editor: true, validator: nil)
2929
@annotations ||= [] #: Array[Archsight::Annotations::Annotation]
3030
options = { description: description, filter: filter, title: title, format: format, enum: enum,
31-
sidebar: sidebar, type: type, list: list, editor: editor }
31+
sidebar: sidebar, type: type, list: list, editor: editor, validator: validator }
3232
@annotations << Archsight::Annotations::Annotation.new(key, options)
3333
end
3434

lib/archsight/resources/technology_artifact.rb

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# frozen_string_literal: true
22

33
require "uri"
4+
require "spdx-licenses"
45

56
# TechnologyArtifact usually a source code repository or container
67
class Archsight::Resources::TechnologyArtifact < Archsight::Resources::Base
@@ -159,14 +160,17 @@ class Archsight::Resources::TechnologyArtifact < Archsight::Resources::Base
159160
enum: %w[unprivileged privileged]
160161

161162
# License information
163+
SPDX_CUSTOM_VALUES = Set.new(%w[NOASSERTION proprietary unknown]).freeze
164+
SPDX_VALIDATOR = lambda { |v|
165+
"invalid SPDX license identifier '#{v}'" unless SPDX_CUSTOM_VALUES.include?(v) || SpdxLicenses.exist?(v)
166+
}
167+
162168
annotation "license/spdx",
163169
description: "SPDX license identifier",
164170
title: "License",
165171
filter: :word,
166172
sidebar: false,
167-
enum: %w[Apache-2.0 MIT BSD-3-Clause BSD-2-Clause GPL-3.0 GPL-2.0 LGPL-3.0
168-
LGPL-2.1 MPL-2.0 ISC AGPL-3.0 Unlicense CC0-1.0 BSL-1.0 EUPL-1.2
169-
0BSD CDDL-1.0 Ruby NOASSERTION proprietary unknown]
173+
validator: SPDX_VALIDATOR
170174
annotation "license/file",
171175
description: "License file path",
172176
title: "License File",

sig/archsight/annotations.rbs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ module Archsight
1111
attr_reader list: bool
1212

1313
@explicit_title: String?
14+
@validator: (^(String) -> String?)?
1415
@regex: untyped
1516

1617
def initialize: (String, ?Hash[Symbol, untyped]) -> void
@@ -34,6 +35,7 @@ module Archsight
3435

3536
def type_error_message: () -> String
3637
def validate_enum: (untyped, Array[String]) -> void
38+
def validate_custom: (untyped, Array[String]) -> void
3739
def validate_type: (untyped, Array[String]) -> void
3840
def valid_type_value?: (String) -> bool
3941
def valid_uri?: (String) -> bool

test/import/license_analyzer_test.rb

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,8 +646,112 @@ def test_normalize_parenthesized_or_expression
646646
assert_equal "MIT", result["license_spdx"]
647647
end
648648

649+
# --- SPDX gem validation (known_spdx?) ---
650+
651+
def test_dual_license_recognizes_spdx_id_not_in_categories
652+
# Artistic-2.0 is a valid SPDX ID but not in our CATEGORIES hash;
653+
# the gem should still recognize it during dual-license splitting
654+
write_license_manifest("Artistic-2.0/MIT")
655+
result = analyze
656+
657+
assert_equal "Artistic-2.0", result["license_spdx"]
658+
end
659+
660+
def test_dual_license_recognizes_mpl_2_0_plus_uncommon_spdx
661+
# Zlib is valid SPDX but was never in the old hardcoded list
662+
write_license_manifest("Zlib OR MIT")
663+
result = analyze
664+
665+
assert_equal "Zlib", result["license_spdx"]
666+
end
667+
668+
def test_dual_license_recognizes_custom_value_noassertion
669+
write_license_manifest("NOASSERTION/MIT")
670+
result = analyze
671+
672+
assert_equal "NOASSERTION", result["license_spdx"]
673+
end
674+
675+
def test_dual_license_skips_unrecognized_picks_known
676+
# "FooBar-1.0" is not a real SPDX ID; should skip it and pick MIT
677+
write_license_manifest("FooBar-1.0/MIT")
678+
result = analyze
679+
680+
assert_equal "MIT", result["license_spdx"]
681+
end
682+
683+
# --- BUSL-1.1 (source-available) ---
684+
685+
def test_detects_busl_license_file
686+
write_license("Business Source License 1.1\nLicensor: Acme Corp")
687+
result = analyze
688+
689+
assert_equal "BUSL-1.1", result["license_spdx"]
690+
end
691+
692+
def test_busl_categorized_as_source_available
693+
assert_equal "source-available",
694+
Archsight::Import::LicenseAnalyzer::CATEGORY_LOOKUP["BUSL-1.1"]
695+
end
696+
697+
# --- SpdxLicenses gem integration ---
698+
699+
def test_spdx_gem_recognizes_common_licenses
700+
%w[MIT Apache-2.0 GPL-3.0-only BSD-3-Clause ISC].each do |id|
701+
assert SpdxLicenses.exist?(id), "Expected SpdxLicenses to recognize #{id}"
702+
end
703+
end
704+
705+
def test_spdx_gem_rejects_invalid_ids
706+
%w[NOASSERTION proprietary unknown FooBar-1.0].each do |id|
707+
refute SpdxLicenses.exist?(id), "Expected SpdxLicenses to NOT recognize #{id}"
708+
end
709+
end
710+
711+
def test_custom_license_values_includes_our_special_ids
712+
custom = Archsight::Import::LicenseAnalyzer::CUSTOM_LICENSE_VALUES
713+
714+
assert_includes custom, "NOASSERTION"
715+
assert_includes custom, "proprietary"
716+
assert_includes custom, "unknown"
717+
end
718+
719+
# --- Lint validation (TechnologyArtifact license/spdx annotation) ---
720+
721+
def test_spdx_annotation_validates_valid_license
722+
annotation = spdx_annotation
723+
724+
assert_empty annotation.validate("MIT")
725+
assert_empty annotation.validate("Apache-2.0")
726+
assert_empty annotation.validate("GPL-3.0-only")
727+
end
728+
729+
def test_spdx_annotation_validates_custom_values
730+
annotation = spdx_annotation
731+
732+
assert_empty annotation.validate("NOASSERTION")
733+
assert_empty annotation.validate("proprietary")
734+
assert_empty annotation.validate("unknown")
735+
end
736+
737+
def test_spdx_annotation_rejects_invalid_license
738+
annotation = spdx_annotation
739+
errors = annotation.validate("NotARealLicense-1.0")
740+
741+
refute_empty errors
742+
assert_match(/invalid SPDX/, errors.first)
743+
end
744+
745+
def test_spdx_annotation_has_validation
746+
assert_predicate spdx_annotation, :has_validation?, "license/spdx annotation should have validation"
747+
end
748+
649749
private
650750

751+
def spdx_annotation
752+
Archsight::Resources::TechnologyArtifact.annotations.find { |a| a.key == "license/spdx" }
753+
end
754+
651755
# Write a license string via package.json manifest (simulates dependency-tool output)
652756
def write_license_manifest(license_string)
653757
File.write(File.join(@repo_dir, "package.json"),

0 commit comments

Comments
 (0)