File generate_sbom of Package build
#!/usr/bin/perl
################################################################
#
# Copyright (c) 2023 SUSE Linux LLC
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 or 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see the file COPYING); if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#
################################################################
BEGIN {
unshift @INC, ($::ENV{'BUILD_DIR'} || '/usr/lib/build');
}
use strict;
use File::Find;
use File::Temp;
use Digest::SHA;
use Digest::MD5;
use Build::Rpm;
use Build::SimpleJSON;
my $spdx_json_template = {
'_order' => [ qw{spdxVersion dataLicense SPDXID name documentNamespace creationInfo packages files relationships} ],
'creationInfo' => {
'_order' => [ qw{created creators licenseListVersion} ],
},
'packages' => {
'_order' => [ qw{name SPDXID versionInfo originator downloadLocation sourceInfo licenseConcluded licenseDeclared copyrightText externalRefs} ],
'externalRefs' => {
'_order' => [ qw{referenceCategory referenceType referenceLocator} ],
},
},
'files' => {
'_order' => [ qw{fileName SPDXID checksums licenseConcluded copyrightText comment} ],
},
'relationships' => {
'_order' => [ qw{spdxElementId relatedSpdxElement relationshipType} ],
},
};
my $intoto_json_template = {
'_order' => [ qw{_type predicateType subject predicate} ],
'subject' => {
'_order' => [ qw{name digest} ],
},
'predicate' => $spdx_json_template,
};
sub urlencode {
my ($str, $iscgi) = @_;
if ($iscgi) {
$str =~ s/([\000-\037<>;\"#\?&\+=%[\177-\377])/sprintf("%%%02X",ord($1))/sge;
$str =~ tr/ /+/;
} else {
$str =~ s/([\000-\040<>;\"#\?&\+=%[\177-\377])/sprintf("%%%02X",ord($1))/sge;
}
return $str;
}
sub rfc3339time {
my ($t) = @_;
my @gt = gmtime($t || time());
return sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ", $gt[5] + 1900, $gt[4] + 1, @gt[3,2,1,0];
}
sub sha256file {
my ($fn) = @_;
my $ctx = Digest::SHA->new(256);
eval { $ctx->addfile($fn) };
die("$fn: $@\n") if $@;
return $ctx->hexdigest();
}
sub system_chroot {
my ($root, @args) = @_;
my $pid = 0;
if ($args[0] eq 'exec') {
shift @args;
} else {
$pid = fork();
die("fork: $!\n") unless defined $pid;
}
if (!$pid) {
if ($args[0] eq 'quiet') {
shift @args;
open(STDOUT, '>>', '/dev/null');
open(STDERR, '>>', '/dev/null');
}
if ($args[0] eq 'stdout') {
open(STDOUT, '>', $args[1]) || die("$args[1]: $!\n");
splice(@args, 0, 2);
}
!$root || chroot($root) || die("chroot $root: $!\n");
exec(@args);
die("exec $args[0]: $!\n");
}
die unless waitpid($pid, 0) == $pid;
return $?;
}
sub popen_chroot {
my ($root, @args) = @_;
my $fd;
if (!$root) {
open($fd, '-|', @args) || die("open: $!\n");
return $fd;
}
my $pid = open($fd, '-|');
die("open: $!\n") unless defined $pid;
if ($pid == 0) {
!$root || chroot($root) || die("chroot $root: $!\n");
exec(@args);
die("exec $args[0]: $!\n");
}
return $fd;
}
sub can_run {
my ($root, $fname) = @_;
return 0 if $root && $>;
return -x "$root$fname";
}
sub systemq {
my $pid = fork();
die("fork: $!\n") unless defined $pid;
if (!$pid) {
open(STDOUT, '>', '/dev/null') || die("/dev/null: $!\n");
exec @_;
die("$_[0]: $!\n");
}
waitpid($pid, 0) == $pid || die("waitpid: $!\n");
exit($?) if $?;
}
sub uncompress_container {
my ($container, $outfile) = @_;
my @decompressor;
if ($container =~ /\.tar$/) {
push @decompressor, 'cat';
} elsif ($container =~ /\.tar\.gz$/) {
push @decompressor, 'gunzip';
} elsif ($container =~ /\.tar\.xz$/) {
push @decompressor, 'xzdec';
} else {
die("$container: unknown format\n");
}
my $pid = fork();
die("fork: $!\n") unless defined $pid;
if (!$pid) {
open(STDIN, '<', $container) || die("$container: $!\n");
open(STDOUT, '>', $outfile) || die("$outfile: $!\n");
exec @decompressor;
die("$decompressor[0]: $!\n");
}
waitpid($pid, 0) == $pid || die("waitpid: $!\n");
exit($?) if $?;
}
sub unpack_container {
my ($dir, $container) = @_;
uncompress_container($container, "$dir/cont");
systemq('skopeo', 'copy', "docker-archive:$dir/cont", "oci:$dir/image:latest");
unlink("$dir/cont");
systemq('umoci', 'unpack', '--image', "$dir/image:latest", "$dir/unpack");
return "$dir/unpack/rootfs";
}
sub dump_rpmdb {
my ($root, $outfile) = @_;
my $dbpath;
for my $phase (0, 1) {
if (can_run($root, '/usr/bin/rpmdb')) {
# check if we have the exportdb option
if (system_chroot($root, 'quiet', '/usr/bin/rpmdb', '--exportdb', '--version') == 0) {
if ($dbpath) {
system_chroot($root, 'stdout', $outfile, '/usr/bin/rpmdb', '--dbpath', $dbpath, '--exportdb');
} else {
system_chroot($root, 'stdout', $outfile, '/usr/bin/rpmdb', '--exportdb');
}
}
exit($?) if $?;
return;
}
# try to get the dbpath from the root if we can
if (!$dbpath && can_run($root, '/usr/bin/rpm')) {
my $fd = popen_chroot($root, '/usr/bin/rpm', '--eval', '%_dbpath');
my $path = <$fd>;
close($fd);
chomp $path;
$dbpath = $path if $path && $path =~ /^\//;
}
$dbpath ||= '/var/lib/rpm'; # guess
# try to dump with rpmdb_dump
if (-s "$root$dbpath/Packages" && can_run($root, '/usr/lib/rpm/rpmdb_dump')) {
my $outfh;
open($outfh, '>', $outfile) || die("$outfile: $!\n");
my $fd = popen_chroot($root, '/usr/lib/rpm/rpmdb_dump', "$dbpath/Packages");
while (<$fd>) {
next unless /^\s*[0-9a-fA-F]{8}/;
chomp;
my $v = <$fd>;
die("unexpected EOF\n") unless $v;
chomp $v;
substr($v, 0, 1, '') while substr($v, 0, 1) eq ' ';
$v = pack('H*', $v);
next if length($v) < 16;
my ($il, $dl) = unpack('NN', $v);
die("bad header length\n") unless length($v) == 8 + $il * 16 + $dl;
die("print: $!\n") unless print $outfh pack('H*', '8eade80100000000');
die("print: $!\n") unless print $outfh $v;
}
close($fd) || die("rpmdb_dump: $!\n");
close($outfh) || die("close: $!\n");
return;
}
last unless $root;
# try with the system rpm and a dbpath
$dbpath = "$root$dbpath";
$root = '';
}
die("could not dump rpm database\n");
}
sub gen_filelist {
my ($dir) = @_;
my $fd;
my $pid = open($fd, '-|');
die("fork: $!\n") unless defined $pid;
if (!$pid) {
chdir($dir) || die("chdir $!\n");
exec('find', '-print0');
die("find: $!\n");
}
local $/ = "\0";
my @files = <$fd>;
chomp @files;
close($fd) || die("find: $?\n");
$_ =~ s/^\.\//\// for @files;
$_ = {'name' => $_} for @files;
for my $f (@files) {
if (-l "$dir$f->{'name'}" || ! -f _) {
$f->{'SKIP'} = 1;
next;
}
$f->{'sha256sum'} = sha256file("$dir/$f->{'name'}");
}
return \@files;
}
sub read_rpm {
my ($rpm) = @_;
my %r = Build::Rpm::rpmq($rpm, qw{NAME VERSION RELEASE EPOCH ARCH LICENSE SOURCERPM DISTURL FILENAMES VENDOR FILEMODES FILEDIGESTS FILEDIGESTALGO SIGMD5});
delete $r{$_} for qw{BASENAMES DIRNAMES DIRINDEXES}; # save mem
for (qw{NAME VERSION RELEASE EPOCH ARCH LICENSE SOURCERPM DISTURL VENDOR FILEDIGESTALGO SIGMD5}) {
next unless $r{$_};
die("bad rpm entry for $_\n") unless ref($r{$_}) eq 'ARRAY' && @{$r{$_}} == 1;
$r{$_} = $r{$_}->[0];
}
return \%r;
}
sub read_pkgs_rpmdb {
my ($rpmhdrs) = @_;
my $fd;
open($fd, '<', $rpmhdrs) || die("$rpmhdrs: $!\n");
my @rpms;
while (1) {
my $hdr = '';
last unless read($fd, $hdr, 16) == 16;
my ($il, $dl) = unpack('@8NN', $hdr);
die("bad rpm header\n") unless $il && $dl;
die("bad rpm header\n") unless read($fd, $hdr, $il * 16 + $dl, 16) == $il * 16 + $dl;
push @rpms, read_rpm([ $hdr ]);
}
close($fd);
@rpms = sort {$a->{'NAME'} cmp $b->{'NAME'} || $a->{'VERSION'} cmp $b->{'VERSION'} || $a->{'RELEASE'} cmp $b->{'RELEASE'}} @rpms;
return \@rpms;
}
sub read_pkgs_from_product_directory {
my ($dir) = @_;
my @rpms;
my $addrpmfile = sub {
my $fn = $File::Find::name;
push @rpms, read_rpm($fn) if $fn =~ /\.rpm$/;
};
find($addrpmfile, $dir);
return \@rpms;
}
sub read_pkgs_from_rpmmd {
my ($primaryfile) = @_;
require Build::Rpmmd;
my $fh;
if ($primaryfile =~ /\.gz$/) {
open($fh, '-|', 'gunzip', '-dc', $primaryfile) || die("$primaryfile: $!\n");
} else {
open($fh, '<', $primaryfile) || die("$primaryfile: $!\n");
}
my @rpms;
for my $pkg (@{Build::Rpmmd::parse($fh, undef, 'withlicense' => 1, 'withchecksum' => 1, 'withvendor' => 1)}) {
my $r = {};
for (qw{name epoch version release arch vendor sourcerpm license checksum}) {
$r->{uc($_)} = $pkg->{$_} if defined $pkg->{$_};
}
push @rpms, $r;
}
close($fh);
return \@rpms;
}
sub read_dist {
my ($dir) = @_;
my %dist;
my $fd;
if (open($fd, '<', "$dir/etc/os-release") || open($fd, '<', "$dir/usr/lib/os-release")) {
while(<$fd>) {
chomp;
next unless /\s*(\S+)=(.*)/;
my $k = lc($1);
my $v = $2;
$v =~ s/\s+$//;
$v =~ s/^\"(.*)\"$/$1/;
if ($k eq 'id_like') {
push @{$dist{$k}}, $v;
} else {
$dist{$k} = $v;
}
}
close($fd);
}
return %dist ? \%dist : undef;
}
sub gen_purl_rpm {
my ($p, $distro) = @_;
my $vr = $p->{'VERSION'};
$vr = "$vr-$p->{'RELEASE'}" if defined $p->{'RELEASE'};
my $vendor = lc($p->{'VENDOR'});
$vendor =~ s/obs:\/\///; # third party OBS builds
$vendor =~ s/\ .*//; # eg. SUSE LLC...
$vendor =~ s/\/?$/\//;
my $purlurl = "pkg:".urlencode("rpm/$vendor$p->{'NAME'}\@$vr").'?';
$purlurl .= '&epoch='.urlencode($p->{'EPOCH'}) if $p->{'EPOCH'};
$purlurl .= '&arch='.urlencode($p->{'ARCH'}) if $p->{'ARCH'};
$purlurl .= '&upstream='.urlencode($p->{'SOURCERPM'}) if $p->{'SOURCERPM'};
$purlurl .= '&distro='.urlencode($distro) if $distro;
$purlurl =~ s/\?\&/\?/;
$purlurl =~ s/\?$//;
return $purlurl;
}
my $wrap_intoto;
my $isproduct;
my $distro;
my $rpmmd;
while (@ARGV && $ARGV[0] =~ /^-/) {
my $opt = shift @ARGV;
if ($opt eq '--distro') {
$distro = shift @ARGV;
} elsif ($opt eq '--intoto') {
$wrap_intoto = 1;
} elsif ($opt eq '--product') {
$isproduct = 1;
} elsif ($opt eq '--rpmmd') {
$rpmmd = 1;
} else {
last if $opt eq '--';
die("unknown option: $opt\n");
}
}
die("usage: generate_spdx_sbom [--disto NAME] [--intoto] [--product] PRODUCT_DIRECTORY|CONTAINER_TAR\n") unless @ARGV == 1;
my $toprocess = $ARGV[0];
my $tmpdir = File::Temp::tempdir( CLEANUP => 1 );
my $files;
my $pkgs;
my $dist;
if ($isproduct) {
# product case
#$files = gen_filelist($toprocess);
$pkgs = read_pkgs_from_product_directory($toprocess);
} elsif ($rpmmd) {
require Build::Rpmmd;
my $primary;
if (-d $toprocess) {
my %d = map {$_->{'type'} => $_} @{Build::Rpmmd::parse_repomd("$toprocess/repomd.xml")};
my $primary = $d{'primary'};
die("no primary type in repomd.xml\n") unless $primary;
my $loc = $primary->{'location'};
$loc =~ s/.*\///;
$toprocess .= "/$loc";
}
die("$toprocess: $!\n") unless -e $toprocess;
$pkgs = read_pkgs_from_rpmmd($toprocess);
} else {
# container tar case
my $unpackdir = unpack_container($tmpdir, $toprocess);
dump_rpmdb($unpackdir, "$tmpdir/rpmdb");
$files = gen_filelist($unpackdir);
$pkgs = read_pkgs_rpmdb("$tmpdir/rpmdb");
$dist = read_dist($unpackdir);
}
my $subjectname = $toprocess;
$subjectname =~ s/.*\///;
if (!$distro && $dist) {
$distro = $dist->{'id'};
$distro .= "-$dist->{'version_id'}" if defined($dist->{'version_id'}) && $dist->{'version_id'} ne '';
$distro .= "-$dist->{'build_id'}" if defined($dist->{'build_id'}) && $dist->{'build_id'} ne '';
}
my $spdx = {
'spdxVersion' => 'SPDX-2.3',
'dataLicense' => 'CC0-1.0',
'SPDXID' => 'SPDXRef-DOCUMENT',
'name' => $subjectname,
};
my $creationinfo = {
'created' => rfc3339time(time()),
'creators' => [ 'Tool: obs_build_generate_spdx_sbom-1.0' ],
'licenseListVersion' => '3.19',
};
$spdx->{'creationInfo'} = $creationinfo;
for my $p (@$pkgs) {
my $vr = $p->{'VERSION'};
$vr = "$vr-$p->{'RELEASE'}" if defined $p->{'RELEASE'};
my $evr = $vr;
$evr = "$p->{'EPOCH'}:$evr" if $p->{'EPOCH'};
my $spdxpkg = {
'name' => $p->{'NAME'},
'versionInfo' => $evr,
};
$spdxpkg->{'originator'} = "Organization: $p->{'VENDOR'}" if $p->{'VENDOR'};
$spdxpkg->{'downloadLocation'} = 'NOASSERTION';
$spdxpkg->{'sourceInfo'} = 'acquired package info from RPM DB';
$spdxpkg->{'licenseConcluded'} = 'NOASSERTION';
$spdxpkg->{'licenseDeclared'} = 'NOASSERTION';
my $license = $p->{'LICENSE'};
if ($license) {
$license =~ s/ and / AND /g;
$spdxpkg->{'licenseConcluded'} = $license;
$spdxpkg->{'licenseDeclared'} = $license;
}
$spdxpkg->{'copyrightText'} = 'NOASSERTION';
my $purlurl = gen_purl_rpm($p, $distro);
my @xref;
push @xref, { 'referenceCategory' => 'PACKAGE-MANAGER', 'referenceType' => 'purl', 'referenceLocator', $purlurl } if $purlurl;
$spdxpkg->{'externalRefs'} = \@xref if @xref;
if (!$p->{'spdx_id'}) {
if ($p->{'SIGMD5'}) {
$p->{'spdx_id'} = "SPDXRef-Package-$p->{'NAME'}-".unpack('H*', $p->{'SIGMD5'});
} elsif ($p->{'CHECKSUM'}) {
my $id = $p->{'CHECKSUM'};
$id =~ s/.*://;
$id = substr($id, 0, 32);
$p->{'spdx_id'} = "SPDXRef-Package-$p->{'NAME'}-$id";
} else {
my $id = Digest::MD5::md5_hex(Build::SimpleJSON::unparse($p));
$p->{'spdx_id'} = "SPDXRef-Package-$p->{'NAME'}-$id";
}
$p->{'spdx_id'} =~ s/[^a-zA-Z0-9\.\-]/-/g;
}
$spdxpkg->{'SPDXID'} = $p->{'spdx_id'};
push @{$spdx->{'packages'}}, $spdxpkg;
}
for my $f (@$files) {
next if $f->{'SKIP'};
my $spdxfile = {
'fileName' => $f->{'name'},
'licenseConcluded' => 'NOASSERTION',
'copyrightText' => '',
};
my @chks;
push @chks, { 'algorithm' => 'SHA256', 'checksumValue' => $f->{'sha256sum'} } if $f->{'sha256sum'};
$spdxfile->{'checksums'} = \@chks if @chks;
$f->{'spdx_id'} = "SPDXRef-".Digest::MD5::md5_hex($f->{'name'}.($f->{'sha256sum'} || ''));
$spdxfile->{'SPDXID'} = $f->{'spdx_id'};
push @{$spdx->{'files'}}, $spdxfile;
}
if (@$files) {
my %f2p;
for my $p (@$pkgs) {
push @{$f2p{$_}}, $p for @{$p->{'FILENAMES'} || []};
}
for my $f (@$files) {
next if $f->{'SKIP'};
#warn("unpackaged file: $f->{'name'}\n") unless @{$f2p{$f->{'name'}} || []};
for my $p (@{$f2p{$f->{'name'}} || []}) {
next unless $f->{'spdx_id'} && $p->{'spdx_id'};
my $rel = {
'spdxElementId' => $p->{'spdx_id'},
'relatedSpdxElement' => $f->{'spdx_id'},
'relationshipType', 'CONTAINS',
};
push @{$spdx->{'relationships'}}, $rel;
}
}
}
push @{$spdx->{'relationships'}}, {
'spdxElementId' => 'SPDXRef-DOCUMENT',
'relatedSpdxElement' => 'SPDXRef-DOCUMENT',
'relationshipType', 'DESCRIBES',
};
my $uuid = pack('H*', '1e9d579964de4594a4e835719a1c259f'); # uuid ns
$uuid = substr(Digest::SHA::sha1($uuid . Build::SimpleJSON::unparse($spdx, 'template' => $spdx_json_template, 'keepspecial' => 1)), 0, 16);
substr($uuid, 6, 1, pack('C', unpack('@6C', $uuid) & 0x0f | 0x50));
substr($uuid, 8, 1, pack('C', unpack('@8C', $uuid) & 0x3f | 0x80));
$uuid = join('-', unpack("H8H4H4H4H12", $uuid));
$spdx->{'documentNamespace'} = 'http://open-build-service.org/spdx/'.urlencode($subjectname).'-'.$uuid;
if ($wrap_intoto) {
my $subject = { 'name' => $subjectname };
# no digest for products as it might be a directory. And an iso file would change the checksum later while signing.
$subject->{'digest'} = { 'sha256' => sha256file($toprocess) } unless $isproduct;
my $intoto = {
'_type' => 'https://in-toto.io/Statement/v0.1',
'subject' => [ $subject ],
'predicateType' => 'https://spdx.dev/Document',
'predicate' => $spdx,
};
print Build::SimpleJSON::unparse($intoto, 'template' => $intoto_json_template, 'keepspecial' => 1)."\n";
} else {
print Build::SimpleJSON::unparse($spdx, 'template' => $spdx_json_template, 'keepspecial' => 1)."\n";
}