Hello World

File unpackarchive of Package build

#!/usr/bin/perl
################################################################
#
# Copyright (c) 2021 SUSE LLC
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 or 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see the file COPYING); if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#

use strict;
use Fcntl;
use POSIX;

# Slow but secure unpacking of tar/cpio archives.
# All files we cannot handle (e.g. device nodes) are ignored.

my $default_dirmode = 0700;
my $add_dirmode = 0700;

#
# Input/Output
#
sub readdata {
  my ($l, $eofok, $prepend) = @_;
  my $d = '';
  if (defined($prepend) && $prepend ne '') {
    die("prepend data is too big\n") if $l < length($prepend);
    $d = $prepend;
    $l -= length($d);
  }
  while ($l > 0) {
    my $r = read(STDIN, $d, $l > 65536 ? 65536 : $l, length($d));
    next if !defined($r) && $! == POSIX::EINTR;
    die("read error: $!\n") unless defined $r;
    return undef if $eofok && !$r && $d eq '';
    die("unexpected EOF\n") unless $r;
    $l -= $r;
  }
  return $d;
}

sub skipdata {
  my ($l) = @_;
  while ($l > 0) {
    my $d;
    my $r = read(STDIN, $d, $l > 65536 ? 65536 : $l);
    next if !defined($r) && $! == POSIX::EINTR;
    die("read error: $!\n") unless defined $r;
    die("unexpected EOF\n") unless $r;
    $l -= $r;
  }
}

sub writedata {
  my ($fd, $d) = @_;
  while (length($d)) {
    my $r = syswrite($fd, $d, length($d) > 65536 ? 65536 : length($d));
    next if !defined($r) && $! == POSIX::EINTR;
    die("write error: $!\n") unless defined $r;
    $d = substr($d, $r) if $r;
  }
}

sub copydata {
  my ($fd, $l) = @_;
  while ($l > 0) {
    my $d = readdata($l > 65536 ? 65536 : $l);
    writedata($fd, $d);
    $l -= length($d);
  }
}

#
# Cpio handling (newc format only)
#
sub cpio_parsehead {
  my ($cpiohead) = @_;
  return undef unless substr($cpiohead, 0, 6) eq '070701';
  my $mode = hex(substr($cpiohead, 14, 8));
  my $nlink = hex(substr($cpiohead, 38, 8));
  my $mtime = hex(substr($cpiohead, 46, 8));
  my $size  = hex(substr($cpiohead, 54, 8));
  my $pad = (4 - ($size % 4)) % 4;
  my $namesize = hex(substr($cpiohead, 94, 8));
  my $namepad = (6 - ($namesize % 4)) % 4;
  my $ent = { 'size' => $size, 'mtime' => $mtime, 'mode' => $mode, 'cpiotype' => ($mode >> 12 & 0xf), 'nlink' => $nlink };
  if ($nlink > 1) {
    my $ino = hex(substr($cpiohead, 6, 8));
    my $devmaj  = hex(substr($cpiohead, 62, 8));
    my $devmin  = hex(substr($cpiohead, 70, 8));
    $ent->{'lnkid'} = "$ino/$devmaj/$devmin";
  }
  return ($ent, $namesize, $namepad, $size, $pad);
}

my %cpiotype2type = (
  '1' => 'p',
  '2' => 'c',
  '4' => 'd',
  '6' => 'b',
  '8' => 'f',
  '10' => 'l',
  '12' => 's',
);

sub cpio_readhead {
  my ($prepend) = @_;
  my $cpiohead = readdata(110, 0, $prepend);
  die("cpio: not a 'SVR4 no CRC ascii' cpio\n") unless substr($cpiohead, 0, 6) eq '070701';
  my ($ent, $namesize, $namepad, $size, $pad) = cpio_parsehead($cpiohead);
  die("cannot parse cpio header\n") unless $ent;
  die("ridiculous long filename\n") if $namesize > 8192;
  my $name = readdata($namesize + $namepad);
  $name = substr($name, 0, $namesize);
  $name =~ s/\0.*//s;
  return undef if !$size && $name eq 'TRAILER!!!';
  $name =~ s/^\.\///s;
  $ent->{'name'} = $name;
  $ent->{'type'} = $cpiotype2type{$ent->{'cpiotype'}} || '?';
  if ($ent->{'cpiotype'} eq '10') {
    die("bad symlink size\n") if $size <= 0 || $size >= 65536;
    my $linkname = readdata($size + $pad);
    $linkname = substr($linkname, 0, $size);
    $linkname =~ s/\0.*//s;
    $ent->{'linkname'} = $linkname;
    $size = $pad = 0;
  } elsif ($ent->{'lnkid'}) {
    cpio_handlehardlink($ent);
  }
  return ($ent, $size, $pad);
}

my %cpio_hardlinks;

sub cpio_handlehardlink {
  my ($ent) = @_;
  my $lnkid = $ent->{'lnkid'};
  my $lnk = $cpio_hardlinks{$lnkid};
  if ($lnk) {
    $ent->{'linkname'} = $lnk->{'name'};
    delete $cpio_hardlinks{$lnkid} if --$lnk->{'nlink'} == 0;
  } else {
    $cpio_hardlinks{$lnkid}  = { 'name' => $ent->{'name'}, 'nlink' => $ent->{'nlink'} - 1 };
  }
}

#
# Tar handling (gnu, ustar, pax)
#
my @tar_headnames = qw{name mode uid gid size mtime chksum tartype linkname magic version uname gname major minor};
sub tar_parsehead {
  my ($tarhead) = @_;
  my @head = unpack('A100A8A8A8A12A12A8a1A100a6a2A32A32A8A8A155x12', $tarhead);
  /^([^\0]*)/s && ($_ = $1) for @head;
  $head[7] = '0' if $head[7] eq '';	# map old \0 type to 0
  $head[$_] = oct($head[$_]) for (1, 2, 3, 5, 6, 13, 14);
  my $pad;
  if (substr($tarhead, 124, 1) eq "\x80") {
    # not octal, but binary!
    my @s = unpack('aCSNN', substr($tarhead, 124, 12));
    $head[4] = $s[4] + (2 ** 32) * $s[3] + (2 ** 64) * $s[2];
    $pad = (512 - ($s[4] & 511)) & 511;
  } else {
    $head[4] = oct($head[4]);
    $pad = (512 - ($head[4] & 511)) & 511;
  }
  $head[7] = '0' if $head[7] eq '' || $head[7] =~ /\W/;
  $head[7] = '5' if $head[7] eq '0' && $head[0] =~ /\/$/s;	# dir
  if ($head[9] eq 'ustar' && $head[15] ne '') {		# ustar prefix handling
    $head[15] =~ s/\/$//s;
    $head[0] = "$head[15]/$head[0]";
  }
  my $ent = { map {$tar_headnames[$_] => $head[$_]} (0..$#tar_headnames) };
  return ($ent, $head[4], $pad);
}

sub tar_parseoverride {
  my ($override, $tartype, $data) = @_;
  $override ||= {};
  if ($tartype eq 'L') {
    $override->{'name'} = $data;
  } elsif ($tartype eq 'K') {
    $override->{'linkname'} = $data;
  } elsif ($tartype eq 'x' || $tartype eq 'X') {
    $override->{'ispax'} = 1;
    while ($data =~ /^(\d+) / && $1 > 3) {
      my $entry = substr($data, length($1) + 1, $1 - length($1) - 2);   # -2 because of space and newline
      $data = substr($data, $1);
      $override->{'name'} = substr($entry, 5) if substr($entry, 0, 5) eq 'path=';
      $override->{'linkname'} = substr($entry, 9) if substr($entry, 0, 9) eq 'linkpath=';
    }
  }
  return $override;
}

my %tartype2type = (
  '0' => 'f',
  '1' => 'L',
  '2' => 'l',
  '3' => 'c',
  '4' => 'b',
  '5' => 'd',
  '6' => 'p',
);

sub tar_readhead {
  my ($prepend) = @_;
  my $override;
  while (1) {
    my $tarhead = readdata(512, 1, $prepend);
    undef $prepend;
    return undef unless defined $tarhead;
    return undef if $tarhead eq "\0" x 512;
    next if substr($tarhead, 500, 12) ne "\0" x 12;
    my ($ent, $size, $pad) = tar_parsehead($tarhead);
    my $bsize = $size + $pad;
    my $tartype = $ent->{'tartype'};
    next if $tartype eq 'V';	# ignore volume lables
    if ($tartype eq 'L' || $tartype eq 'K' || $tartype eq 'x' || $tartype eq 'X') {
      die("bad extension block size\n") if $bsize < 1 || $bsize >= 1024 * 1024;
      $override = tar_parseoverride($override, $tartype, substr(readdata($bsize), 0, $size));
      next;
    }
    if ($override) {
      $ent->{$_} = $override->{$_} for keys %$override;
      undef $override;
    }
    $size = $pad = 0 if $tartype eq '2' || $tartype eq '3' || $tartype eq '4' || $tartype eq '5' || $tartype eq '6';
    $size = $pad = 0 if $tartype eq '1' && !$ent->{'ispax'};  # hard link magic
    $ent->{'type'} = $tartype2type{$tartype} || '?';
    return ($ent, $size, $pad);
  }
}


#
# Path resolving
#
my %cache;	# name => resolved_dir

sub flush_dircache {
  %cache = ();
}

sub create_dir {
  my ($root, $r, $mode) = @_;
  mkdir("$root$r", $mode | $add_dirmode) || die("mkdir $r: $!\n");
  $cache{$r} = "$r/";
}

sub resolve_dir {
  my ($root, $name, $linkdepth) = @_;
  return '/' if $name eq '' || $name eq '/';
  my $c = $cache{$name};
  return $c if defined $c;
  my ($dir, $comp) = ('/', $name);
  ($dir, $comp) = ($1, $2) if $name =~ /\A(.*?)\/+([^\/]*)\z/s;
  my $r = resolve_dir($root, $dir);
  if ($comp eq '.' || $comp eq '') {
    $cache{$name} = $r;
    return $r;
  }
  if ($comp eq '..') {
    $r = '/' unless $r =~ s/\/[^\/]*\/\z/\//s;
    $cache{$name} = $r;
    return $r;
  }
  my $rcomp = "$r$comp";
  my @s = lstat("$root$rcomp");
  if (!@s) {
    die("$rcomp: $!\n") unless $! == POSIX::ENOENT;
    create_dir($root, "$rcomp", $default_dirmode);
    $r = "$rcomp/";
  } elsif (-l _) {
    die("$name: Too many levels of symbolic links\n") if ++$linkdepth > 20;
    my $link = readlink("$root$rcomp");
    die("readlink $rcomp: $!\n") unless defined $link;
    $r = resolve_dir($root, "$r$link", ($linkdepth || 0) + 1);
  } elsif (-d _) {
    $r = "$rcomp/";
  } else {
    die("$rcomp: Not a directory\n");
  }
  $cache{$name} = $r;
  return $r;
}

sub resolve_file {
  my ($root, $name) = @_;
  $name =~ s/\/+\z//s;
  return '/.' if $name eq '' || $name eq '/';
  my ($dir, $comp) = ('/', $name);
  ($dir, $comp) = ($1, $2) if $name =~ /\A(.*?)\/+([^\/]*)\z/s;
  $dir = $cache{$dir} || resolve_dir($root, $dir);
  return "$dir$comp";
}

#
# Decompression handling
#
sub detect_decompressor {
  my ($first16bytes) = @_;
  my @sig = unpack('N4', $first16bytes);
  return qw{gzip -dc} if ($sig[0] & 0xffffffe0) == 0x1f8b0800;
  return qw{bzip2 -dc} if ($sig[0] & 0xfffffff0) == 0x425a6830 && ($sig[1] == 0x31415926 || $sig[1] == 0x17724538);
  return qw{xz -dc} if ($sig[0] & 0xffffffff) == 0xfd377a58 && ($sig[1] & 0xffff0000) == 0x5a000000;
  return qw{zstd -dc} if ($sig[0] & 0xffffff00) == 0x28b52f00;
  return ();
}

sub handle_decompression {
  my $first16bytes = readdata(16);
  my @decomp = detect_decompressor($first16bytes);
  return $first16bytes unless @decomp;
  local *F;
  open(F, "<&STDIN") || die("stdin dup: $!\n");
  my $pid = open(STDIN, '-|');
  die("fork: $!\n") unless defined $pid;
  if (!$pid) {
    local *G;
    my $pid2 = open(G, '|-');
    die("fork: $!\n") unless defined $pid2;
    if (!$pid2) {
      exec(@decomp);
      die("$decomp[0]: $!\n");
    }
    print G $first16bytes if $first16bytes ne '';
    while (1) {
      my $d = '';
      my $r = read(F, $d, 8192);
      next if !defined($r) && $! == POSIX::EINTR;
      die("read error: $!\n") unless defined $r;
      exit(0) unless $r;
      print G $d;
    }
    close(G) || die("pipe close: $!\n");
  }
  close(F);
  return undef;
}


#
# Main
#
die("usage: unpackarchive --cpio|--tar [-C <root>]\n") unless @ARGV;
my $format = shift @ARGV;
my $root = '.';
while (@ARGV) {
  if (@ARGV > 1 && $ARGV[0] eq '-C') {
    (undef, $root) = splice(@ARGV, 0, 2);
  } elsif (@ARGV && $ARGV[0] eq '-J') {
    shift @ARGV;
  } elsif ($ARGV[0] eq '-j' || $ARGV[0] eq '-J' || $ARGV[0] eq '-z') {
    shift @ARGV;
  } elsif ($ARGV[0] =~ /^-/) {
    die("unpackarchive: unsupported option $ARGV[0]\n");
  } else {
    die("usage: unpackarchive --cpio|--tar [-C <root>]\n");
  }
}

die("$root: No such file or directory\n") unless -d $root;
my $readhead;
$readhead = \&cpio_readhead if $format eq '--cpio';
$readhead = \&tar_readhead if $format eq '--tar';
die("unknown format option $format\n") unless $readhead;

umask(0);	# do not mess with the modes

my $prepend = handle_decompression();
while (1) {
  my ($ent, $size, $pad) = $readhead->($prepend);
  undef $prepend;
  last unless defined $ent;
  my $name = $ent->{'name'};
  my $type = $ent->{'type'};
  $name =~ s/\A\/+//s;
  $name =~ s/\/+\z//s;
  $ent->{'name'} = $name;
  if ($name eq '') {
    skipdata($size + $pad);
    next;
  }
  if ($type ne 'd' && $type ne 'f' && $type ne 'l' && $type ne 'L') {
    skipdata($size + $pad);
    next;
  }
  my $linkname = $ent->{'linkname'};
  #if (defined($linkname) && $linkname ne '') {
  #  print "$ent->{'type'} $name -> $linkname\n";
  #} else {
  #  print "$ent->{'type'} $name\n";
  #}
  $name = resolve_file($root, $name);

  my @s = lstat("$root$name");
  die("$name: $!\n") if !@s && $! != POSIX::ENOENT;
  if (@s) {
    # entry already exists. handle conflicts.
    if (-d _) {
      die("$name: Is a directory\n") unless $type eq 'd';
      chmod(($ent->{'mode'} & 07777) | $add_dirmode, "$root$name");
      utime($ent->{'mtime'}, $ent->{'mtime'}, "$root$name");
      skipdata($size + $pad);
      next;
    } elsif (-f _) {
      unlink("$root$name") if $type eq 'l' || $type eq 'd';
    } else {
      unlink("$root$name") || die("unlink $name: $!\n");
      flush_dircache();		# start over from scratch
    }
  }

  if (defined($linkname) && $linkname ne '') {
    if ($type eq 'l') {
      symlink($linkname, "$root$name") || die("symlink $linkname $name: $!\n");
      skipdata($size + $pad);
      next;
    }
    if ($type ne 'L' && $type ne 'f') {
      skipdata($size + $pad);
      next;
    }
    $linkname = resolve_file($root, $linkname);
    my @s2 = lstat("$root$linkname");
    die("$linkname: $!\n") unless @s2;
    die("$linkname: Not a regular file\n") unless -f _;
    link("$root$linkname", "$root$name") || die("link $linkname $name: $!\n");
    $type = 'f' if $type eq 'L' && $size > 0;
  }
  if ($type eq 'd') {
    create_dir($root, $name, $ent->{'mode'} & 07777);
    utime($ent->{'mtime'}, $ent->{'mtime'}, "$root$name");
  } elsif ($type eq 'f') {
    my $fd;
    sysopen($fd, "$root$name", O_WRONLY|O_CREAT|O_TRUNC, $ent->{'mode'} & 07777) || die("$root$name: $!\n");
    copydata($fd, $size);
    close($fd) || die("close $name: $!\n");
    utime($ent->{'mtime'}, $ent->{'mtime'}, "$root$name");
    skipdata($pad);
  } else {
    skipdata($size + $pad);
  }
}