#!/usr/bin/perl
# Name: /usr/local/bin/rrsync (should also have a symlink in /usr/bin)
# Purpose: Restricts rsync to subdirectory declared in .ssh/authorized_keys
# Author: Joe Smith <js-cgi@inwap.com> 30-Sep-2004
# Modified by: Wayne Davison <wayned@samba.org>
# From: https://opensource.apple.com/source/rsync/rsync-52/rsync/support/rrsync
# Modified 7 February 2022 by Jim Lippard to update location of rsync.
#   Should work as a replacement for rsync_wrapper.sh, with -ro for rsnapshot
#   and (with a new public key) limiting to /var/install for distribute.pl.
#   Might want to borrow some ideas from this for rsync-server/client which
#   are already more strict than this. Had to add some more short options.
#   Handling of -ro|-doas is kind of lame, but I don't currently need to
#   use both at the same time. Using -ro for backup, -doas for distribute.pl,
#   and rsync-client/server.pl do their own thing to restrict.
# Modified 13 February 2022 by Jim Lippard to log for both doas and ro.
# Modified 14 February 2022 by Jim Lippard to log restricted subdir.
# Modified 2 December 2023 by Jim Lippard to use pledge and unveil to
#   enforce restrictions on OpenBSD.
# Modified 30 December 2023 by Jim Lippard to call pledge correctly.
# Modified 31 December 2023 by Jim Lippard to expand unveil to cover
#   /usr/share/zoneinfo.
# Modified 2 January 2024 by Jim Lippard to add support for rsync options
#   found in python version: block-size, checksum-choice, compress-choice,
#   copy-devices, debug, delete-delay, delete-missing-args, fake-super,
#   fsync, group, groupmap, hard-links, iconv, ignore-existing,
#   ignore-missing-args, ignore-times, info, links, log-file, max-alloc,
#   mkpath, msgs2stderr, munge-links, new-compress, no-msgs2stderr,
#   no-munge-links, no-w, old-compress, one-file-system, open-noatime,
#   owner, perms, preallocate, recursive, skip-compress, stats, stderr,
#   times, use-qsort, usermap, write-devices. Added seconds to log message.
#   Short no args added: A J N U X q s y removed e f i . (dot is separator
#     for e option used internally by rsync);
#   Short with num added: @;
#   Short disabled added: s; Short disabled subdir added: K L k
#   Set log-file opt to -1 if ro.
#   Parse arguments better (including -e. options).
#   Added most options from python version: -wo, -munge, -no-del.
#   Omitted -no-lock and locking dirs.
# Modified 3 January 2024 by Jim Lippard to add wo, munge, no-del to logging.
# Modified 30 July 2024 by Jim Lippard to add -no-lock and locking, and
#    reuse additional checks from this patch: https://attachments.samba.org/attachment.cgi?id=14662
# Modified 8 October 2024 by Jim Lippard to change no-w to no-W.
# Modified 14 January 2025 by Jim Lippard to allow "true" as command like
#    python version.
# Modified 14 September 2025 by Jim Lippard to define RSYNC path based
#    on operating system (Darwin and linux use /usr/bin; OpenBSD uses
#    /usr/local/bin). doas location is same for Linux apt and OpenBSD.
# Modified 9 December 2025 by Jim Lippard to add -no-overwrite and update
#    options settings.
# Modified 8 April 2026 by Jim Lippard to add "use warnings".
# Modified 17 May 2026 by Jim Lippard to validate args after glob
#    expansion instead of before (already mitigated on OpenBSD but
#    was exploitable under Linux/macOS; issue identified by Gemini
#    and confirmed by Claude, not present in python version).
#    Remove hostname resolution on IP addresses (which doesn't
#    support IPv6 anyway), remove unneeded 'dns' pledge. Made minimal
#    fix on vuln instead of Claude-suggested new validate_arg subroutine.
# Modified 19 May 2026 by Jim Lippard to remove padding on IP address
#    in logging and log original args instead of slash-stripped ones.
# Modified 30 May 2026 by Jim Lippard to add missing 'dirs' long option.

use strict;
use warnings;

use Cwd 'abs_path';
use File::Glob ':glob';
use Fcntl ':flock';

use if $^O eq "openbsd", "OpenBSD::Pledge";
use if $^O eq "openbsd", "OpenBSD::Unveil";

# Return the absolute path of the rsync binary for the running platform.
# OpenBSD gets /usr/local/bin/rsync; every other OS (Darwin/macOS, Linux)
# gets /usr/bin/rsync.
sub get_rsync_path {
    return $^O eq 'openbsd'
        ? '/usr/local/bin/rsync'
        : '/usr/bin/rsync';
}

# Site configuration: adjust these paths to your liking.  The option tables
# further down are the place to disable individual rsync options.
use constant {
    RSYNC        => scalar(get_rsync_path()),   # rsync binary for this OS
    DOAS         => '/usr/bin/doas',            # used only with -doas
    LOGFILE      => '/home/_rsyncu/rrsync.log',
    LOCKFILE     => '/home/_rsyncu/.rrsync.lock',
    ZONEINFO_DIR => '/usr/share/zoneinfo',
};

# Usage text printed by -help and appended to command-line error messages.
# The heredoc interpolates, so $0 and $ENV{HOME} are expanded when this
# statement runs.
my $Usage = <<EOM;
Usage: rrsync [-ro | -wo] [-munge] [-no-del] [-no-lock] [-no-overwrite] [-help] DIR
Use 'command="$0 [options] DIR"'
	in front of lines in $ENV{HOME}/.ssh/authorized_keys
EOM

# Parse the rrsync command line, i.e. the words of the command="..." entry
# in authorized_keys.  Flags may appear in any order; the restricted
# directory must be the last word.  Any unknown flag, or any word after the
# directory, is a fatal error.
my $arg;
our ($doas, $ro, $wo, $munge, $no_del, $no_lock, $no_overwrite, $subdir);
while (@ARGV) {
    $arg = shift;
    if ($arg eq '-doas') { # -doas = use doas
        $doas = 1;
    }
    elsif ($arg eq '-ro') { # -ro = Read-Only, implies -no-del -no-lock
        $ro = 1;
        $no_del = 1;
        $no_lock = 1;
    }
    elsif ($arg eq '-wo') { # -wo = Write-Only
        $wo = 1;
    }
    elsif ($arg eq '-munge') { # -munge = enforce -munge-links on server
        $munge = 1;
    }
    elsif ($arg eq '-no-del') { # -no-del = disable --delete*, --remove* options
        $no_del = 1;
    }
    elsif ($arg eq '-no-lock') { # -no-lock = do not use lock
        $no_lock = 1;
    }
    elsif ($arg eq '-no-overwrite') { # -no-overwrite = set --ignore-existing
        $no_overwrite = 1;
    }
    elsif ($arg eq '-help') { # -help = die with usage msg
        die "$Usage\n";
    }
    elsif (substr ($arg, 0, 1) eq '-') {
        die "$0: error: unrecognized option: $arg\n$Usage";
    }
    elsif (@ARGV) {
        # A non-option word that is not the last word: DIR must come last.
        die "$0: error: unrecognized arguments: @ARGV\n$Usage";
    }
    else {
        $subdir = $arg;
    }
}
die "$0: No subdirectory specified\n$Usage" unless defined $subdir;

# Canonicalize the restricted dir.  abs_path() returns undef when the path
# cannot be resolved, so test for that explicitly: a missing directory then
# produces only the error below instead of "uninitialized value" warnings
# followed by the error.
$subdir = abs_path($subdir);
die "$0: Restricted directory does not exist!\n"
    unless defined $subdir && ($subdir eq '/' || -d $subdir);

die "$0: -ro and -wo are mutually exclusive\n" if ($ro && $wo);

# The client uses "rsync -av -e ssh src/ server:dir/", and sshd on the server
# executes this program when .ssh/authorized_keys has 'command="..."'.
# For example:
# command="rrsync logs/client" ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAIEAzGhEeNlPr...
# command="rrsync -ro results" ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAIEAmkHG1WCjC...
#
# Format of the environment variables set by sshd:
# SSH_ORIGINAL_COMMAND=rsync --server          -vlogDtpr --partial . ARG # push
# SSH_ORIGINAL_COMMAND=rsync --server --sender -vlogDtpr --partial . ARGS # pull
# SSH_CONNECTION=client_addr client_port server_addr server_port

# The command the client asked sshd to run; everything below is driven by it.
my $command = $ENV{SSH_ORIGINAL_COMMAND};
unless (defined $command) {
    die "$0: Not invoked via sshd\n$Usage";
}

# Allow checking connectivity with "ssh <host> true".
if ($command eq 'true') {
    exit(0);
}

# Strip the leading "rsync " so that only its options and args remain.
unless ($command =~ s/^rsync\s+//) {
    die "$0: SSH_ORIGINAL_COMMAND='$command' is not rsync\n";
}

# Restrictive on purpose: we are the sender only when the command starts
# with exactly "--server --sender ".
our $am_sender = $command =~ /^--server\s+--sender\s/;

if ($ro && !$am_sender) {
    die "$0 -ro: sending to read-only server not allowed\n";
}
if ($wo && $am_sender) {
    die "$0 -wo: reading from write-only server not allowed\n";
}

# If on OpenBSD, enforce restricted directory and read-only.
if ($^O eq 'openbsd') {
    my @promises = ('rpath', 'wpath', 'cpath', 'unveil', 'exec', 'proc');
    push (@promises, 'flock') if (!$no_lock);
    # still need wpath because of log file. stdio already included.
    pledge (@promises) || die "$0: Cannot pledge promises. $!\n";

    # unveil subdir: read-only for -ro, read/write/create otherwise.
    # Failure is fatal rather than silently ignored.
    unveil ($subdir, $ro ? 'r' : 'rwc')
        || die "$0: Cannot unveil $subdir. $!\n";

    # unveil logfile, lockfile, zoneinfo.  These stay best-effort as
    # before: the log is optional, and a lockfile that cannot be opened
    # is reported when it is opened later.
    # need c for append, oddly.
    unveil (LOGFILE, 'rwc');
    unveil (LOCKFILE, 'rwc') if (!$no_lock);
    unveil (ZONEINFO_DIR, 'r');

    # unveil the programs we execute; without them nothing can run anyway.
    unveil (RSYNC, 'rx') || die "$0: Cannot unveil " . RSYNC . ". $!\n";
    if ($doas) {
        unveil (DOAS, 'rx') || die "$0: Cannot unveil " . DOAS . ". $!\n";
    }

    # Calling unveil with no arguments locks the list against further
    # changes.
    unveil () || die "$0: Cannot lock unveil. $!\n";
}

### START of options data produced by the cull_options script. ###

# These options are the only options that rsync might send to the server,
# and only in the option format that the stock rsync produces.

# To disable a short-named option, add its letter to this string:
our $short_disabled = 's';

# These are disabled when the restricted dir is not "/":
our $short_disabled_subdir = 'KLk';

# Letters accepted as short options.  After the tables, the disabled letters
# are removed from these strings and each string is wrapped in [...] for use
# as a regex character class by the option parser.
our $short_no_arg = 'ACDEHIJKLNORSUWXbcdefgiklmnopqrstuvxyz'; # DO NOT REMOVE ANY
our $short_with_num = '@B'; # DO NOT REMOVE ANY

# To disable a long-named option, change its value to a -1.  The values mean:
# 0 = the option has no arg; 1 = the arg doesn't need any checking; 2 = only
# check the arg when receiving; and 3 = always check the arg.
# Keys are the option names without the leading "--".
our %long_opt = (
  'append' => 0,
  'backup-dir' => 2,
  'block-size' => 1,
  'bwlimit' => 1,
  'checksum-choice' => 1,
  'checksum-seed' => 1,
  'compare-dest' => 2,
  'compress-choice' => 1,
  'compress-level' => 1,
  'compress-threads' => 1,
  'copy-dest' => 2,
  'copy-devices' => -1,
  'copy-unsafe-links' => 0,
  'daemon' => -1,
  'debug' => 1,
  'delay-updates' => 0,
  'delete' => 0,
  'delete-after' => 0,
  'delete-before' => 0,
  'delete-delay' => 0,
  'delete-during' => 0,
  'delete-excluded' => 0,
  'delete-missing-args' => 0,
  'dirs' => 0,
  'existing' => 0,
  'fake-super' => 0,
  'files-from' => 3,
  'force' => 0,
  'from0' => 0,
  'fsync' => 0,
  'fuzzy' => 0,
  'group' => 0,
  'groupmap' => 1,
  'hard-links' => 0,
  'iconv' => 1,
  'ignore-errors' => 0,
  'ignore-existing' => 0,
  'ignore-missing-args' => 0,
  'ignore-times' => 0,
  'info' => 1,
  'inplace' => 0,
  'link-dest' => 2,
  'links' => 0,
  'list-only' => 0,
  'log-file' => 3,
  'log-format' => 1,
  'max-alloc' => 1,
  'max-delete' => 1,
  'max-size' => 1,
  'min-size' => 1,
  'mkpath' => 0,
  'modify-window' => 1,
  'msgs2stderr' => 0,
  'munge-links' => 0,
  'new-compress' => 0,
  'no-implied-dirs' => 0,
  'no-msgs2stderr' => 0,
  'no-munge-links' => -1,
  'no-r' => 0,
  'no-relative' => 0,
  'no-specials' => 0,
  'no-W' => 0,
  'numeric-ids' => 0,
  'old-compress' => 0,
  'one-file-system' => 0,
  'only-write-batch' => 1,
  'open-noatime' => 0,
  'owner' => 0,
  'partial' => 0,
  'partial-dir' => 2,
  'perms' => 0,
  'preallocate' => 0,
  'recursive' => 0,
  # The two remove-* options are disabled outright in -ro mode.
  'remove-sent-files' => $ro ? -1 : 0,
  'remove-source-files' => $ro ? -1 : 0,
  'safe-links' => 0,
  'sender' => 0,
  'server' => 0,
  'size-only' => 0,
  'skip-compress' => 1,
  'specials' => 0,
  'stats' => 0,
  'stderr' => 1,
  'suffix' => 1,
  'super' => 0,
  'temp-dir' => 2,
  'timeout' => 1,
  'times' => 0,
  'use-qsort' => 0,
  'usermap' => 1,
  'write-devices' => -1,
);

### END of options data produced by the cull_options script. ###

# When confined to a real subdirectory, also refuse anything that might
# follow a symlink out of the restricted dir.
unless ($subdir eq '/') {
    $short_disabled .= $short_disabled_subdir;
    $long_opt{$_} = -1 for ('copy-unsafe-links', 'no-implied-dirs');
}

# Remove every disabled letter from both short-option sets, then wrap each
# multi-letter set in [...] so it can be used as a regex character class.
for my $letters ($short_no_arg, $short_with_num) {
    $letters =~ s/[$short_disabled]//g if $short_disabled ne '';
    $letters = "[$letters]" if length($letters) > 1;
}

# A read-only server may not be told to write a log file, and --sender is
# refused for write-only servers and for commands that are not pulls.
$long_opt{'log-file'} = -1 if $ro;
$long_opt{'sender'} = -1 if $wo || !$am_sender;

# -no-del: disable every long option whose name starts with "remove" or
# "delete".
if ($no_del) {
    foreach my $name (keys %long_opt) {
        $long_opt{$name} = -1 if $name =~ /^remove|^delete/;
    }
}

# Logging is opt-in: a log line is written only when the log file already
# exists as a regular file and can be opened for appending.
my $write_log = 0;
if (-f LOGFILE) {
    $write_log = open(LOG, '>>', LOGFILE);
}

# Unless locking is disabled, take a shared lock when sending and an
# exclusive lock when receiving.  LOCK_FH is never closed explicitly, so
# the lock is held until this process exits.
unless ($no_lock) {
    open(LOCK_FH, '>>', LOCKFILE) or die "open lockfile: $!";
    my $lock_mode = $am_sender ? LOCK_SH : LOCK_EX;
    flock(LOCK_FH, $lock_mode) or die "lock lockfile: $!";
}

# All relative paths from here on are resolved inside the restricted dir.
unless (chdir $subdir) {
    die "$0: Unable to chdir to restricted dir: $!\n";
}

# Walk the words of the client's rsync command and sort them into:
#   @opts     - options to pass on to rsync (values checked via check_arg)
#   @args     - file args after the "." separator, normalized and glob-expanded
#   @log_args - the same file args exactly as the client sent them (for the log)
my(@opts, @args, @log_args);
my $in_options = 1;	# true until the "." that ends the option list is seen
my $last_opt = '';	# long option whose value is expected as the next word
my $check_type;		# non-zero: next word is the value of $last_opt
# Each word is a run of non-whitespace characters in which a backslash
# escapes the character that follows it.
while ($command =~ /((?:[^\s\\]+|\\.[^\s\\]*)+)/g) {
  $_ = $1;
  if ($check_type) {
    # This word is the separate value of the previous "--opt" word.
    push(@opts, check_arg($last_opt, $_, $check_type));
    $check_type = 0;
  } elsif ($in_options) {
    push(@opts, $_);
    if ($_ eq '.') {
      $in_options = 0;
    } else {
      # Accept a bundle of allowed no-arg short options (optionally ending in
      # an "e<digits>.<word chars>" group), or one numeric short option.
      next if /^-(?=.)$short_no_arg*(e\d*\.\w*)?$/o || /^-$short_with_num\d+$/o;

      my($opt,$arg) = /^--([^=]+)(?:=(.*))?$/;
      my $disabled;
      if (defined $opt) {
	my $ct = $long_opt{$opt};
	# Unknown long option: leave the loop with $in_options still set.
	last unless defined $ct;
	next if $ct == 0;
	if ($ct > 0) {
	  if (!defined $arg) {
	    # "--opt value" form: the value arrives as the next word.
	    $check_type = $ct;
	    $last_opt = $opt;
	    next;
	  }
	  # "--opt=value" form: check the value and store the result back
	  # into the option word that was just pushed.
	  $arg = check_arg($opt, $arg, $ct);
	  $opts[-1] =~ s/=.*/=$arg/;
	  next;
	}
	# $ct is negative: the option is explicitly disabled.
	$disabled = 1;
	$opt = "--$opt";
      } elsif ($short_disabled ne '') {
	# Look for a disabled letter in a short-option bundle so it can be
	# named in the error message.
	$disabled = /^-$short_no_arg*([$short_disabled])/o;
	$opt = "-$1";
      }

      last unless $disabled; # Generate generic failure
      die "$0: option $opt has been disabled on this server.\n";
    }
  } else {
      # Save original args for logging.
      push(@log_args, $_);
      # Validate args to ensure they don't try to leave our restricted dir.
      # Collapse repeated slashes, drop a leading slash so the path is
      # relative to the current directory, and turn an empty result into ".".
      s#//+#/#g;
      s#^/##;
      s#^$#.#;
      # GLOB_NOCHECK keeps a pattern that matches nothing as a literal arg.
      push(@args, bsd_glob($_, GLOB_LIMIT|GLOB_NOCHECK|GLOB_BRACE|GLOB_QUOTE));
  }
}

# Every expanded arg must stay inside the restricted dir.  The check runs on
# the results of glob expansion, not on the patterns the client sent.
unless ($subdir eq '/') {
    for my $path (@args) {
        if ($path =~ m#(^|/)\.\.(/|$)#) {
            die "Do not use .. in any path!\n";
        }
        if (!abs_is_under($path, $subdir)) {
            die "$0: arg not under subdir\n";
        }
    }
}

# $in_options is cleared only by the "." separator word, so it is still set
# when the command had no separator or an unrecognized option stopped the
# scan early.
if ($in_options) {
    die "$0: invalid rsync-command syntax or options\n";
}

# No file args at all: operate on the current (restricted) directory.
push(@args, '.') unless @args;

# -munge: force --munge-links onto the rsync command line.
push(@opts, '--munge-links') if $munge;
# -no-overwrite: force --ignore-existing onto the rsync command line.
push(@opts, '--ignore-existing') if $no_overwrite;

if ($write_log) {
    my ($sec, $min, $hour, $mday, $mon, $yr) = localtime;

    # The client's address is the first word of SSH_CONNECTION; strip the
    # IPv4-mapped IPv6 prefix if present.
    my $client = $ENV{SSH_CONNECTION} || 'unknown';
    $client =~ s/ .*//;
    $client =~ s/^::ffff://;

    # One tag per active rrsync flag, in a fixed order; inactive flags
    # contribute an empty string.
    my @flag_tags = (
        [ $doas,         '(doas)' ],
        [ $ro,           '(ro)' ],
        [ $wo,           '(wo)' ],
        [ $munge,        '(munge)' ],
        [ $no_del,       '(no-del)' ],
        [ $no_lock,      '(no-lock)' ],
        [ $no_overwrite, '(no-overwrite)' ],
    );

    printf LOG "%04d-%02d-%02d %02d:%02d:%02d %s [%s (%s)%s%s%s%s%s%s%s]\n",
        $yr + 1900, $mon + 1, $mday, $hour, $min, $sec,
        $client, "@opts @log_args", $subdir,
        map { $_->[0] ? $_->[1] : '' } @flag_tags;
    close LOG;
}

# Note: This assumes that the rsync protocol will not be maliciously hijacked.
#
# Run rsync (through doas when -doas was given) without a shell, with "--"
# separating the checked options from the file args.
#
# system() returns a wait status, not an exit code: the child's exit code
# is in the high byte and a terminating signal in the low 7 bits.  exit()
# keeps only the low 8 bits of its argument, so passing the raw status
# through would turn every ordinary rsync failure (e.g. 23 << 8) into exit
# status 0.  Decode it instead:
#   -1            could not start the program  -> exit 255
#   low bits set  child killed by a signal     -> exit 128 + signal
#   otherwise     child's own exit code        -> that exit code
my @rsync_cmd = $doas ? (DOAS, RSYNC) : (RSYNC);
my $wait_status = system(@rsync_cmd, @opts, '--', @args);
if ($wait_status == -1) {
    warn "$0: unable to run $rsync_cmd[0]: $!\n";
    exit(255);
}
if ($wait_status & 127) {
    exit(128 + ($wait_status & 127));
}
exit($wait_status >> 8);

# Report whether $path resolves to a location at or below $under_abspath.
#
#   $path           path to test; a relative path is resolved against the
#                   current directory.  It need not exist: while it cannot
#                   be resolved, its last component is dropped and the
#                   parent is tried instead.
#   $under_abspath  absolute base directory.  "/" is accepted and contains
#                   every absolute path.
#
# Returns 1 if the resolved path is the base itself or lies below it, and
# the empty string otherwise.  Dies if not even "." (or "/") can be
# resolved.
sub abs_is_under {
    my ($path, $under_abspath) = @_;

    # Drop trailing slashes so that "/" becomes "" and the pattern below
    # matches any absolute path instead of none.
    (my $base = $under_abspath) =~ s{/+\z}{};

    for (;;) {
        my $resolved = abs_path($path);
        if (defined $resolved) {
            # \A and \z rather than ^ and $: $ would also match just before
            # a trailing newline in the resolved name.
            return $resolved =~ m{\A\Q$base\E(?:/|\z)} ? 1 : '';
        }
        die "abs_path failed on $path: $!" if $path eq '.' || $path eq '/';

        # Retry with the parent.  A relative path with no slash left falls
        # back to ".", while an absolute path reduced to nothing falls back
        # to "/" (not to the current directory).
        if ($path =~ s{/[^/]*$}{}) {
            $path = '/' if $path eq '';
        }
        else {
            $path = '.';
        }
    }
}

# Unescape the value of a long option and, where required, confine it to
# the restricted dir.
#
#   $opt   option name without the leading "--" (used in error messages)
#   $arg   the value as sent by the client (may contain backslash escapes)
#   $type  check level from %long_opt: 1 = no path check, 2 = check only
#          when receiving, 3 = always check
#
# Returns the unescaped value; for a checked value, a leading "/" is
# rewritten to "$subdir/".  Dies if a checked value contains a ".."
# component or resolves outside the restricted dir.  No path check is done
# when the restricted dir is "/".
sub check_arg
{
  my($opt, $arg, $type) = @_;
  $arg =~ s/\\(.)/$1/g;
  if ($subdir ne '/' && ($type == 3 || ($type == 2 && !$am_sender))) {
    # Collapse every run of slashes to one, the same normalization that is
    # applied to file args (a plain s#//#/#g leaves "//" behind for "///").
    $arg =~ s#//+#/#g;
    die "Do not use .. in --$opt; anchor the path at the root of your restricted dir.\n"
      if $arg =~ m#(^|/)\.\.(/|$)#;
    # An absolute path is taken relative to the restricted dir.
    $arg =~ s#^/#$subdir/#;
    die "--$opt value outside restricted dir.\n"
	unless abs_is_under($arg, $subdir);
  }
  return $arg;
}
