From 214ac208491bbd8369d8c540e3571712d2e6665b Mon Sep 17 00:00:00 2001 From: smitsohu Date: Tue, 19 Jul 2022 14:58:32 +0200 Subject: protocol filter: add x32 ABI handling --- src/fseccomp/protocol.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/fseccomp/protocol.c b/src/fseccomp/protocol.c index 25742c173..ea5cd5bd4 100644 --- a/src/fseccomp/protocol.c +++ b/src/fseccomp/protocol.c @@ -132,15 +132,18 @@ void protocol_build_filter(const char *prlist, const char *fname) { EXAMINE_SYSCALL, // 1 // checking SYS_socket only: filtering SYS_socketcall not possible with seccomp ONLY(359), // 1 + 2 - BPF_JUMP(BPF_JMP+BPF_JA+BPF_K, (3 + 1 + 2), 0, 0), // 1 + 2 + 1 + BPF_JUMP(BPF_JMP+BPF_JA+BPF_K, (3 + 1 + 3 + 2), 0, 0), // 1 + 2 + 1 #else #warning 32 bit protocol filter not implemented yet for your architecture #endif VALIDATE_ARCHITECTURE, // 3 EXAMINE_SYSCALL, // 3 + 1 - ONLY(SYS_socket), // 3 + 1 + 2 +#if defined __x86_64__ + HANDLE_X32, // 3 + 1 + 3 +#endif + ONLY(SYS_socket), // 3 + 1 (+ 3) + 2 - EXAMINE_ARGUMENT(0) // 3 + 1 + 2 + 1 + EXAMINE_ARGUMENT(0) // 3 + 1 (+ 3) + 2 + 1 }; memcpy(ptr, &filter_start[0], sizeof(filter_start)); ptr += sizeof(filter_start); -- cgit v1.2.3-54-g00ecf From 87afef810c2dfbf67420dc76a67c707fbb7353db Mon Sep 17 00:00:00 2001 From: smitsohu Date: Tue, 19 Jul 2022 15:19:24 +0200 Subject: introduce new option restrict-namespaces --- contrib/vim/syntax/firejail.vim | 6 +- etc/profile-a-l/default.profile | 1 + etc/profile-m-z/server.profile | 1 + etc/templates/profile.template | 1 + src/firejail/firejail.h | 2 + src/firejail/main.c | 14 +++ src/firejail/preproc.c | 10 +- src/firejail/profile.c | 27 +++++- src/firejail/sandbox.c | 10 ++ src/firejail/seccomp.c | 31 ++++++- src/firejail/usage.c | 3 + src/fseccomp/fseccomp.h | 4 + src/fseccomp/main.c | 6 ++ src/fseccomp/namespaces.c | 197 ++++++++++++++++++++++++++++++++++++++++ src/include/rundefs.h | 2 + src/man/firejail-profile.txt | 6 ++ src/man/firejail.txt | 24 
+++++ src/zsh_completion/_firejail.in | 4 +- 18 files changed, 344 insertions(+), 5 deletions(-) create mode 100644 src/fseccomp/namespaces.c diff --git a/contrib/vim/syntax/firejail.vim b/contrib/vim/syntax/firejail.vim index 51e9cfdad..9099a0808 100644 --- a/contrib/vim/syntax/firejail.vim +++ b/contrib/vim/syntax/firejail.vim @@ -17,6 +17,9 @@ syn match fjComment "#.*$" contains=fjTodo syn keyword fjCapability audit_control audit_read audit_write block_suspend chown dac_override dac_read_search fowner fsetid ipc_lock ipc_owner kill lease linux_immutable mac_admin mac_override mknod net_admin net_bind_service net_broadcast net_raw setgid setfcap setpcap setuid sys_admin sys_boot sys_chroot sys_module sys_nice sys_pacct sys_ptrace sys_rawio sys_resource sys_time sys_tty_config syslog wake_alarm nextgroup=fjCapabilityList contained syn match fjCapabilityList /,/ nextgroup=fjCapability contained +syn keyword fjNamespaces cgroup ipc net mnt pid time user uts nextgroup=fjNamespacesList contained +syn match fjNamespacesList /,/ nextgroup=fjNamespaces contained + syn keyword fjProtocol unix inet inet6 netlink packet nextgroup=fjProtocolList contained syn match fjProtocolList /,/ nextgroup=fjProtocol contained @@ -48,7 +51,7 @@ syn keyword fjFilter filter contained syn match fjVar /\v\$\{(CFG|DESKTOP|DOCUMENTS|DOWNLOADS|HOME|MUSIC|PATH|PICTURES|RUNUSER|VIDEOS)}/ " Commands grabbed from: src/firejail/profile.c -" Generate list with: { rg -o 'strn?cmp\(ptr, "([^"]+) "' -r '$1' src/firejail/profile.c; echo private-lib; } | grep -vEx '(include|ignore|caps\.drop|caps\.keep|protocol|seccomp|seccomp\.drop|seccomp\.keep|env|rmenv|net|ip)' | sort -u | tr $'\n' '|' # private-lib is special-cased in the code and doesn't match the regex; grep-ed patterns are handled later with 'syn match nextgroup=' directives (except for include which is special-cased as a fjCommandNoCond keyword) +" Generate list with: { rg -o 'strn?cmp\(ptr, "([^"]+) "' -r '$1' src/firejail/profile.c; echo 
private-lib; } | grep -vEx '(include|ignore|caps\.drop|caps\.keep|protocol|restrict-namespaces|seccomp|seccomp\.drop|seccomp\.keep|env|rmenv|net|ip)' | sort -u | tr $'\n' '|' # private-lib is special-cased in the code and doesn't match the regex; grep-ed patterns are handled later with 'syn match nextgroup=' directives (except for include which is special-cased as a fjCommandNoCond keyword) syn match fjCommand /\v(bind|blacklist|blacklist-nolog|cpu|defaultgw|dns|hostname|hosts-file|ip6|iprange|join-or-start|mac|mkdir|mkfile|mtu|name|netfilter|netfilter6|netmask|nice|noblacklist|noexec|nowhitelist|overlay-named|private|private-bin|private-cwd|private-etc|private-home|private-lib|private-opt|private-srv|read-only|read-write|rlimit-as|rlimit-cpu|rlimit-fsize|rlimit-nofile|rlimit-nproc|rlimit-sigpending|timeout|tmpfs|veth-name|whitelist|xephyr-screen) / skipwhite contained " Generate list with: rg -o 'strn?cmp\(ptr, "([^ "]*[^ ])"' -r '$1' src/firejail/profile.c | grep -vEx '(include|rlimit|quiet)' | sed -e 's/\./\\./' | sort -u | tr $'\n' '|' # include/rlimit are false positives, quiet is special-cased below syn match fjCommand /\v(allow-debuggers|allusers|apparmor|caps|deterministic-exit-code|deterministic-shutdown|disable-mnt|ipc-namespace|keep-config-pulse|keep-dev-shm|keep-fd|keep-var-tmp|machine-id|memory-deny-write-execute|netfilter|no3d|noautopulse|nodbus|nodvd|nogroups|noinput|nonewprivs|noprinters|noroot|nosound|notv|nou2f|novideo|overlay|overlay-tmpfs|private|private-cache|private-cwd|private-dev|private-lib|private-tmp|seccomp|seccomp\.32|seccomp\.block-secondary|tracelog|writable-etc|writable-run-user|writable-var|writable-var-log|x11)$/ contained @@ -56,6 +59,7 @@ syn match fjCommand /ignore / nextgroup=fjCommand,fjCommandNoCond skipwhite cont syn match fjCommand /caps\.drop / nextgroup=fjCapability,fjAll skipwhite contained syn match fjCommand /caps\.keep / nextgroup=fjCapability skipwhite contained syn match fjCommand /protocol / nextgroup=fjProtocol 
skipwhite contained +syn match fjCommand /restrict-namespaces / nextgroup=fjNamespaces skipwhite contained syn match fjCommand /\vseccomp(\.32)?(\.drop|\.keep)? / nextgroup=fjSyscall skipwhite contained syn match fjCommand /x11 / nextgroup=fjX11Sandbox skipwhite contained syn match fjCommand /env / nextgroup=fjEnvVar skipwhite contained diff --git a/etc/profile-a-l/default.profile b/etc/profile-a-l/default.profile index dac842bb6..397a89bee 100644 --- a/etc/profile-a-l/default.profile +++ b/etc/profile-a-l/default.profile @@ -60,3 +60,4 @@ seccomp # deterministic-shutdown # memory-deny-write-execute # read-only ${HOME} +# restrict-namespaces diff --git a/etc/profile-m-z/server.profile b/etc/profile-m-z/server.profile index fd7ffb38d..8d8a1dac6 100644 --- a/etc/profile-m-z/server.profile +++ b/etc/profile-m-z/server.profile @@ -90,6 +90,7 @@ dbus-user none # deterministic-shutdown # memory-deny-write-execute # read-only ${HOME} +# restrict-namespaces # writable-run-user # writable-var # writable-var-log diff --git a/etc/templates/profile.template b/etc/templates/profile.template index 28339765f..59083f660 100644 --- a/etc/templates/profile.template +++ b/etc/templates/profile.template @@ -228,3 +228,4 @@ include globals.local ##noexec PATH ##read-only ${HOME} ##read-write ${HOME} +#restrict-namespaces diff --git a/src/firejail/firejail.h b/src/firejail/firejail.h index f8a23678a..b744ebd45 100644 --- a/src/firejail/firejail.h +++ b/src/firejail/firejail.h @@ -198,6 +198,7 @@ typedef struct config_t { char *seccomp_list_drop, *seccomp_list_drop32; // seccomp drop list char *seccomp_list_keep, *seccomp_list_keep32; // seccomp keep list char *protocol; // protocol list + char *restrict_namespaces; // namespaces list char *seccomp_error_action; // error action: kill, log or errno // rlimits @@ -633,6 +634,7 @@ int seccomp_load(const char *fname); int seccomp_filter_drop(bool native); int seccomp_filter_keep(bool native); int seccomp_filter_mdwx(bool native); +int 
seccomp_filter_namespaces(bool native, const char *list); void seccomp_print_filter(pid_t pid) __attribute__((noreturn)); // caps.c diff --git a/src/firejail/main.c b/src/firejail/main.c index ff88b9f6e..5a7254de2 100644 --- a/src/firejail/main.c +++ b/src/firejail/main.c @@ -1399,6 +1399,20 @@ int main(int argc, char **argv, char **envp) { else exit_err_feature("seccomp"); } + else if (strcmp(argv[i], "--restrict-namespaces") == 0) { + if (checkcfg(CFG_SECCOMP)) + profile_list_augment(&cfg.restrict_namespaces, "cgroup,ipc,net,mnt,pid,time,user,uts"); + else + exit_err_feature("seccomp"); + } + else if (strncmp(argv[i], "--restrict-namespaces=", 22) == 0) { + if (checkcfg(CFG_SECCOMP)) { + const char *add = argv[i] + 22; + profile_list_augment(&cfg.restrict_namespaces, add); + } + else + exit_err_feature("seccomp"); + } else if (strncmp(argv[i], "--seccomp-error-action=", 23) == 0) { if (checkcfg(CFG_SECCOMP)) { int config_seccomp_error_action = checkcfg(CFG_SECCOMP_ERROR_ACTION); diff --git a/src/firejail/preproc.c b/src/firejail/preproc.c index b25b79a9e..44f82681a 100644 --- a/src/firejail/preproc.c +++ b/src/firejail/preproc.c @@ -91,10 +91,18 @@ void preproc_mount_mnt_dir(void) { copy_file(PATH_SECCOMP_MDWX, RUN_SECCOMP_MDWX, getuid(), getgid(), 0644); // root needed copy_file(PATH_SECCOMP_MDWX_32, RUN_SECCOMP_MDWX_32, getuid(), getgid(), 0644); // root needed } - // as root, create empty RUN_SECCOMP_PROTOCOL and RUN_SECCOMP_POSTEXEC files + // as root, create empty RUN_SECCOMP_PROTOCOL, RUN_SECCOMP_NS and RUN_SECCOMP_POSTEXEC files create_empty_file_as_root(RUN_SECCOMP_PROTOCOL, 0644); if (set_perms(RUN_SECCOMP_PROTOCOL, getuid(), getgid(), 0644)) errExit("set_perms"); + if (cfg.restrict_namespaces) { + create_empty_file_as_root(RUN_SECCOMP_NS, 0644); + if (set_perms(RUN_SECCOMP_NS, getuid(), getgid(), 0644)) + errExit("set_perms"); + create_empty_file_as_root(RUN_SECCOMP_NS_32, 0644); + if (set_perms(RUN_SECCOMP_NS_32, getuid(), getgid(), 0644)) + 
errExit("set_perms"); + } create_empty_file_as_root(RUN_SECCOMP_POSTEXEC, 0644); if (set_perms(RUN_SECCOMP_POSTEXEC, getuid(), getgid(), 0644)) errExit("set_perms"); diff --git a/src/firejail/profile.c b/src/firejail/profile.c index 1a83a0628..dc1aff49a 100644 --- a/src/firejail/profile.c +++ b/src/firejail/profile.c @@ -26,7 +26,8 @@ extern char *xephyr_screen; -#define MAX_READ 8192 // line buffer for profile files +#define MAX_READ 8192 // line buffer for profile files +#define MAX_LIST 16384 // size limit for argument lists // find and read the profile specified by name from dir directory // return 1 if a profile was found @@ -1042,6 +1043,24 @@ int profile_check_line(char *ptr, int lineno, const char *fname) { return 0; } + // restrict-namespaces + if (strcmp(ptr, "restrict-namespaces") == 0) { + if (checkcfg(CFG_SECCOMP)) + profile_list_augment(&cfg.restrict_namespaces, "cgroup,ipc,net,mnt,pid,time,user,uts"); + else + warning_feature_disabled("seccomp"); + return 0; + } + if (strncmp(ptr, "restrict-namespaces ", 20) == 0) { + if (checkcfg(CFG_SECCOMP)) { + const char *add = ptr + 20; + profile_list_augment(&cfg.restrict_namespaces, add); + } + else + warning_feature_disabled("seccomp"); + return 0; + } + // seccomp error action if (strncmp(ptr, "seccomp-error-action ", 21) == 0) { if (checkcfg(CFG_SECCOMP)) { @@ -1959,4 +1978,10 @@ void profile_list_augment(char **list, const char *items) errExit("asprintf"); free(*list); *list = profile_list_compress(tmp); + + // lists should not grow indefinitely + if (strlen(*list) > MAX_LIST) { + fprintf(stderr, "Error: argument list is too long\n"); + exit(1); + } } diff --git a/src/firejail/sandbox.c b/src/firejail/sandbox.c index e8c4a445a..507e916c8 100644 --- a/src/firejail/sandbox.c +++ b/src/firejail/sandbox.c @@ -1209,6 +1209,16 @@ int sandbox(void* sandbox_arg) { seccomp_load(RUN_SECCOMP_MDWX_32); } + if (cfg.restrict_namespaces) { + seccomp_filter_namespaces(true, cfg.restrict_namespaces); + 
seccomp_filter_namespaces(false, cfg.restrict_namespaces); + + if (arg_debug) + printf("Install namespaces filter\n"); + seccomp_load(RUN_SECCOMP_NS); // install filter + seccomp_load(RUN_SECCOMP_NS_32); + } + // make seccomp filters read-only fs_remount(RUN_SECCOMP_DIR, MOUNT_READONLY, 0); seccomp_debug(); diff --git a/src/firejail/seccomp.c b/src/firejail/seccomp.c index b8b4ec0d6..84748da77 100644 --- a/src/firejail/seccomp.c +++ b/src/firejail/seccomp.c @@ -416,7 +416,7 @@ int seccomp_filter_mdwx(bool native) { // build the seccomp filter as a regular user int rv = sbox_run(SBOX_USER | SBOX_CAPS_NONE | SBOX_SECCOMP, 3, - PATH_FSECCOMP, command, filter); + PATH_FSECCOMP, command, filter); if (rv) { fprintf(stderr, "Error: cannot build memory-deny-write-execute filter\n"); @@ -429,6 +429,35 @@ int seccomp_filter_mdwx(bool native) { return 0; } +// create namespaces filter +int seccomp_filter_namespaces(bool native, const char *list) { + if (arg_debug) + printf("Build restrict-namespaces filter\n"); + + const char *command, *filter; + if (native) { + command = "restrict-namespaces"; + filter = RUN_SECCOMP_NS; + } else { + command = "restrict-namespaces.32"; + filter = RUN_SECCOMP_NS_32; + } + + // build the seccomp filter as a regular user + int rv = sbox_run(SBOX_USER | SBOX_CAPS_NONE | SBOX_SECCOMP, 4, + PATH_FSECCOMP, command, filter, list); + + if (rv) { + fprintf(stderr, "Error: cannot build restrict-namespaces filter\n"); + exit(rv); + } + + if (arg_debug) + printf("restrict-namespaces filter configured\n"); + + return 0; +} + void seccomp_print_filter(pid_t pid) { EUID_ASSERT(); diff --git a/src/firejail/usage.c b/src/firejail/usage.c index 14cd1f3a4..c3c17393c 100644 --- a/src/firejail/usage.c +++ b/src/firejail/usage.c @@ -214,6 +214,9 @@ static char *usage_str = " --quiet - turn off Firejail's output.\n" " --read-only=filename - set directory or file read-only.\n" " --read-write=filename - set directory or file read-write.\n" + " --restrict-namespaces - 
seccomp filter that blocks attempts to create new namespaces.\n" + " --restrict-namespaces=namespace,namespace - seccomp filter that blocks attempts\n" + "\tto create specified namespaces.\n" " --rlimit-as=number - set the maximum size of the process's virtual memory.\n" "\t(address space) in bytes.\n" " --rlimit-cpu=number - set the maximum CPU time in seconds.\n" diff --git a/src/fseccomp/fseccomp.h b/src/fseccomp/fseccomp.h index 65337da2a..5911b5156 100644 --- a/src/fseccomp/fseccomp.h +++ b/src/fseccomp/fseccomp.h @@ -61,6 +61,10 @@ void seccomp_keep(const char *fname1, const char *fname2, char *list, bool nativ void memory_deny_write_execute(const char *fname); void memory_deny_write_execute_32(const char *fname); +// namespaces.c +void deny_ns(const char *fname, const char *list); +void deny_ns_32(const char *fname, const char *list); + // seccomp_print void filter_print(const char *fname); diff --git a/src/fseccomp/main.c b/src/fseccomp/main.c index 48665ab71..01d7dd8cf 100644 --- a/src/fseccomp/main.c +++ b/src/fseccomp/main.c @@ -48,6 +48,8 @@ static void usage(void) { printf("\tfseccomp keep32 file1 file2 list\n"); printf("\tfseccomp memory-deny-write-execute file\n"); printf("\tfseccomp memory-deny-write-execute.32 file\n"); + printf("\tfseccomp restrict-namespaces file list\n"); + printf("\tfseccomp restrict-namespaces.32 file list\n"); } int main(int argc, char **argv) { @@ -135,6 +137,10 @@ printf("\n"); memory_deny_write_execute(argv[2]); else if (argc == 3 && strcmp(argv[1], "memory-deny-write-execute.32") == 0) memory_deny_write_execute_32(argv[2]); + else if (argc == 4 && strcmp(argv[1], "restrict-namespaces") == 0) + deny_ns(argv[2], argv[3]); + else if (argc == 4 && strcmp(argv[1], "restrict-namespaces.32") == 0) + deny_ns_32(argv[2], argv[3]); else { fprintf(stderr, "Error fseccomp: invalid arguments\n"); return 1; diff --git a/src/fseccomp/namespaces.c b/src/fseccomp/namespaces.c new file mode 100644 index 000000000..3df23dcff --- /dev/null 
+++ b/src/fseccomp/namespaces.c @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2014-2022 Firejail Authors + * + * This file is part of firejail project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#define _GNU_SOURCE +#include "fseccomp.h" +#include "../include/seccomp.h" +#include <sys/syscall.h> + +#include <sched.h> +#ifndef CLONE_NEWCGROUP +#define CLONE_NEWCGROUP 0x02000000 +#endif +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 +#endif + +// 64-bit architectures +#if INTPTR_MAX == INT64_MAX +#if defined __x86_64__ +// i386 syscalls +#define clone_32 120 +#define clone3_32 435 +#define unshare_32 310 +#define setns_32 346 +#else +#warning 32 bit namespaces filter not implemented yet for your architecture +#endif +#endif + + +static int build_ns_mask(const char *list) { + int mask = 0; + + char *dup = strdup(list); + if (!dup) + errExit("strdup"); + + char *token = strtok(dup, ","); + while (token) { + if (strcmp(token, "cgroup") == 0) + mask |= CLONE_NEWCGROUP; + else if (strcmp(token, "ipc") == 0) + mask |= CLONE_NEWIPC; + else if (strcmp(token, "net") == 0) + mask |= CLONE_NEWNET; + else if (strcmp(token, "mnt") == 0) + mask |= CLONE_NEWNS; + else if (strcmp(token, "pid") == 0) + mask |= CLONE_NEWPID; + else if (strcmp(token, "time") == 0) + mask |= CLONE_NEWTIME; + else if (strcmp(token, "user") ==
0) + mask |= CLONE_NEWUSER; + else if (strcmp(token, "uts") == 0) + mask |= CLONE_NEWUTS; + else { + fprintf(stderr, "Error fseccomp: %s is not a valid namespace\n", token); + exit(1); + } + + token = strtok(NULL, ","); + } + + free(dup); + return mask; +} + +void deny_ns(const char *fname, const char *list) { + int mask = build_ns_mask(list); + // CLONE_NEWTIME means something different for clone + // create a second mask without it + int clone_mask = mask & ~CLONE_NEWTIME; + + // open file + int fd = open(fname, O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + fprintf(stderr, "Error fseccomp: cannot open %s file\n", fname); + exit(1); + } + + filter_init(fd, true); + + // build filter + struct sock_filter filter[] = { +#ifdef SYS_clone + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SYS_clone, 0, 4), + // s390 has first and second argument flipped +#if defined __s390__ + EXAMINE_ARGUMENT(1), +#else + EXAMINE_ARGUMENT(0), +#endif + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, clone_mask, 0, 1), + KILL_OR_RETURN_ERRNO, + RETURN_ALLOW, +#endif +#ifdef SYS_clone3 + // cannot inspect clone3 argument because + // seccomp does not dereference pointers + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SYS_clone3, 0, 1), + RETURN_ERRNO(ENOSYS), // hint to use clone instead +#endif +#ifdef SYS_unshare + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SYS_unshare, 0, 4), + EXAMINE_ARGUMENT(0), + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, mask, 0, 1), + KILL_OR_RETURN_ERRNO, + RETURN_ALLOW, +#endif +#ifdef SYS_setns + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SYS_setns, 0, 4), + EXAMINE_ARGUMENT(1), + // always fail if argument is zero + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 1, 0), + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, mask, 0, 1), + KILL_OR_RETURN_ERRNO, + RETURN_ALLOW +#endif + }; + write_to_file(fd, filter, sizeof(filter)); + + filter_end_blacklist(fd); + + // close file + close(fd); +} + +void deny_ns_32(const char *fname, const char *list) { + int mask = build_ns_mask(list); + // CLONE_NEWTIME means something 
different for clone + // create a second mask without it + int clone_mask = mask & ~CLONE_NEWTIME; + + // open file + int fd = open(fname, O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + fprintf(stderr, "Error fseccomp: cannot open %s file\n", fname); + exit(1); + } + + filter_init(fd, false); + + // build filter + struct sock_filter filter[] = { +#ifdef clone_32 + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, clone_32, 0, 4), + EXAMINE_ARGUMENT(0), + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, clone_mask, 0, 1), + KILL_OR_RETURN_ERRNO, + RETURN_ALLOW, +#endif +#ifdef clone3_32 + // cannot inspect clone3 argument because + // seccomp does not dereference pointers + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, clone3_32, 0, 1), + RETURN_ERRNO(ENOSYS), // hint to use clone instead +#endif +#ifdef unshare_32 + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, unshare_32, 0, 4), + EXAMINE_ARGUMENT(0), + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, mask, 0, 1), + KILL_OR_RETURN_ERRNO, + RETURN_ALLOW, +#endif +#ifdef setns_32 + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, setns_32, 0, 4), + EXAMINE_ARGUMENT(1), + // always fail if argument is zero + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 1, 0), + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, mask, 0, 1), + KILL_OR_RETURN_ERRNO, + RETURN_ALLOW +#endif + }; + write_to_file(fd, filter, sizeof(filter)); + + filter_end_blacklist(fd); + + // close file + close(fd); +} diff --git a/src/include/rundefs.h b/src/include/rundefs.h index 08042d2c4..079670f10 100644 --- a/src/include/rundefs.h +++ b/src/include/rundefs.h @@ -68,6 +68,8 @@ #define RUN_SECCOMP_32 RUN_SECCOMP_DIR "/seccomp.32" // 32bit arch filter installed on 64bit architectures #define RUN_SECCOMP_MDWX RUN_SECCOMP_DIR "/seccomp.mdwx" // filter for memory-deny-write-execute #define RUN_SECCOMP_MDWX_32 RUN_SECCOMP_DIR "/seccomp.mdwx.32" +#define RUN_SECCOMP_NS RUN_SECCOMP_DIR "/seccomp.namespaces" +#define RUN_SECCOMP_NS_32 RUN_SECCOMP_DIR "/seccomp.namespaces.32" #define RUN_SECCOMP_BLOCK_SECONDARY RUN_SECCOMP_DIR 
"/seccomp.block_secondary" // secondary arch blocking filter #define RUN_SECCOMP_POSTEXEC RUN_SECCOMP_DIR "/seccomp.postexec" // filter for post-exec library #define RUN_SECCOMP_POSTEXEC_32 RUN_SECCOMP_DIR "/seccomp.postexec32" // filter for post-exec library diff --git a/src/man/firejail-profile.txt b/src/man/firejail-profile.txt index 5c8b6031d..be1f55f0f 100644 --- a/src/man/firejail-profile.txt +++ b/src/man/firejail-profile.txt @@ -520,6 +520,12 @@ first argument to socket system call. Recognized values: \fBunix\fR, \fBinet\fR, \fBinet6\fR, \fBnetlink\fR, \fBpacket\fR, and \fBbluetooth\fR. Multiple protocol commands are allowed and they accumulate. .TP +\fBrestrict-namespaces +Install a seccomp filter that blocks attempts to create new cgroup, ipc, net, mount, pid, time, user or uts namespaces. +.TP +\fBrestrict-namespaces cgroup,ipc,net,mnt,pid,time,user,uts +Install a seccomp filter that blocks attempts to create any of the specified namespaces. +.TP \fBseccomp Enable seccomp filter and blacklist the syscalls in the default list. See man 1 firejail for more details. .TP diff --git a/src/man/firejail.txt b/src/man/firejail.txt index c2c0bc297..087d1c85a 100644 --- a/src/man/firejail.txt +++ b/src/man/firejail.txt @@ -693,6 +693,7 @@ Example: .br $ firejail \-\-net=eth0 \-\-defaultgw=10.10.20.1 firefox #endif + .TP \fB\-\-deterministic-exit-code Always exit firejail with the first child's exit status. The default behavior is to use the exit status of the final child to exit, which can be nondeterministic. @@ -2256,6 +2257,29 @@ $ touch ~/test/a $ firejail --read-only=~/test --read-write=~/test/a +.TP +\fB\-\-restrict-namespaces +Install a seccomp filter that blocks attempts to create new cgroup, ipc, net, mount, pid, time, user or uts namespaces. 
+.br + +.br +Example: +.br +$ firejail \-\-restrict-namespaces + +.TP +\fB\-\-restrict-namespaces=cgroup,ipc,net,mnt,pid,time,user,uts +Install a seccomp filter that blocks attempts to create any of the specified namespaces. The filter examines +the arguments of clone, unshare and setns system calls and returns error EPERM to the process +(or kills it or logs the attempt, see \-\-seccomp-error-action below) if necessary. Note that the filter is not +able to examine the arguments of clone3 system calls, and always responds to these calls with error ENOSYS. +.br + +.br +Example: +.br +$ firejail \-\-restrict-namespaces=user,net + .TP \fB\-\-rlimit-as=number Set the maximum size of the process's virtual memory (address space) in bytes. diff --git a/src/zsh_completion/_firejail.in b/src/zsh_completion/_firejail.in index 8383d83d3..605000e31 100644 --- a/src/zsh_completion/_firejail.in +++ b/src/zsh_completion/_firejail.in @@ -103,7 +103,7 @@ _firejail_args=( '--join-or-start=-[join the sandbox or start a new one name|pid]: :_all_firejails' '--keep-config-pulse[disable automatic ~/.config/pulse init]' '--keep-dev-shm[/dev/shm directory is untouched (even with --private-dev)]' - '--keep-fd[inherit open file descriptors to sandbox]' + '--keep-fd[inherit open file descriptors to sandbox]: :' '--keep-var-tmp[/var/tmp directory is untouched]' '--machine-id[spoof /etc/machine-id with a random id]' '--memory-deny-write-execute[seccomp filter to block attempts to create memory mappings that are both writable and executable]' @@ -141,6 +141,8 @@ _firejail_args=( "--quiet[turn off Firejail's output.]" '*--read-only=-[set directory or file read-only]: :_files' '*--read-write=-[set directory or file read-write]: :_files' + '--restrict-namespaces[seccomp filter that blocks attempts to create new namespaces]' + '--restrict-namespaces=-[seccomp filter that blocks attempts to create specified namespaces]: :' "--rlimit-as=-[set the maximum size of the process's virtual memory (address 
space) in bytes]: :" '--rlimit-cpu=-[set the maximum CPU time in seconds]: :' '--rlimit-fsize=-[set the maximum file size that can be created by a process]: :' -- cgit v1.2.3-54-g00ecf