#ifndef NOLIBC_IGNORE_ERRNO
#define SET_ERRNO(v) do { errno = (v); } while (0)
int errno __attribute__((weak));
#else
#define SET_ERRNO(v) do { } while (0)
#endif
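/*
 * Illustrative sketch (not part of the excerpt above) of how a nolibc-style
 * wrapper is expected to use SET_ERRNO: the raw sys_getpid() helper is
 * assumed to return the kernel's result directly, with errors encoded as
 * -errno.
 */
static __attribute__((unused))
pid_t getpid(void)
{
	pid_t ret = sys_getpid();

	if (ret < 0) {
		SET_ERRNO(-ret);	/* no-op when NOLIBC_IGNORE_ERRNO is set */
		ret = -1;
	}
	return ret;
}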
struct kernel_clone_args {
	u64 flags;
	int __user *pidfd;
	int __user *child_tid;
	int __user *parent_tid;
	const char *name;
	int exit_signal;
	u32 kthread:1;
	u32 io_thread:1;
	u32 user_worker:1;
	u32 no_files:1;
	unsigned long stack;
	unsigned long stack_size;
	unsigned long tls;
	pid_t *set_tid;
	/* Number of elements in *set_tid */
	size_t set_tid_size;
	int cgroup;
	int idle;
	int (*fn)(void *);
	void *fn_arg;
	struct cgroup *cgrp;
	struct css_set *cset;
};
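/*
 * Illustrative sketch of how an in-kernel caller fills the struct before
 * handing it to kernel_clone(). It is modelled on kernel_thread(), but the
 * name and the exact flag handling here are simplifications, not a verbatim
 * copy from the tree.
 */
pid_t example_kernel_thread(int (*fn)(void *), void *arg, const char *name,
			    unsigned long flags)
{
	struct kernel_clone_args args = {
		.flags		= ((lower_32_bits(flags) | CLONE_VM |
				    CLONE_UNTRACED) & ~CSIGNAL),
		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
		.fn		= fn,
		.fn_arg		= arg,
		.name		= name,
		.kthread	= 1,
	};

	return kernel_clone(&args);
}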
/*
 * Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 *
 * args->exit_signal is expected to be checked for sanity by the caller.
 */
pid_t kernel_clone(struct kernel_clone_args *args)
{
	u64 clone_flags = args->flags;
	struct completion vfork;
	struct pid *pid;
	struct task_struct *p;
	int trace = 0;
	pid_t nr;
	/*
	 * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
	 * to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are
	 * mutually exclusive. With clone3() CLONE_PIDFD has grown a separate
	 * field in struct clone_args and it still doesn't make sense to have
	 * them both point at the same memory location. Performing this check
	 * here has the advantage that we don't need to have a separate helper
	 * to check for legacy clone().
	 */
	if ((args->flags & CLONE_PIDFD) &&
	    (args->flags & CLONE_PARENT_SETTID) &&
	    (args->pidfd == args->parent_tid))
		return -EINVAL;
	/*
	 * Determine whether and which event to report to ptracer. When
	 * called from kernel_thread or CLONE_UNTRACED is explicitly
	 * requested, no event is reported; otherwise, report if the event
	 * for the type of forking is enabled.
	 */
	if (!(clone_flags & CLONE_UNTRACED)) {
		if (clone_flags & CLONE_VFORK)
			trace = PTRACE_EVENT_VFORK;
		else if (args->exit_signal != SIGCHLD)
			trace = PTRACE_EVENT_CLONE;
		else
			trace = PTRACE_EVENT_FORK;

		if (likely(!ptrace_event_enabled(current, trace)))
			trace = 0;
	}
	p = copy_process(NULL, trace, NUMA_NO_NODE, args);
	add_latent_entropy();

	if (IS_ERR(p))
		return PTR_ERR(p);
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	trace_sched_process_fork(current, p);
	pid = get_task_pid(p, PIDTYPE_PID);
	nr = pid_vnr(pid);
	if (clone_flags & CLONE_PARENT_SETTID)
		put_user(nr, args->parent_tid);
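/*
 * Illustrative userspace sketch (not part of kernel_clone()): with clone3()
 * the pidfd and parent_tid destinations are separate struct clone_args
 * fields, and the CLONE_PIDFD check earlier only rejects the degenerate case
 * where both end up pointing at the same memory location. This sketch
 * assumes Linux 5.3+ uapi headers and issues the raw syscall, since glibc
 * provides no clone3() wrapper; the function name is hypothetical.
 */
#define _GNU_SOURCE
#include <linux/sched.h>	/* struct clone_args, CLONE_* */
#include <sys/syscall.h>
#include <signal.h>
#include <stdint.h>
#include <unistd.h>

static pid_t example_clone3_with_pidfd(int *pidfd, pid_t *child_tid_in_parent)
{
	struct clone_args args = {
		.flags       = CLONE_PIDFD | CLONE_PARENT_SETTID,
		.pidfd       = (uint64_t)(uintptr_t)pidfd,
		.parent_tid  = (uint64_t)(uintptr_t)child_tid_in_parent,
		.exit_signal = SIGCHLD,
	};

	/* Returns the child's PID in the parent, 0 in the child, -1 on error. */
	return syscall(SYS_clone3, &args, sizeof(args));
}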
	/*
	 * One for the user space visible state that goes away when reaped.
	 * One for the scheduler.
	 */
	refcount_set(&tsk->rcu_users, 2);
	/* One for the rcu users */
	refcount_set(&tsk->usage, 1);
#ifdef CONFIG_BLK_DEV_IO_TRACE
	tsk->btrace_seq = 0;
#endif
	tsk->splice_pipe = NULL;
	tsk->task_frag.page = NULL;
	tsk->wake_q.next = NULL;
	tsk->worker_private = NULL;
static void account_kernel_stack(struct task_struct *tsk, int account)
{
	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
		struct vm_struct *vm = task_stack_vm_area(tsk);
		int i;

		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
			mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
					      account * (PAGE_SIZE / 1024));
	} else {
		void *stack = task_stack_page(tsk);

		/* All stack pages are in the same node. */
		mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
				      account * (THREAD_SIZE / 1024));
	}
}
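/*
 * Worked example (illustrative): with 4 KiB pages and THREAD_SIZE == 16 KiB,
 * account == 1 adds 4 * (4096 / 1024) == 16 to NR_KERNEL_STACK_KB in the
 * VMAP_STACK case, and 16384 / 1024 == 16 in the slab-backed case; passing
 * account == -1 on stack release subtracts the same amount, so the counter
 * is always kept in KiB.
 *
 * A minimal sketch of the expected call pattern; the helper names below are
 * hypothetical, not taken from the kernel tree.
 */
static void example_charge_stack(struct task_struct *tsk)
{
	account_kernel_stack(tsk, 1);	/* charge the new task's stack */
}

static void example_uncharge_stack(struct task_struct *tsk)
{
	account_kernel_stack(tsk, -1);	/* undo the charge before freeing */
}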
static inline void __mod_node_page_state(struct pglist_data *pgdat,
					 enum node_stat_item item, int delta)
{
	if (vmstat_item_in_bytes(item)) {
		/*
		 * Only cgroups use subpage accounting right now; at
		 * the global level, these items still change in
		 * multiples of whole pages. Store them as pages
		 * internally to keep the per-cpu counters compact.
		 */
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}
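	/*
	 * Worked example (illustrative, not from the source above): for a
	 * byte-counted item with 4 KiB pages, delta == 8192 passes the
	 * alignment check and is stored as 8192 >> 12 == 2 pages, while a
	 * sub-page delta such as 512 would trip VM_WARN_ON_ONCE() because
	 * the global counters only move in whole-page multiples.
	 */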