Skip to content

Instantly share code, notes, and snippets.

@yuanfeiz
Created May 26, 2013 09:51
Show Gist options
  • Save yuanfeiz/5652271 to your computer and use it in GitHub Desktop.
Save yuanfeiz/5652271 to your computer and use it in GitHub Desktop.
VM parts of fork in MINIX
/* The kernel call implemented in this file:
* m_type: SYS_FORK
*
* The parameters for this kernel call are:
* m1_i1: PR_ENDPT (parent, process that forked)
* m1_i2: PR_SLOT (child's process table slot)
* m1_p1: PR_MEM_PTR (new memory map for the child)
* m1_i3: PR_FORK_FLAGS (fork flags)
*/
#include "kernel/system.h"
#include "kernel/vm.h"
#include <signal.h>
#include <string.h>
#include <assert.h>
#include <minix/endpoint.h>
#include <minix/u64.h>
#if USE_FORK
/*===========================================================================*
* do_fork *
*===========================================================================*/
PUBLIC int do_fork(struct proc * caller, message * m_ptr)
{
/* Handle sys_fork(). PR_ENDPT has forked. The child is PR_SLOT.
 *
 * The parent's 'proc' entry is copied over the child's slot and the fields
 * that must differ (slot number, endpoint, return register, accounting,
 * timers, signal state, page table pointers) are reinitialized afterwards.
 * The child's new endpoint and the parent's message buffer address are
 * written back into m_ptr (PR_ENDPT and PR_FORK_MSGADDR).
 *
 * Returns EINVAL when the parent endpoint is invalid, the parent slot is
 * empty, the child slot is not empty, or the parent is not blocked in
 * receive; otherwise returns the result of newmap().
 */
#if (_MINIX_CHIP == _CHIP_INTEL)
reg_t old_ldt_sel;
void *old_fpu_save_area_p;
#endif
register struct proc *rpc; /* child process pointer */
struct proc *rpp; /* parent process pointer */
struct mem_map *map_ptr; /* virtual address of map inside the sys_fork caller */
int gen, r;
int p_proc;
if(!isokendpt(m_ptr->PR_ENDPT, &p_proc))
return EINVAL;
// Parent process.
rpp = proc_addr(p_proc);
// Child process.
// NOTE(review): PR_SLOT is used as given; presumably the caller is trusted
// to pass a valid slot number -- confirm.
rpc = proc_addr(m_ptr->PR_SLOT);
if (isemptyp(rpp) || ! isemptyp(rpc)) return(EINVAL);
assert(!(rpp->p_misc_flags & MF_DELIVERMSG));
/* needs to be receiving so we know where the message buffer is */
if(!RTS_ISSET(rpp, RTS_RECEIVING)) {
printf("kernel: fork not done synchronously?\n");
return EINVAL;
}
map_ptr= (struct mem_map *) m_ptr->PR_MEM_PTR;
/* make sure that the FPU context is saved in parent before copy */
if (fpu_owner == rpp) {
disable_fpu_exception();
save_fpu(rpp);
}
/* Copy parent 'proc' struct to child. And reinitialize some fields. */
/* Remember the generation of the slot's previous endpoint; the struct copy
 * below overwrites p_endpoint.
 */
gen = _ENDPOINT_G(rpc->p_endpoint);
#if (_MINIX_CHIP == _CHIP_INTEL)
/* These two fields belong to the child's slot and must survive the copy. */
old_ldt_sel = rpc->p_seg.p_ldt_sel; /* backup local descriptors */
old_fpu_save_area_p = rpc->p_fpu_state.fpu_save_area_p;
#endif
// Copy the parent's kernel 'proc' entry over the child's 'proc' entry.
*rpc = *rpp; /* copy 'proc' struct */
#if (_MINIX_CHIP == _CHIP_INTEL)
rpc->p_seg.p_ldt_sel = old_ldt_sel; /* restore descriptors */
rpc->p_fpu_state.fpu_save_area_p = old_fpu_save_area_p;
/* The child keeps its own save area, so duplicate the parent's FPU state
 * into it rather than sharing the pointer.
 */
if(proc_used_fpu(rpp))
memcpy(rpc->p_fpu_state.fpu_save_area_p,
rpp->p_fpu_state.fpu_save_area_p,
FPU_XFP_SIZE);
#endif
if(++gen >= _ENDPOINT_MAX_GENERATION) /* increase generation */
gen = 1; /* generation number wraparound */
rpc->p_nr = m_ptr->PR_SLOT; /* this was obliterated by copy */
rpc->p_endpoint = _ENDPOINT(gen, rpc->p_nr); /* new endpoint of slot */
// This bookkeeping can only be done inside the kernel call, because PM
// knows nothing about process scheduling.
rpc->p_reg.retreg = 0; /* child sees pid = 0 to know it is child */
rpc->p_user_time = 0; /* set all the accounting times to 0 */
rpc->p_sys_time = 0;
rpc->p_reg.psw &= ~TRACEBIT; /* clear trace bit */
rpc->p_misc_flags &= ~(MF_VIRT_TIMER | MF_PROF_TIMER | MF_SC_TRACE);
rpc->p_virt_left = 0; /* disable, clear the process-virtual timers */
rpc->p_prof_left = 0;
/* the child process is not runnable until it's scheduled. */
RTS_SET(rpc, RTS_NO_QUANTUM);
make_zero64(rpc->p_cpu_time_left);
make_zero64(rpc->p_cycles);
/* If the parent is a privileged process, take away the privileges from the
 * child process and inhibit it from running by setting the NO_PRIV flag.
 * The caller should explicitly set the new privileges before executing.
 */
if (priv(rpp)->s_flags & SYS_PROC) {
rpc->p_priv = priv_addr(USER_PRIV_ID);
rpc->p_rts_flags |= RTS_NO_PRIV;
}
/* Calculate endpoint identifier, so caller knows what it is. */
m_ptr->PR_ENDPT = rpc->p_endpoint;
m_ptr->PR_FORK_MSGADDR = (char *) rpp->p_delivermsg_vir;
/* Install new map */
r = newmap(caller, rpc, map_ptr);
/* Don't schedule process in VM mode until it has a new pagetable. */
if(m_ptr->PR_FORK_FLAGS & PFF_VMINHIBIT) {
RTS_SET(rpc, RTS_VMINHIBIT);
}
/*
 * Only one in group should have RTS_SIGNALED, child doesn't inherit tracing.
 */
RTS_UNSET(rpc, (RTS_SIGNALED | RTS_SIG_PENDING | RTS_P_STOP));
(void) sigemptyset(&rpc->p_pending);
/* The child must not reuse the parent's page table pointers copied above. */
rpc->p_seg.p_cr3 = 0;
rpc->p_seg.p_cr3_v = NULL;
return r;
}
#endif /* USE_FORK */
#define _SYSTEM 1
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/keymap.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/debug.h>
#include <minix/bitmap.h>
#include <string.h>
#include <errno.h>
#include <env.h>
#include <assert.h>
#include "glo.h"
#include "vm.h"
#include "proto.h"
#include "util.h"
#include "sanitycheck.h"
#include "region.h"
#include "memory.h"
/*===========================================================================*
* do_fork *
*===========================================================================*/
/* Handle a VM_FORK request: build the child's address space as a copy of the
 * parent's and then tell the kernel about the fork through sys_fork().
 *
 * msg->VMF_ENDPOINT is the forking parent's endpoint and msg->VMF_SLOTNO the
 * process slot chosen for the child. On success the child's new endpoint is
 * stored in msg->VMF_CHILD_ENDPOINT and OK is returned.
 *
 * Returns EINVAL for a bad endpoint or slot number, or when the parent has
 * DMA memory; ENOMEM when the child's page table cannot be created, the
 * parent's regions cannot be copied, or the parent's stack cannot be sized;
 * or the error code of proc_new(). Once the child's page table has been
 * created, every failure path releases it again with pt_free(). A failing
 * sys_fork() or pt_bind() is fatal (panic).
 */
PUBLIC int do_fork(message *msg)
{
	int r, proc, childproc, fullvm;
	struct vmproc *vmp, *vmc;
	pt_t origpt;
	vir_bytes msgaddr;

	SANITYCHECK(SCL_FUNCTIONS);

	/* Translate the parent's endpoint into its vmproc index. */
	if(vm_isokendpt(msg->VMF_ENDPOINT, &proc) != OK) {
		printf("VM: bogus endpoint VM_FORK %d\n", msg->VMF_ENDPOINT);
		SANITYCHECK(SCL_FUNCTIONS);
		return EINVAL;
	}

	/* The child's slot number is used directly as its vmproc index. */
	childproc = msg->VMF_SLOTNO;
	if(childproc < 0 || childproc >= NR_PROCS) {
		printf("VM: bogus slotno VM_FORK %d\n", msg->VMF_SLOTNO);
		SANITYCHECK(SCL_FUNCTIONS);
		return EINVAL;
	}

	/* The parent's entry is populated; the child's is about to be filled. */
	vmp = &vmproc[proc];		/* parent */
	vmc = &vmproc[childproc];	/* child */
	assert(vmc->vm_slot == childproc);

	if(vmp->vm_flags & VMF_HAS_DMA) {
		printf("VM: %d has DMA memory and may not fork\n", msg->VMF_ENDPOINT);
		return EINVAL;
	}

	/* Nonzero when the parent already has a page table (VMF_HASPT); zero
	 * means its memory is still described by the segment map in
	 * vm_arch.vm_seg and has to be copied segment by segment below.
	 */
	fullvm = vmp->vm_flags & VMF_HASPT;

	/* The child is basically a copy of the parent. Copy the whole entry,
	 * then reset the fields that must not be shared. The child's own page
	 * table structure is preserved across the struct copy.
	 */
	origpt = vmc->vm_pt;
	*vmc = *vmp;
	vmc->vm_slot = childproc;
	vmc->vm_regions = NULL;
	yielded_init(&vmc->vm_yielded_blocks);
	vmc->vm_endpoint = NONE;	/* In case someone tries to use it. */
	vmc->vm_pt = origpt;
	vmc->vm_flags &= ~VMF_HASPT;

#if VMSTATS
	vmc->vm_bytecopies = 0;
#endif

	/* Create the child's page table. */
	if(pt_new(&vmc->vm_pt) != OK) {
		printf("VM: fork: pt_new failed\n");
		return ENOMEM;
	}

	/* The child always gets a page table, whatever the parent uses. */
	vmc->vm_flags |= VMF_HASPT;

	if(fullvm) {
		/* Parent has a page table: duplicate its regions. */
		SANITYCHECK(SCL_DETAIL);

		if(map_proc_copy(vmc, vmp) != OK) {
			printf("VM: fork: map_proc_copy failed\n");
			pt_free(&vmc->vm_pt);
			return(ENOMEM);
		}

		/* vm_heap was copied from the parent; repoint it at the
		 * child's own heap region.
		 */
		if(vmp->vm_heap) {
			vmc->vm_heap = map_region_lookup_tag(vmc, VRT_HEAP);
			assert(vmc->vm_heap);
		}

		SANITYCHECK(SCL_DETAIL);
	}
	else {
		/* Parent is described by a segment map. */
		vir_bytes sp;
		struct vir_region *heap, *stack;
		vir_bytes text_bytes, data_bytes, stack_bytes, parent_gap_bytes,
			child_gap_bytes;

		/* Get SP of new process (using parent). */
		if(get_stack_ptr(vmp->vm_endpoint, &sp) != OK) {
			printf("VM: fork: get_stack_ptr failed for %d\n",
				vmp->vm_endpoint);
			pt_free(&vmc->vm_pt);	/* don't leak the new page table */
			return ENOMEM;
		}

		/* Update size of stack segment using current SP. */
		if(adjust(vmp, vmp->vm_arch.vm_seg[D].mem_len, sp) != OK) {
			printf("VM: fork: adjust failed for %d\n",
				vmp->vm_endpoint);
			pt_free(&vmc->vm_pt);	/* don't leak the new page table */
			return ENOMEM;
		}

		/* Copy newly adjust()ed stack segment size to child. *vmc was
		 * copied from *vmp before adjust() ran, so its stack entry is
		 * stale. Only the segment descriptor is copied here; the stack
		 * contents are copied further down.
		 */
		vmc->vm_arch.vm_seg[S] = vmp->vm_arch.vm_seg[S];

		text_bytes = CLICK2ABS(vmc->vm_arch.vm_seg[T].mem_len);
		data_bytes = CLICK2ABS(vmc->vm_arch.vm_seg[D].mem_len);
		stack_bytes = CLICK2ABS(vmc->vm_arch.vm_seg[S].mem_len);

		/* how much space after break and before lower end (which is the
		 * logical top) of stack for the parent
		 */
		parent_gap_bytes = CLICK2ABS(vmc->vm_arch.vm_seg[S].mem_vir -
			vmc->vm_arch.vm_seg[D].mem_len);

		/* how much space can the child stack grow downwards, below
		 * the current SP? The rest of the gap is available for the
		 * heap to grow upwards.
		 */
		child_gap_bytes = VM_PAGE_SIZE;

		/* Allocate the child's memory. */
		if((r=proc_new(vmc, VM_PROCSTART,
		  text_bytes, data_bytes, stack_bytes, child_gap_bytes, 0, 0,
		  CLICK2ABS(vmc->vm_arch.vm_seg[S].mem_vir +
			vmc->vm_arch.vm_seg[S].mem_len), 1)) != OK) {
			printf("VM: fork: proc_new failed\n");
			pt_free(&vmc->vm_pt);	/* don't leak the new page table */
			return r;
		}

		if(!(heap = map_region_lookup_tag(vmc, VRT_HEAP)))
			panic("couldn't lookup heap");
		assert(heap->phys);
		if(!(stack = map_region_lookup_tag(vmc, VRT_STACK)))
			panic("couldn't lookup stack");
		assert(stack->phys);

		/* Now copy the memory regions. */

		/* Text segment, if the parent has one. */
		if(vmc->vm_arch.vm_seg[T].mem_len > 0) {
			struct vir_region *text;
			if(!(text = map_region_lookup_tag(vmc, VRT_TEXT)))
				panic("couldn't lookup text");
			assert(text->phys);
			if(copy_abs2region(CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys),
				text, 0, text_bytes) != OK)
				panic("couldn't copy text");
		}

		/* Data segment goes to the start of the heap region. */
		if(copy_abs2region(CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys),
			heap, 0, data_bytes) != OK)
			panic("couldn't copy heap");

		/* Stack: in the parent it sits past data plus the gap; in the
		 * child it is placed child_gap_bytes into the stack region.
		 */
		if(copy_abs2region(CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys +
				vmc->vm_arch.vm_seg[D].mem_len) + parent_gap_bytes,
			stack, child_gap_bytes, stack_bytes) != OK)
			panic("couldn't copy stack");
	}

	/* All memory has been copied at this point. */

	/* Only inherit these flags. */
	vmc->vm_flags &= (VMF_INUSE|VMF_SEPARATE|VMF_HASPT);

	/* inherit the priv call bitmaps */
	memcpy(&vmc->vm_call_mask, &vmp->vm_call_mask, sizeof(vmc->vm_call_mask));

	/* Tell kernel about the (now successful) FORK. The kernel hands back
	 * the child's endpoint and the address of the parent's message buffer.
	 */
	if((r=sys_fork(vmp->vm_endpoint, childproc,
		&vmc->vm_endpoint, vmc->vm_arch.vm_seg,
		PFF_VMINHIBIT, &msgaddr)) != OK) {
		panic("do_fork can't sys_fork: %d", r);
	}

	if(fullvm) {
		vir_bytes vir;
		/* making these messages writable is an optimisation
		 * and its return value needn't be checked.
		 */
		vir = arch_vir2map(vmc, msgaddr);
		handle_memory(vmc, vir, sizeof(message), 1);
		vir = arch_vir2map(vmp, msgaddr);
		handle_memory(vmp, vir, sizeof(message), 1);
	}

	if((r=pt_bind(&vmc->vm_pt, vmc)) != OK)
		panic("fork can't pt_bind: %d", r);

	/* Inform caller of new child endpoint. */
	msg->VMF_CHILD_ENDPOINT = vmc->vm_endpoint;

	SANITYCHECK(SCL_FUNCTIONS);
	return OK;
}
/* This file deals with creating processes (via FORK) and deleting them (via
* EXIT/WAIT). When a process forks, a new slot in the 'mproc' table is
* allocated for it, and a copy of the parent's core image is made for the
* child. Then the kernel and file system are informed. A process is removed
* from the 'mproc' table when two events have occurred: (1) it has exited or
* been killed by a signal, and (2) the parent has done a WAIT. If the process
* exits first, it continues to occupy a slot until the parent does a WAIT.
*
* The entry points into this file are:
* do_fork: perform the FORK system call
* do_srv_fork: special FORK, used by RS to create sys services
* do_exit: perform the EXIT system call (by calling exit_proc())
* exit_proc: actually do the exiting, and tell VFS about it
* exit_restart: continue exiting a process after VFS has replied
* do_waitpid: perform the WAITPID or WAIT system call
* wait_test: check whether a parent is waiting for a child
*/
#include "pm.h"
#include <sys/wait.h>
#include <assert.h>
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/sched.h>
#include <minix/vm.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <signal.h>
#include "mproc.h"
#include "param.h"
#define LAST_FEW 2 /* last few slots reserved for superuser */
FORWARD _PROTOTYPE (void zombify, (struct mproc *rmp) );
FORWARD _PROTOTYPE (void check_parent, (struct mproc *child,
int try_cleanup) );
FORWARD _PROTOTYPE (void tell_parent, (struct mproc *child) );
FORWARD _PROTOTYPE (void tell_tracer, (struct mproc *child) );
FORWARD _PROTOTYPE (void tracer_died, (struct mproc *child) );
FORWARD _PROTOTYPE (void cleanup, (register struct mproc *rmp) );
/*===========================================================================*
* do_fork *
*===========================================================================*/
PUBLIC int do_fork() // 父进程已经存在,创建一个子进程
{
/* The process pointed to by 'mp' has forked. Create a child process. */
register struct mproc *rmp; /* pointer to parent */
register struct mproc *rmc; /* pointer to child */
pid_t new_pid;
static int next_child;
int i, n = 0, s;
endpoint_t child_ep;
message m;
/* If tables might fill up during FORK, don't even start since recovery half
* way through is such a nuisance.
*/
rmp = mp;
// 如果进程数已经达到最大值,或者预留给超级用户的位置已经不够了,就放弃fork。
if ((procs_in_use == NR_PROCS) ||
(procs_in_use >= NR_PROCS-LAST_FEW && rmp->mp_effuid != 0))
{
printf("PM: warning, process table is full!\n");
return(EAGAIN);
}
/* Find a slot in 'mproc' for the child process. A slot must exist. */
// 为子进程在mproc中找一个位置,从小到大。话说这里next_child不需要赋初值么?
do {
next_child = (next_child+1) % NR_PROCS;
n++;
} while((mproc[next_child].mp_flags & IN_USE) && n <= NR_PROCS);
if(n > NR_PROCS)
panic("do_fork can't find child slot");
if(next_child < 0 || next_child >= NR_PROCS
|| (mproc[next_child].mp_flags & IN_USE))
panic("do_fork finds wrong child slot: %d", next_child);
// 到这里就已经在mproc中找到了一个能存放子进程的地方。
/* Memory part of the forking. */
// 为子进程分配内存。在vm_fork中,若能够成功分配内存,vm会通知内核。
if((s=vm_fork(rmp->mp_endpoint, next_child, &child_ep)) != OK) {
printf("PM: vm_fork failed: %d\n", s);
return s;
}
/* PM may not fail fork after call to vm_fork(), as VM calls sys_fork(). */
// 到这里就已经分配好了内存,并通知了内核。
// 那还有什么是没有做的呢?
rmc = &mproc[next_child];
/* Set up the child and its memory map; copy its 'mproc' slot from parent. */
procs_in_use++;
// 复制父进程的mproc项给子进程。
*rmc = *rmp; /* copy parent's process slot to child's */
// 把子进程的父进程指向父进程。
rmc->mp_parent = who_p; /* record child's parent */
if (!(rmc->mp_trace_flags & TO_#include "syslib.h"
PUBLIC int sys_fork(parent, child, child_endpoint, map_ptr, flags, msgaddr)
endpoint_t parent; /* process doing the fork */
endpoint_t child; /* which proc has been created by the fork */
endpoint_t *child_endpoint;
struct mem_map *map_ptr;
u32_t flags;
vir_bytes *msgaddr;
{
/* A process has forked. Tell the kernel. */
message m;
int r;
m.PR_ENDPT = parent;
m.PR_SLOT = child;
// 这个指针指向子进程的地址空间。
m.PR_MEM_PTR = (char *) map_ptr;
m.PR_FORK_FLAGS = flags;
// 内核调用没有服务器,只需要发送一个调用号即可。
r = _kernel_call(SYS_FORK, &m);
*child_endpoint = m.PR_ENDPT;
*msgaddr = (vir_bytes) m.PR_FORK_MSGADDR;
return r;
}
TRACEFORK)) {
rmc->mp_tracer = NO_TRACER; /* no tracer attached */
rmc->mp_trace_flags = 0;
(void) sigemptyset(&rmc->mp_sigtrace);
}
/* Some system servers like to call regular fork, such as RS spawning
* recovery scripts; in this case PM will take care of their scheduling
* because RS cannot do so for non-system processes */
if (rmc->mp_flags & PRIV_PROC) {
assert(rmc->mp_scheduler == NONE);
rmc->mp_scheduler = SCHED_PROC_NR;
}
/* Inherit only these flags. In normal fork(), PRIV_PROC is not inherited. */
rmc->mp_flags &= (IN_USE|DELAY_CALL);
// 重置各种状态标志位,比如时间、退出状态等等。
rmc->mp_child_utime = 0; /* reset administration */
rmc->mp_child_stime = 0; /* reset administration */
rmc->mp_exitstatus = 0;
rmc->mp_sigstatus = 0;
rmc->mp_endpoint = child_ep; /* passed back by VM */
for (i = 0; i < NR_ITIMERS; i++)
rmc->mp_interval[i] = 0; /* reset timer intervals */
/* Find a free pid for the child and put it in the table. */
// 好吧,在mproc中的索引号不是子进程的pid,这里给他找一个进程号。
new_pid = get_free_pid();
rmc->mp_pid = new_pid; /* assign pid to child */
// 找到pid之后,告诉vfs。
m.m_type = PM_FORK;
m.PM_PROC = rmc->mp_endpoint;
m.PM_PPROC = rmp->mp_endpoint;
m.PM_CPID = rmc->mp_pid;
tell_vfs(rmc, &m);
/* Tell the tracer, if any, about the new child */
if (rmc->mp_tracer != NO_TRACER)
sig_proc(rmc, SIGSTOP, TRUE /*trace*/, FALSE /* ksig */);
/* Do not reply until VFS is ready to process the fork
* request
*/
return SUSPEND;
}
/*===========================================================================*
* do_srv_fork *
*===========================================================================*/
PUBLIC int do_srv_fork()
{
/* The process pointed to by 'mp' has forked. Create a child process. */
register struct mproc *rmp; /* pointer to parent */
register struct mproc *rmc; /* pointer to child */
int s;
pid_t new_pid;
static int next_child;
int i, n = 0;
endpoint_t child_ep;
message m;
/* Only RS is allowed to use srv_fork. */
if (mp->mp_endpoint != RS_PROC_NR)
return EPERM;
/* If tables might fill up during FORK, don't even start since recovery half
* way through is such a nuisance.
*/
rmp = mp;
if ((procs_in_use == NR_PROCS) ||
(procs_in_use >= NR_PROCS-LAST_FEW && rmp->mp_effuid != 0))
{
printf("PM: warning, process table is full!\n");
return(EAGAIN);
}
/* Find a slot in 'mproc' for the child process. A slot must exist. */
do {
next_child = (next_child+1) % NR_PROCS;
n++;
} while((mproc[next_child].mp_flags & IN_USE) && n <= NR_PROCS);
if(n > NR_PROCS)
panic("do_fork can't find child slot");
if(next_child < 0 || next_child >= NR_PROCS
|| (mproc[next_child].mp_flags & IN_USE))
panic("do_fork finds wrong child slot: %d", next_child);
if((s=vm_fork(rmp->mp_endpoint, next_child, &child_ep)) != OK) {
printf("PM: vm_fork failed: %d\n", s);
return s;
}
rmc = &mproc[next_child];
/* Set up the child and its memory map; copy its 'mproc' slot from parent. */
procs_in_use++;
*rmc = *rmp; /* copy parent's process slot to child's */
rmc->mp_parent = who_p; /* record child's parent */
if (!(rmc->mp_trace_flags & TO_TRACEFORK)) {
rmc->mp_tracer = NO_TRACER; /* no tracer attached */
rmc->mp_trace_flags = 0;
(void) sigemptyset(&rmc->mp_sigtrace);
}
/* inherit only these flags */
rmc->mp_flags &= (IN_USE|PRIV_PROC|DELAY_CALL);
rmc->mp_child_utime = 0; /* reset administration */
rmc->mp_child_stime = 0; /* reset administration */
rmc->mp_exitstatus = 0;
rmc->mp_sigstatus = 0;
rmc->mp_endpoint = child_ep; /* passed back by VM */
for (i = 0; i < NR_ITIMERS; i++)
rmc->mp_interval[i] = 0; /* reset timer intervals */
/* Find a free pid for the child and put it in the table. */
new_pid = get_free_pid();
rmc->mp_pid = new_pid; /* assign pid to child */
m.m_type = PM_SRV_FORK;
m.PM_PROC = rmc->mp_endpoint;
m.PM_PPROC = rmp->mp_endpoint;
m.PM_CPID = rmc->mp_pid;
tell_vfs(rmc, &m);
/* Tell the tracer, if any, about the new child */
if (rmc->mp_tracer != NO_TRACER)
sig_proc(rmc, SIGSTOP, TRUE /*trace*/, FALSE /* ksig */);
/* Wakeup the newly created process */
setreply(rmc-mproc, OK);
return rmc->mp_pid;
}
/*===========================================================================*
* do_exit *
*===========================================================================*/
PUBLIC int do_exit()
{
/* Handle the exit(status) system call for the calling process 'mp'.
 *
 * Ordinary processes are terminated through exit_proc(), the same routine
 * used when a process is killed by a signal. System processes are not
 * supposed to exit through PM: a warning is printed and the process is sent
 * SIGKILL instead.
 *
 * Always returns SUSPEND, since no reply is sent to an exiting process.
 */
	if (!(mp->mp_flags & PRIV_PROC)) {
		exit_proc(mp, m_in.status, FALSE /*dump_core*/);
		return(SUSPEND);
	}

	printf("PM: system process %d (%s) tries to exit(), sending SIGKILL\n",
		mp->mp_endpoint, mp->mp_name);
	sys_kill(mp->mp_endpoint, SIGKILL);
	return(SUSPEND);
}
/*===========================================================================*
* exit_proc *
*===========================================================================*/
PUBLIC void exit_proc(rmp, exit_status, dump_core)
register struct mproc *rmp;	/* pointer to the process to be terminated */
int exit_status;		/* the process' exit status (for parent) */
int dump_core;			/* flag indicating whether to dump core */
{
/* A process is done. Release most of the process' possessions. If its
 * parent is waiting, release the rest, else keep the process slot and
 * become a zombie.
 *
 * 'rmp' need not be the calling process 'mp': this routine is also used
 * when a process is killed by a signal. All decisions here must therefore
 * be based on 'rmp'.
 */
	register int proc_nr, proc_nr_e;
	int r;
	pid_t procgrp;
	struct mproc *p_mp;
	clock_t user_time, sys_time;
	message m;

	/* Do not create core files for set uid execution */
	if (dump_core && rmp->mp_realuid != rmp->mp_effuid)
		dump_core = FALSE;

	/* System processes are destroyed before informing VFS, meaning that VFS
	 * can not get their CPU state, so we can't generate a coredump for them
	 * either.
	 */
	if (dump_core && (rmp->mp_flags & PRIV_PROC))
		dump_core = FALSE;

	proc_nr = (int) (rmp - mproc);	/* get process slot number */
	proc_nr_e = rmp->mp_endpoint;

	/* Remember a session leader's process group. The test must look at the
	 * exiting process itself, not at the caller 'mp', or a leader killed
	 * from another process group would never hang up its group.
	 */
	procgrp = (rmp->mp_pid == rmp->mp_procgrp) ? rmp->mp_procgrp : 0;

	/* If the exited process has a timer pending, kill it. */
	if (rmp->mp_flags & ALARM_ON) set_alarm(rmp, (clock_t) 0);

	/* Do accounting: fetch usage times and accumulate at parent. */
	if((r=sys_times(proc_nr_e, &user_time, &sys_time, NULL, NULL)) != OK)
		panic("exit_proc: sys_times failed: %d", r);

	p_mp = &mproc[rmp->mp_parent];	/* process' parent */
	p_mp->mp_child_utime += user_time + rmp->mp_child_utime; /* add user time */
	p_mp->mp_child_stime += sys_time + rmp->mp_child_stime; /* add system time */

	/* Tell the kernel the process is no longer runnable to prevent it from
	 * being scheduled in between the following steps. Then tell VFS that
	 * the process has exited and finally, clean up the process at the
	 * kernel. This order is important so that VFS can tell drivers to
	 * cancel requests such as copying to/ from the exiting process, before
	 * it is gone.
	 */
	if ((r = sys_stop(proc_nr_e)) != OK)	/* stop the process */
		panic("sys_stop failed: %d", r);

	if((r=vm_willexit(proc_nr_e)) != OK) {
		panic("exit_proc: vm_willexit failed: %d", r);
	}
	vm_notify_sig_wrapper(rmp->mp_endpoint);

	if (proc_nr_e == INIT_PROC_NR)
	{
		printf("PM: INIT died\n");
		return;
	}
	if (proc_nr_e == VFS_PROC_NR)
	{
		panic("exit_proc: VFS died: %d", r);
	}

	/* Tell VFS about the exiting process. */
	m.m_type = dump_core ? PM_DUMPCORE : PM_EXIT;
	m.PM_PROC = rmp->mp_endpoint;

	tell_vfs(rmp, &m);

	if (rmp->mp_flags & PRIV_PROC)
	{
		/* Destroy system processes without waiting for VFS. This is
		 * needed because the system process might be a block device
		 * driver that VFS is blocked waiting on.
		 */
		if((r= sys_clear(rmp->mp_endpoint)) != OK)
			panic("exit_proc: sys_clear failed: %d", r);
	}

	/* Clean up most of the flags describing the process's state before the
	 * exit, and mark it as exiting.
	 */
	rmp->mp_flags &= (IN_USE|VFS_CALL|PRIV_PROC|TRACE_EXIT);
	rmp->mp_flags |= EXITING;

	/* Keep the process around until VFS is finished with it. */

	rmp->mp_exitstatus = (char) exit_status;

	/* For normal exits, try to notify the parent as soon as possible.
	 * For core dumps, notify the parent only once the core dump has been
	 * made.
	 */
	if (!dump_core)
		zombify(rmp);

	/* If the process has children, disinherit them. INIT is the new parent.
	 * From here on 'rmp' is reused as the table cursor; the exiting process
	 * is identified by 'proc_nr'.
	 */
	for (rmp = &mproc[0]; rmp < &mproc[NR_PROCS]; rmp++) {
		if (!(rmp->mp_flags & IN_USE)) continue;
		if (rmp->mp_tracer == proc_nr) {
			/* This child's tracer died. Do something sensible. */
			tracer_died(rmp);
		}
		if (rmp->mp_parent == proc_nr) {
			/* 'rmp' now points to a child to be disinherited. */
			rmp->mp_parent = INIT_PROC_NR;

			/* Notify new parent. */
			if (rmp->mp_flags & ZOMBIE)
				check_parent(rmp, TRUE /*try_cleanup*/);
		}
	}

	/* Send a hangup to the process' process group if it was a session
	 * leader.
	 */
	if (procgrp != 0) check_sig(-procgrp, SIGHUP, FALSE /* ksig */);
}
/*===========================================================================*
* exit_restart *
*===========================================================================*/
PUBLIC void exit_restart(rmp, dump_core)
struct mproc *rmp; /* pointer to the process being terminated */
int dump_core; /* flag indicating whether to dump core */
{
/* VFS replied to our exit or coredump request. Perform the second half of the
 * exit code: detach the process from its scheduler, notify the parent for
 * core dumps, destroy the process at the kernel (user processes only),
 * release its memory at VM, wake up a tracer waiting on the exit, and free
 * the slot when the parent has already been told.
 */
int r;
if((r = sched_stop(rmp->mp_scheduler, rmp->mp_endpoint)) != OK) {
/* If the scheduler refuses to give up scheduling, there is
 * little we can do, except report it. This may cause problems
 * later on, if this scheduler is asked to schedule another proc
 * that has an endpoint->schedproc mapping identical to the proc
 * we just tried to stop scheduling.
 */
printf("PM: The scheduler did not want to give up "
"scheduling %s, ret=%d.\n", rmp->mp_name, r);
}
/* sched_stop is either called when the process is exiting or it is
 * being moved between schedulers. If it is being moved between
 * schedulers, we need to set the mp_scheduler to NONE so that PM
 * doesn't forward messages to the process' scheduler while being moved
 * (such as sched_nice). */
rmp->mp_scheduler = NONE;
/* For core dumps, now is the right time to try to contact the parent. */
if (dump_core)
zombify(rmp);
/* System processes were already cleared in exit_proc(), before VFS replied. */
if (!(rmp->mp_flags & PRIV_PROC))
{
/* destroy the (user) process */
if((r=sys_clear(rmp->mp_endpoint)) != OK)
panic("exit_restart: sys_clear failed: %d", r);
}
/* Release the memory occupied by the child. */
if((r=vm_exit(rmp->mp_endpoint)) != OK) {
panic("exit_restart: vm_exit failed: %d", r);
}
if (rmp->mp_flags & TRACE_EXIT)
{
/* Wake up the tracer, completing the ptrace(T_EXIT) call */
mproc[rmp->mp_tracer].mp_reply.reply_trace = 0;
setreply(rmp->mp_tracer, OK);
}
/* Clean up if the parent has collected the exit status */
if (rmp->mp_flags & TOLD_PARENT)
cleanup(rmp);
}
/*===========================================================================*
* do_waitpid *
*===========================================================================*/
PUBLIC int do_waitpid()
{
/* A process wants to wait for a child to terminate. If a child has already
 * exited and is waiting to be collected, go clean it up and let this WAIT
 * call terminate. Otherwise, really wait.
 * A process calling WAIT never gets a reply in the usual way at the end
 * of the main loop (unless WNOHANG is set or no qualifying child exists).
 * If a child has already exited, the routine tell_parent() sends the reply
 * to awaken the caller.
 * Both WAIT and WAITPID are handled by this code.
 *
 * Returns SUSPEND when a reply was already set up by tell_parent() or
 * tell_tracer(), or when the caller is left waiting; the pid of a stopped
 * traced child when one of its trace signals is reported; 0 when WNOHANG is
 * set and no qualifying child has exited; ECHILD when no child qualifies.
 */
register struct mproc *rp;
int i, pidarg, options, children;
/* Set internal variables, depending on whether this is WAIT or WAITPID. */
pidarg = (call_nr == WAIT ? -1 : m_in.pid); /* 1st param of waitpid */
options = (call_nr == WAIT ? 0 : m_in.sig_nr); /* 3rd param of waitpid */
if (pidarg == 0) pidarg = -mp->mp_procgrp; /* pidarg < 0 ==> proc grp */
/* Is there a child waiting to be collected? At this point, pidarg != 0:
 * pidarg > 0 means pidarg is pid of a specific process to wait for
 * pidarg == -1 means wait for any child
 * pidarg < -1 means wait for any child whose process group = -pidarg
 */
children = 0;
for (rp = &mproc[0]; rp < &mproc[NR_PROCS]; rp++) {
/* Skip free slots and children whose parent has already been told. */
if ((rp->mp_flags & (IN_USE | TOLD_PARENT)) != IN_USE) continue;
/* The caller must be either the parent or the tracer of this process. */
if (rp->mp_parent != who_p && rp->mp_tracer != who_p) continue;
/* A tracer that is not the parent has no business with a ZOMBIE. */
if (rp->mp_parent != who_p && (rp->mp_flags & ZOMBIE)) continue;
/* The value of pidarg determines which children qualify. */
if (pidarg > 0 && pidarg != rp->mp_pid) continue;
if (pidarg < -1 && -pidarg != rp->mp_procgrp) continue;
children++; /* this child is acceptable */
if (rp->mp_tracer == who_p) {
if (rp->mp_flags & TRACE_ZOMBIE) {
/* Traced child meets the pid test and has exited. */
tell_tracer(rp);
check_parent(rp, TRUE /*try_cleanup*/);
return(SUSPEND);
}
if (rp->mp_flags & STOPPED) {
/* This child meets the pid test and is being traced.
 * Deliver a signal to the tracer, if any.
 */
for (i = 1; i < _NSIG; i++) {
if (sigismember(&rp->mp_sigtrace, i)) {
(void) sigdelset(&rp->mp_sigtrace, i);
/* Status word: low byte 0177, signal number in the next byte. */
mp->mp_reply.reply_res2 =
0177 | (i << 8);
return(rp->mp_pid);
}
}
}
}
if (rp->mp_parent == who_p) {
if (rp->mp_flags & ZOMBIE) {
/* This child meets the pid test and has exited. */
tell_parent(rp); /* this child has already exited */
/* The slot cannot be freed while a VFS call is still outstanding. */
if (!(rp->mp_flags & VFS_CALL))
cleanup(rp);
return(SUSPEND);
}
}
}
/* No qualifying child has exited. Wait for one, unless none exists. */
if (children > 0) {
/* At least 1 child meets the pid test exists, but has not exited. */
if (options & WNOHANG) {
return(0); /* parent does not want to wait */
}
mp->mp_flags |= WAITING; /* parent wants to wait */
mp->mp_wpid = (pid_t) pidarg; /* save pid for later */
return(SUSPEND); /* do not reply, let it wait */
} else {
/* No child even meets the pid test. Return error immediately. */
return(ECHILD); /* no - parent has no children */
}
}
/*===========================================================================*
* wait_test *
*===========================================================================*/
PUBLIC int wait_test(rmp, child)
struct mproc *rmp;		/* process that may be waiting */
struct mproc *child;		/* process that may be waited for */
{
/* Decide whether 'rmp' (a parent, or a tracer acting as pseudo-parent) is
 * blocked in a wait that 'child' satisfies.
 *
 * Returns 1 when 'rmp' has WAITING set and the pid it saved in mp_wpid is
 * -1, equals the child's pid, or is the negated process group of the child;
 * returns 0 otherwise.
 */
	pid_t wanted = rmp->mp_wpid;	/* who's being waited for? */

	if (!(rmp->mp_flags & WAITING))
		return 0;		/* not waiting at all */
	if (wanted == -1)
		return 1;		/* any child will do */
	if (wanted == child->mp_pid)
		return 1;		/* this very child */
	return -wanted == child->mp_procgrp;	/* child's process group */
}
/*===========================================================================*
* zombify *
*===========================================================================*/
PRIVATE void zombify(rmp)
struct mproc *rmp;
{
/* Turn an exiting process into a zombie. When the process is traced by
 * someone other than its parent, the tracer has to learn about the exit
 * before the real parent does; the process is then marked TRACE_ZOMBIE and
 * the parent is only checked once the tracer has been told.
 */
	if (rmp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
		panic("zombify: process was already a zombie");

	if (rmp->mp_tracer == NO_TRACER || rmp->mp_tracer == rmp->mp_parent) {
		/* No separate tracer: zombie for the parent right away. */
		rmp->mp_flags |= ZOMBIE;
	}
	else {
		/* A separate tracer has to be notified first. */
		rmp->mp_flags |= TRACE_ZOMBIE;

		/* Tracers get no SIGCHLD; if the tracer is not waiting, stop
		 * here and leave the process a TRACE_ZOMBIE.
		 */
		if (!wait_test(&mproc[rmp->mp_tracer], rmp))
			return;

		tell_tracer(rmp);
	}

	/* No tracer, or tracer is parent, or tracer has now been notified. */
	check_parent(rmp, FALSE /*try_cleanup*/);
}
/*===========================================================================*
* check_parent *
*===========================================================================*/
PRIVATE void check_parent(child, try_cleanup)
struct mproc *child;		/* tells which process is exiting */
int try_cleanup;		/* clean up the child when done? */
{
/* Let the parent of an exiting child know about the child's death: reply
 * to the parent directly when it is waiting for this child, otherwise send
 * it SIGCHLD.
 *
 * This may run twice for the same child: once with its original parent and,
 * if that parent died, once more with INIT as the parent.
 */
	struct mproc *parent = &mproc[child->mp_parent];

	/* The parent itself is exiting. The child will be handed to INIT and
	 * rechecked shortly after, so there is nothing to do now.
	 */
	if (parent->mp_flags & EXITING)
		return;

	/* Parent is not waiting: signal it instead. */
	if (!wait_test(parent, child)) {
		sig_proc(parent, SIGCHLD, TRUE /*trace*/, FALSE /* ksig */);
		return;
	}

	tell_parent(child);

	/* 'try_cleanup' spares callers from worrying about statement order
	 * in exit_proc() and exit_restart(); the slot is only freed when no
	 * VFS call is pending for the child.
	 */
	if (try_cleanup && !(child->mp_flags & VFS_CALL))
		cleanup(child);
}
/*===========================================================================*
* tell_parent *
*===========================================================================*/
PRIVATE void tell_parent(child)
register struct mproc *child;	/* tells which process is exiting */
{
/* Complete the parent's wait call on behalf of a zombie child: store the
 * exit status word in the parent's reply, queue the reply with the child's
 * pid as result, and mark the child as reported.
 *
 * Panics when the parent slot number is not positive, the child is not a
 * ZOMBIE, or the parent was already told.
 */
	int parent_nr = child->mp_parent;
	int status_word;
	struct mproc *waiter;

	if (parent_nr <= 0)
		panic("tell_parent: bad value in mp_parent: %d", parent_nr);
	if (!(child->mp_flags & ZOMBIE))
		panic("tell_parent: child not a zombie");
	if (child->mp_flags & TOLD_PARENT)
		panic("tell_parent: telling parent again");

	waiter = &mproc[parent_nr];

	/* Exit status in the high byte, signal status in the low byte. */
	status_word = (child->mp_exitstatus << 8) | (child->mp_sigstatus & 0377);
	waiter->mp_reply.reply_res2 = status_word;

	/* Wake up the parent by sending the reply message. */
	setreply(parent_nr, child->mp_pid);
	waiter->mp_flags &= ~WAITING;	/* parent no longer waiting */

	/* No longer a zombie, and never to be reported a second time. */
	child->mp_flags = (child->mp_flags & ~ZOMBIE) | TOLD_PARENT;
}
/*===========================================================================*
* tell_tracer *
*===========================================================================*/
PRIVATE void tell_tracer(child)
struct mproc *child;		/* tells which process is exiting */
{
/* Complete the tracer's wait call on behalf of an exited traced child:
 * store the exit status word in the tracer's reply, queue the reply with
 * the child's pid as result, and turn the child from a TRACE_ZOMBIE into an
 * ordinary ZOMBIE so its real parent can collect it next.
 *
 * Panics when the tracer slot number is not positive or the child is not a
 * TRACE_ZOMBIE.
 */
	int tracer_nr = child->mp_tracer;
	int status_word;
	struct mproc *waiter;

	if (tracer_nr <= 0)
		panic("tell_tracer: bad value in mp_tracer: %d", tracer_nr);
	if (!(child->mp_flags & TRACE_ZOMBIE))
		panic("tell_tracer: child not a zombie");

	waiter = &mproc[tracer_nr];

	/* Exit status in the high byte, signal status in the low byte. */
	status_word = (child->mp_exitstatus << 8) | (child->mp_sigstatus & 0377);
	waiter->mp_reply.reply_res2 = status_word;

	setreply(tracer_nr, child->mp_pid);
	waiter->mp_flags &= ~WAITING;	/* tracer no longer waiting */

	/* Done with the tracer; the child is now a zombie to its parent. */
	child->mp_flags = (child->mp_flags & ~TRACE_ZOMBIE) | ZOMBIE;
}
/*===========================================================================*
* tracer_died *
*===========================================================================*/
PRIVATE void tracer_died(child)
struct mproc *child;		/* process being traced */
{
/* The tracer of 'child' has gone away. Detach the child from it and pick a
 * safe follow-up: a child that is still alive is killed, a child that was
 * in the middle of reporting its own exit to the tracer is handed over to
 * its real parent.
 */
	child->mp_tracer = NO_TRACER;
	child->mp_flags &= ~TRACE_EXIT;

	if (child->mp_flags & EXITING) {
		/* The child was already on its way out. If it was waiting for
		 * the tracer to collect it, forget about the tracer and notify
		 * the real parent instead.
		 */
		if (child->mp_flags & TRACE_ZOMBIE) {
			child->mp_flags &= ~TRACE_ZOMBIE;
			child->mp_flags |= ZOMBIE;

			check_parent(child, TRUE /*try_cleanup*/);
		}
	}
	else {
		/* The child was running or stopped and its state is unknown.
		 * Kill it; note that this may cause cascading exits.
		 */
		sig_proc(child, SIGKILL, TRUE /*trace*/, FALSE /* ksig */);
	}
}
/*===========================================================================*
* cleanup *
*===========================================================================*/
PRIVATE void cleanup(rmp)
register struct mproc *rmp;	/* tells which process is exiting */
{
/* Give the process table entry back: one process fewer is in use, and the
 * slot's pid, flags and accumulated child times are zeroed. Clearing
 * mp_flags also clears IN_USE, which makes the slot available again.
 */
	--procs_in_use;

	rmp->mp_child_stime = 0;
	rmp->mp_child_utime = 0;
	rmp->mp_flags = 0;
	rmp->mp_pid = 0;
}
#include "syslib.h"
PUBLIC int sys_fork(parent, child, child_endpoint, map_ptr, flags, msgaddr)
endpoint_t parent; /* process doing the fork */
endpoint_t child; /* which proc has been created by the fork */
endpoint_t *child_endpoint;
struct mem_map *map_ptr;
u32_t flags;
vir_bytes *msgaddr;
{
/* A process has forked. Tell the kernel. */
message m;
int r;
m.PR_ENDPT = parent;
m.PR_SLOT = child;
// 这个指针指向子进程的地址空间。
m.PR_MEM_PTR = (char *) map_ptr;
m.PR_FORK_FLAGS = flags;
// 内核调用没有服务器,只需要发送一个调用号即可。
r = _kernel_call(SYS_FORK, &m);
*child_endpoint = m.PR_ENDPT;
*msgaddr = (vir_bytes) m.PR_FORK_MSGADDR;
return r;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment