| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 // Copyright 2011 The Go Authors. All rights reserved. | 1 // Copyright 2011 The Go Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
| 3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 // +build linux | 5 // +build linux |
| 6 | 6 |
| 7 package syscall | 7 package syscall |
| 8 | 8 |
| 9 import ( | 9 import ( |
| 10 "unsafe" | 10 "unsafe" |
| 11 ) | 11 ) |
| 12 | 12 |
| 13 // IdMap holds Container ID to Host ID mappings used for User Namespaces in Linux. See user_namespaces(7). | |
|
iant
2014/10/01 22:27:18
Please break the comment line after "Linux.".
mrunalp
2014/10/02 00:07:04
Done.
| |
| 14 type IdMap struct { | |
|
mrunalp
2014/10/01 21:48:42
Should I rename Id --> ID everywhere as per naming
iant
2014/10/01 22:27:18
Yes, please, sorry I missed that.
Actually IDMap
mrunalp
2014/10/02 00:07:04
Done.
| |
| 15 ContainerId int // Container Id. | |
| 16 HostId int // Host Id. | |
| 17 Size int // Size. | |
| 18 } | |
| 19 | |
| 13 type SysProcAttr struct { | 20 type SysProcAttr struct { |
| 14 » Chroot string // Chroot. | 21 » Chroot string // Chroot. |
| 15 » Credential *Credential // Credential. | 22 » Credential *Credential // Credential. |
| 16 » Ptrace bool // Enable tracing. | 23 » Ptrace bool // Enable tracing. |
| 17 » Setsid bool // Create session. | 24 » Setsid bool // Create session. |
| 18 » Setpgid bool // Set process group ID to new pid (SYSV setpgrp) | 25 » Setpgid bool // Set process group ID to new pid (SYSV setpgrp) |
| 19 » Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set) | 26 » Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set) |
| 20 » Noctty bool // Detach fd 0 from controlling terminal | 27 » Noctty bool // Detach fd 0 from controlling terminal |
| 21 » Ctty int // Controlling TTY fd (Linux only) | 28 » Ctty int // Controlling TTY fd (Linux only) |
| 22 » Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only) | 29 » Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only) |
| 23 » Cloneflags uintptr // Flags for clone calls (Linux only) | 30 » Cloneflags uintptr // Flags for clone calls (Linux only) |
| 24 » Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY) | 31 » Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY) |
| 25 » Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set) | 32 » Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set) |
| 33 » UidMappings []IdMap // User ID mappings for user namespaces. | |
| 34 » GidMappings []IdMap // Group ID mappings for user namespaces. | |
| 26 } | 35 } |
| 27 | 36 |
| 28 // Implemented in runtime package. | 37 // Implemented in runtime package. |
| 29 func runtime_BeforeFork() | 38 func runtime_BeforeFork() |
| 30 func runtime_AfterFork() | 39 func runtime_AfterFork() |
| 31 | 40 |
| 32 // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child. | 41 // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child. |
| 33 // If a dup or exec fails, write the errno error to pipe. | 42 // If a dup or exec fails, write the errno error to pipe. |
| 34 // (Pipe is close-on-exec so if exec succeeds, it will be closed.) | 43 // (Pipe is close-on-exec so if exec succeeds, it will be closed.) |
| 35 // In the child, this function must not acquire any locks, because | 44 // In the child, this function must not acquire any locks, because |
| 36 // they might have been locked at the time of the fork. This means | 45 // they might have been locked at the time of the fork. This means |
| 37 // no rescheduling, no malloc calls, and no new stack segments. | 46 // no rescheduling, no malloc calls, and no new stack segments. |
| 38 // For the same reason compiler does not race instrument it. | 47 // For the same reason compiler does not race instrument it. |
| 39 // The calls to RawSyscall are okay because they are assembly | 48 // The calls to RawSyscall are okay because they are assembly |
| 40 // functions that do not grow the stack. | 49 // functions that do not grow the stack. |
| 41 func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) { | 50 func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) { |
| 42 // Declare all variables at top in case any | 51 // Declare all variables at top in case any |
| 43 // declarations require heap allocation (e.g., err1). | 52 // declarations require heap allocation (e.g., err1). |
| 44 var ( | 53 var ( |
| 45 r1 uintptr | 54 r1 uintptr |
| 46 err1 Errno | 55 err1 Errno |
| 47 nextfd int | 56 nextfd int |
| 48 i int | 57 i int |
| 58 p [2]int | |
| 49 ) | 59 ) |
| 50 | 60 |
| 51 // Guard against side effects of shuffling fds below. | 61 // Guard against side effects of shuffling fds below. |
| 52 // Make sure that nextfd is beyond any currently open files so | 62 // Make sure that nextfd is beyond any currently open files so |
| 53 // that we can't run the risk of overwriting any of them. | 63 // that we can't run the risk of overwriting any of them. |
| 54 fd := make([]int, len(attr.Files)) | 64 fd := make([]int, len(attr.Files)) |
| 55 nextfd = len(attr.Files) | 65 nextfd = len(attr.Files) |
| 56 for i, ufd := range attr.Files { | 66 for i, ufd := range attr.Files { |
| 57 if nextfd < int(ufd) { | 67 if nextfd < int(ufd) { |
| 58 nextfd = int(ufd) | 68 nextfd = int(ufd) |
| 59 } | 69 } |
| 60 fd[i] = int(ufd) | 70 fd[i] = int(ufd) |
| 61 } | 71 } |
| 62 nextfd++ | 72 nextfd++ |
| 63 | 73 |
| 74 // Allocate another pipe for parent to child communication for synchronizing writing of uid/gid mappings. | |
| 75 if sys.UidMappings != nil || sys.GidMappings != nil { | |
| 76 p[0] = -1 | |
|
iant
2014/10/01 22:27:18
No need to set p[0] and p[1] to -1. I'm not sure
mrunalp
2014/10/02 00:07:04
Done.
| |
| 77 p[1] = -1 | |
| 78 if err := forkExecPipe(p[:]); err != nil { | |
| 79 return 0, err.(Errno) | |
| 80 } | |
| 81 } | |
| 82 | |
| 64 // About to call fork. | 83 // About to call fork. |
| 65 // No more allocation or calls of non-assembly functions. | 84 // No more allocation or calls of non-assembly functions. |
| 66 runtime_BeforeFork() | 85 runtime_BeforeFork() |
| 67 r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0) | 86 r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0) |
| 68 if err1 != 0 { | 87 if err1 != 0 { |
| 69 runtime_AfterFork() | 88 runtime_AfterFork() |
| 70 return 0, err1 | 89 return 0, err1 |
| 71 } | 90 } |
| 72 | 91 |
| 73 if r1 != 0 { | 92 if r1 != 0 { |
| 74 // parent; return PID | 93 // parent; return PID |
| 75 runtime_AfterFork() | 94 runtime_AfterFork() |
| 76 pid = int(r1) | 95 pid = int(r1) |
| 77 | 96 |
| 97 if sys.UidMappings != nil || sys.GidMappings != nil { | |
| 98 Close(p[0]) | |
| 99 var err2 uintptr | |
|
iant
2014/10/01 22:27:18
Should probably declare err2 up above next to err1
mrunalp
2014/10/02 00:07:04
Done.
| |
| 100 err2 = 0 | |
| 101 err := writeUidGidMappings(pid, sys) | |
| 102 if err != nil { | |
| 103 err2 = uintptr(err.(Errno)) | |
| 104 } | |
| 105 RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Poin ter(&err2)), unsafe.Sizeof(err2)) | |
|
mrunalp
2014/10/01 21:48:42
Again questions around error-handling here. There
iant
2014/10/01 22:27:18
Fortunately I don't think we have to worry about t
mrunalp
2014/10/02 00:07:04
Acknowledged.
| |
| 106 Close(p[1]) | |
| 107 } | |
| 108 | |
| 78 if sys.Joinpgrp != 0 { | 109 if sys.Joinpgrp != 0 { |
| 79 // Place the child in the specified process group. | 110 // Place the child in the specified process group. |
| 80 RawSyscall(SYS_SETPGID, r1, uintptr(sys.Joinpgrp), 0) | 111 RawSyscall(SYS_SETPGID, r1, uintptr(sys.Joinpgrp), 0) |
| 81 } else if sys.Foreground || sys.Setpgid { | 112 } else if sys.Foreground || sys.Setpgid { |
| 82 // Place the child in a new process group. | 113 // Place the child in a new process group. |
| 83 RawSyscall(SYS_SETPGID, 0, 0, 0) | 114 RawSyscall(SYS_SETPGID, 0, 0, 0) |
| 84 | 115 |
| 85 if sys.Foreground { | 116 if sys.Foreground { |
| 86 // Set new foreground process group. | 117 // Set new foreground process group. |
| 87 RawSyscall(SYS_IOCTL, uintptr(Stdin), TIOCSPGRP, uintptr(unsafe.Pointer(&pid))) | 118 RawSyscall(SYS_IOCTL, uintptr(Stdin), TIOCSPGRP, uintptr(unsafe.Pointer(&pid))) |
| 88 } | 119 } |
| 89 } | 120 } |
| 90 | 121 |
| 91 return pid, 0 | 122 return pid, 0 |
| 92 } | 123 } |
| 93 | 124 |
| 94 // Fork succeeded, now in child. | 125 // Fork succeeded, now in child. |
| 95 | 126 |
| 127 // Wait for uid/gid mappings to be written. | |
| 128 if sys.UidMappings != nil || sys.GidMappings != nil { | |
| 129 if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(p[1]), 0, 0); err1 != 0 { | |
| 130 goto childerror | |
| 131 } | |
| 132 _, _, err2 := RawSyscall(SYS_READ, uintptr(p[0]), uintptr(unsafe .Pointer(&err1)), uintptr(1)) | |
|
iant
2014/10/01 22:27:17
The parent is going to write unsafe.Sizeof(uintptr
mrunalp
2014/10/02 00:07:04
Ahh, I missed that. Thanks!
mrunalp
2014/10/02 00:07:04
Done.
| |
| 133 if err2 != 0 { | |
| 134 err1 = err2 | |
| 135 goto childerror | |
| 136 } | |
| 137 if err1 != 0 { | |
| 138 goto childerror | |
| 139 } | |
| 140 } | |
| 141 | |
| 96 // Parent death signal | 142 // Parent death signal |
| 97 if sys.Pdeathsig != 0 { | 143 if sys.Pdeathsig != 0 { |
| 98 _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_PDEATHSIG, uintptr(sy s.Pdeathsig), 0, 0, 0, 0) | 144 _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_PDEATHSIG, uintptr(sy s.Pdeathsig), 0, 0, 0, 0) |
| 99 if err1 != 0 { | 145 if err1 != 0 { |
| 100 goto childerror | 146 goto childerror |
| 101 } | 147 } |
| 102 | 148 |
| 103 // Signal self if parent is already dead. This might cause a | 149 // Signal self if parent is already dead. This might cause a |
| 104 // duplicate signal in rare cases, but it won't matter when | 150 // duplicate signal in rare cases, but it won't matter when |
| 105 // using SIGKILL. | 151 // using SIGKILL. |
| (...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 289 if err = Pipe(p); err != nil { | 335 if err = Pipe(p); err != nil { |
| 290 return | 336 return |
| 291 } | 337 } |
| 292 if _, err = fcntl(p[0], F_SETFD, FD_CLOEXEC); err != nil { | 338 if _, err = fcntl(p[0], F_SETFD, FD_CLOEXEC); err != nil { |
| 293 return | 339 return |
| 294 } | 340 } |
| 295 _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC) | 341 _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC) |
| 296 } | 342 } |
| 297 return | 343 return |
| 298 } | 344 } |
| 345 | |
| 346 // writeIdMappings writes the user namespace uid or gid mappings to the specified path. | |
| 347 func writeIdMappings(path string, idMap []IdMap) error { | |
| 348 fd, err := Open(path, O_RDWR, 0) | |
| 349 if err != nil { | |
| 350 return err | |
| 351 } | |
| 352 | |
| 353 data := "" | |
| 354 for _, im := range idMap { | |
| 355 data = data + itoa(im.ContainerId) + " " + itoa(im.HostId) + " " + itoa(im.Size) + "\n" | |
| 356 } | |
| 357 | |
| 358 bytes, err := ByteSliceFromString(data) | |
| 359 if err != nil { | |
| 360 Close(fd) | |
| 361 return err | |
| 362 } | |
| 363 | |
| 364 if _, err := Write(fd, bytes); err != nil { | |
| 365 Close(fd) | |
| 366 return err | |
| 367 } | |
| 368 | |
| 369 if err := Close(fd); err != nil { | |
| 370 return err | |
| 371 } | |
| 372 | |
| 373 return nil | |
| 374 } | |
| 375 | |
| 376 // writeUidGidMappings writes uid/gid mappings for user namespaces for a process and it is called from the parent process. | |
|
iant
2014/10/01 22:27:18
Add a line break somewhere in the comment.
mrunalp
2014/10/02 00:07:04
Done.
| |
| 377 func writeUidGidMappings(pid int, sys *SysProcAttr) error { | |
| 378 if sys.UidMappings != nil { | |
| 379 uidf := "/proc/" + itoa(pid) + "/uid_map" | |
| 380 if err := writeIdMappings(uidf, sys.UidMappings); err != nil { | |
| 381 return err | |
| 382 } | |
| 383 } | |
| 384 | |
| 385 if sys.GidMappings != nil { | |
| 386 gidf := "/proc/" + itoa(pid) + "/gid_map" | |
| 387 if err := writeIdMappings(gidf, sys.GidMappings); err != nil { | |
| 388 return err | |
| 389 } | |
| 390 } | |
| 391 | |
| 392 return nil | |
| 393 } | |
| OLD | NEW |