Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(92)

Side by Side Diff: src/syscall/exec_linux.go

Issue 126190043: syscall: Adds support for User Namespaces in Linux by a...
Patch Set: diff -r ce32e953ef6f5418efbcce5c25175aec25204eac https://code.google.com/p/go Created 11 years, 7 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2011 The Go Authors. All rights reserved. 1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // +build linux 5 // +build linux
6 6
7 package syscall 7 package syscall
8 8
9 import ( 9 import (
10 "unsafe" 10 "unsafe"
11 ) 11 )
12 12
13 // SysProcIDMap holds Container ID to Host ID mappings used for User Namespaces in Linux.
14 // See user_namespaces(7).
15 type SysProcIDMap struct {
16 ContainerID int // Container ID.
17 HostID int // Host ID.
18 Size int // Size.
19 }
20
13 type SysProcAttr struct { 21 type SysProcAttr struct {
14 » Chroot string // Chroot. 22 » Chroot string // Chroot.
15 » Credential *Credential // Credential. 23 » Credential *Credential // Credential.
16 » Ptrace bool // Enable tracing. 24 » Ptrace bool // Enable tracing.
17 » Setsid bool // Create session. 25 » Setsid bool // Create session.
18 » Setpgid bool // Set process group ID to new pid (SYSV setpgrp) 26 » Setpgid bool // Set process group ID to new pid (SYSV setpgrp)
19 » Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set) 27 » Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set)
20 » Noctty bool // Detach fd 0 from controlling terminal 28 » Noctty bool // Detach fd 0 from controlling terminal
21 » Ctty int // Controlling TTY fd (Linux only) 29 » Ctty int // Controlling TTY fd (Linux only)
22 » Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only) 30 » Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only)
23 » Cloneflags uintptr // Flags for clone calls (Linux only) 31 » Cloneflags uintptr // Flags for clone calls (Linux only)
24 » Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY) 32 » Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY)
25 » Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set) 33 » Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set)
34 » UidMappings []SysProcIDMap // User ID mappings for user namespaces.
35 » GidMappings []SysProcIDMap // Group ID mappings for user namespaces.
26 } 36 }
27 37
28 // Implemented in runtime package. 38 // Implemented in runtime package.
29 func runtime_BeforeFork() 39 func runtime_BeforeFork()
30 func runtime_AfterFork() 40 func runtime_AfterFork()
31 41
32 // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child. 42 // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child.
33 // If a dup or exec fails, write the errno error to pipe. 43 // If a dup or exec fails, write the errno error to pipe.
34 // (Pipe is close-on-exec so if exec succeeds, it will be closed.) 44 // (Pipe is close-on-exec so if exec succeeds, it will be closed.)
35 // In the child, this function must not acquire any locks, because 45 // In the child, this function must not acquire any locks, because
36 // they might have been locked at the time of the fork. This means 46 // they might have been locked at the time of the fork. This means
37 // no rescheduling, no malloc calls, and no new stack segments. 47 // no rescheduling, no malloc calls, and no new stack segments.
38 // For the same reason compiler does not race instrument it. 48 // For the same reason compiler does not race instrument it.
39 // The calls to RawSyscall are okay because they are assembly 49 // The calls to RawSyscall are okay because they are assembly
40 // functions that do not grow the stack. 50 // functions that do not grow the stack.
41 func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) { 51 func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) {
42 // Declare all variables at top in case any 52 // Declare all variables at top in case any
43 // declarations require heap allocation (e.g., err1). 53 // declarations require heap allocation (e.g., err1).
44 var ( 54 var (
45 r1 uintptr 55 r1 uintptr
46 err1 Errno 56 err1 Errno
57 err2 Errno
47 nextfd int 58 nextfd int
48 i int 59 i int
60 p [2]int
49 ) 61 )
50 62
51 // Guard against side effects of shuffling fds below. 63 // Guard against side effects of shuffling fds below.
52 // Make sure that nextfd is beyond any currently open files so 64 // Make sure that nextfd is beyond any currently open files so
53 // that we can't run the risk of overwriting any of them. 65 // that we can't run the risk of overwriting any of them.
54 fd := make([]int, len(attr.Files)) 66 fd := make([]int, len(attr.Files))
55 nextfd = len(attr.Files) 67 nextfd = len(attr.Files)
56 for i, ufd := range attr.Files { 68 for i, ufd := range attr.Files {
57 if nextfd < int(ufd) { 69 if nextfd < int(ufd) {
58 nextfd = int(ufd) 70 nextfd = int(ufd)
59 } 71 }
60 fd[i] = int(ufd) 72 fd[i] = int(ufd)
61 } 73 }
62 nextfd++ 74 nextfd++
63 75
76 // Allocate another pipe for parent to child communication for
77 // synchronizing writing of User ID/Group ID mappings.
78 if sys.UidMappings != nil || sys.GidMappings != nil {
79 if err := forkExecPipe(p[:]); err != nil {
80 return 0, err.(Errno)
81 }
82 }
83
64 // About to call fork. 84 // About to call fork.
65 // No more allocation or calls of non-assembly functions. 85 // No more allocation or calls of non-assembly functions.
66 runtime_BeforeFork() 86 runtime_BeforeFork()
67 r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0) 87 r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0)
68 if err1 != 0 { 88 if err1 != 0 {
69 runtime_AfterFork() 89 runtime_AfterFork()
70 return 0, err1 90 return 0, err1
71 } 91 }
72 92
73 if r1 != 0 { 93 if r1 != 0 {
74 // parent; return PID 94 // parent; return PID
75 runtime_AfterFork() 95 runtime_AfterFork()
76 pid = int(r1) 96 pid = int(r1)
77 97
98 if sys.UidMappings != nil || sys.GidMappings != nil {
99 Close(p[0])
100 err := writeUidGidMappings(pid, sys)
101 if err != nil {
102 err2 = err.(Errno)
103 }
104 RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
105 Close(p[1])
106 }
107
78 if sys.Joinpgrp != 0 { 108 if sys.Joinpgrp != 0 {
79 // Place the child in the specified process group. 109 // Place the child in the specified process group.
80 RawSyscall(SYS_SETPGID, r1, uintptr(sys.Joinpgrp), 0) 110 RawSyscall(SYS_SETPGID, r1, uintptr(sys.Joinpgrp), 0)
81 } else if sys.Foreground || sys.Setpgid { 111 } else if sys.Foreground || sys.Setpgid {
82 // Place the child in a new process group. 112 // Place the child in a new process group.
83 RawSyscall(SYS_SETPGID, 0, 0, 0) 113 RawSyscall(SYS_SETPGID, 0, 0, 0)
84 114
85 if sys.Foreground { 115 if sys.Foreground {
86 // Set new foreground process group. 116 // Set new foreground process group.
87 RawSyscall(SYS_IOCTL, uintptr(Stdin), TIOCSPGRP, uintptr(unsafe.Pointer(&pid))) 117 RawSyscall(SYS_IOCTL, uintptr(Stdin), TIOCSPGRP, uintptr(unsafe.Pointer(&pid)))
88 } 118 }
89 } 119 }
90 120
91 return pid, 0 121 return pid, 0
92 } 122 }
93 123
94 // Fork succeeded, now in child. 124 // Fork succeeded, now in child.
95 125
126 // Wait for User ID/Group ID mappings to be written.
127 if sys.UidMappings != nil || sys.GidMappings != nil {
128 if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(p[1]), 0, 0); err1 != 0 {
129 goto childerror
130 }
131 r1, _, err1 = RawSyscall(SYS_READ, uintptr(p[0]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
132 if err1 != 0 || r1 != unsafe.Sizeof(uintptr(0)) {
mrunalp 2014/10/02 00:07:04 Should we set err1 to something specific when r1 d
iant 2014/10/02 00:33:34 Yes. I guess EINVAL unless you can think of somet
mrunalp 2014/10/02 05:14:25 Sounds good to me.
mrunalp 2014/10/02 05:14:25 Done.
133 goto childerror
134 }
135 if err2 != 0 {
136 err1 = err2
137 goto childerror
138 }
139 }
140
96 // Parent death signal 141 // Parent death signal
97 if sys.Pdeathsig != 0 { 142 if sys.Pdeathsig != 0 {
98 _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_PDEATHSIG, uintptr(sys.Pdeathsig), 0, 0, 0, 0) 143 _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_PDEATHSIG, uintptr(sys.Pdeathsig), 0, 0, 0, 0)
99 if err1 != 0 { 144 if err1 != 0 {
100 goto childerror 145 goto childerror
101 } 146 }
102 147
103 // Signal self if parent is already dead. This might cause a 148 // Signal self if parent is already dead. This might cause a
104 // duplicate signal in rare cases, but it won't matter when 149 // duplicate signal in rare cases, but it won't matter when
105 // using SIGKILL. 150 // using SIGKILL.
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
289 if err = Pipe(p); err != nil { 334 if err = Pipe(p); err != nil {
290 return 335 return
291 } 336 }
292 if _, err = fcntl(p[0], F_SETFD, FD_CLOEXEC); err != nil { 337 if _, err = fcntl(p[0], F_SETFD, FD_CLOEXEC); err != nil {
293 return 338 return
294 } 339 }
295 _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC) 340 _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC)
296 } 341 }
297 return 342 return
298 } 343 }
344
345 // writeIDMappings writes the user namespace User ID or Group ID mappings to the specified path.
346 func writeIDMappings(path string, idMap []SysProcIDMap) error {
347 fd, err := Open(path, O_RDWR, 0)
348 if err != nil {
349 return err
350 }
351
352 data := ""
353 for _, im := range idMap {
354 data = data + itoa(im.ContainerID) + " " + itoa(im.HostID) + " " + itoa(im.Size) + "\n"
355 }
356
357 bytes, err := ByteSliceFromString(data)
358 if err != nil {
359 Close(fd)
360 return err
361 }
362
363 if _, err := Write(fd, bytes); err != nil {
364 Close(fd)
365 return err
366 }
367
368 if err := Close(fd); err != nil {
369 return err
370 }
371
372 return nil
373 }
374
375 // writeUidGidMappings writes User ID and Group ID mappings for user namespaces
376 // for a process and it is called from the parent process.
377 func writeUidGidMappings(pid int, sys *SysProcAttr) error {
378 if sys.UidMappings != nil {
379 uidf := "/proc/" + itoa(pid) + "/uid_map"
380 if err := writeIDMappings(uidf, sys.UidMappings); err != nil {
381 return err
382 }
383 }
384
385 if sys.GidMappings != nil {
386 gidf := "/proc/" + itoa(pid) + "/gid_map"
387 if err := writeIDMappings(gidf, sys.GidMappings); err != nil {
388 return err
389 }
390 }
391
392 return nil
393 }
OLD NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b