Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(6)

Side by Side Diff: src/syscall/exec_linux.go

Issue 126190043: syscall: Adds support for User Namespaces in Linux by a...
Patch Set: diff -r ce32e953ef6f5418efbcce5c25175aec25204eac https://code.google.com/p/go Created 11 years, 7 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 The Go Authors. All rights reserved. 1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // +build linux 5 // +build linux
6 6
7 package syscall 7 package syscall
8 8
9 import ( 9 import (
10 "unsafe" 10 "unsafe"
11 ) 11 )
12 12
13 // IdMap holds Container ID to Host ID mappings used for User Namespaces in Linux. See user_namespaces(7).
iant 2014/10/01 22:27:18 Please break the comment line after "Linux.".
mrunalp 2014/10/02 00:07:04 Done.
14 type IdMap struct {
mrunalp 2014/10/01 21:48:42 Should I rename Id --> ID everywhere as per naming
iant 2014/10/01 22:27:18 Yes, please, sorry I missed that. Actually IDMap
mrunalp 2014/10/02 00:07:04 Done.
15 ContainerId int // Container Id.
16 HostId int // Host Id.
17 Size int // Size.
18 }
19
13 type SysProcAttr struct { 20 type SysProcAttr struct {
14 » Chroot string // Chroot. 21 » Chroot string // Chroot.
15 » Credential *Credential // Credential. 22 » Credential *Credential // Credential.
16 » Ptrace bool // Enable tracing. 23 » Ptrace bool // Enable tracing.
17 » Setsid bool // Create session. 24 » Setsid bool // Create session.
18 » Setpgid bool // Set process group ID to new pid (SYSV setpgrp) 25 » Setpgid bool // Set process group ID to new pid (SYSV setpgrp)
19 » Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set) 26 » Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set)
20 » Noctty bool // Detach fd 0 from controlling terminal 27 » Noctty bool // Detach fd 0 from controlling terminal
21 » Ctty int // Controlling TTY fd (Linux only) 28 » Ctty int // Controlling TTY fd (Linux only)
22 » Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only) 29 » Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only)
23 » Cloneflags uintptr // Flags for clone calls (Linux only) 30 » Cloneflags uintptr // Flags for clone calls (Linux only)
24 » Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY) 31 » Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY)
25 » Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set) 32 » Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set)
33 » UidMappings []IdMap // User ID mappings for user namespaces.
34 » GidMappings []IdMap // Group ID mappings for user namespaces.
26 } 35 }
27 36
28 // Implemented in runtime package. 37 // Implemented in runtime package.
29 func runtime_BeforeFork() 38 func runtime_BeforeFork()
30 func runtime_AfterFork() 39 func runtime_AfterFork()
31 40
32 // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child. 41 // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child.
33 // If a dup or exec fails, write the errno error to pipe. 42 // If a dup or exec fails, write the errno error to pipe.
34 // (Pipe is close-on-exec so if exec succeeds, it will be closed.) 43 // (Pipe is close-on-exec so if exec succeeds, it will be closed.)
35 // In the child, this function must not acquire any locks, because 44 // In the child, this function must not acquire any locks, because
36 // they might have been locked at the time of the fork. This means 45 // they might have been locked at the time of the fork. This means
37 // no rescheduling, no malloc calls, and no new stack segments. 46 // no rescheduling, no malloc calls, and no new stack segments.
38 // For the same reason compiler does not race instrument it. 47 // For the same reason compiler does not race instrument it.
39 // The calls to RawSyscall are okay because they are assembly 48 // The calls to RawSyscall are okay because they are assembly
40 // functions that do not grow the stack. 49 // functions that do not grow the stack.
41 func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) { 50 func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) {
42 // Declare all variables at top in case any 51 // Declare all variables at top in case any
43 // declarations require heap allocation (e.g., err1). 52 // declarations require heap allocation (e.g., err1).
44 var ( 53 var (
45 r1 uintptr 54 r1 uintptr
46 err1 Errno 55 err1 Errno
47 nextfd int 56 nextfd int
48 i int 57 i int
58 p [2]int
49 ) 59 )
50 60
51 // Guard against side effects of shuffling fds below. 61 // Guard against side effects of shuffling fds below.
52 // Make sure that nextfd is beyond any currently open files so 62 // Make sure that nextfd is beyond any currently open files so
53 // that we can't run the risk of overwriting any of them. 63 // that we can't run the risk of overwriting any of them.
54 fd := make([]int, len(attr.Files)) 64 fd := make([]int, len(attr.Files))
55 nextfd = len(attr.Files) 65 nextfd = len(attr.Files)
56 for i, ufd := range attr.Files { 66 for i, ufd := range attr.Files {
57 if nextfd < int(ufd) { 67 if nextfd < int(ufd) {
58 nextfd = int(ufd) 68 nextfd = int(ufd)
59 } 69 }
60 fd[i] = int(ufd) 70 fd[i] = int(ufd)
61 } 71 }
62 nextfd++ 72 nextfd++
63 73
74 // Allocate another pipe for parent to child communication for synchronizing writing of uid/gid mappings.
75 if sys.UidMappings != nil || sys.GidMappings != nil {
76 p[0] = -1
iant 2014/10/01 22:27:18 No need to set p[0] and p[1] to -1. I'm not sure
mrunalp 2014/10/02 00:07:04 Done.
77 p[1] = -1
78 if err := forkExecPipe(p[:]); err != nil {
79 return 0, err.(Errno)
80 }
81 }
82
64 // About to call fork. 83 // About to call fork.
65 // No more allocation or calls of non-assembly functions. 84 // No more allocation or calls of non-assembly functions.
66 runtime_BeforeFork() 85 runtime_BeforeFork()
67 r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0) 86 r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0)
68 if err1 != 0 { 87 if err1 != 0 {
69 runtime_AfterFork() 88 runtime_AfterFork()
70 return 0, err1 89 return 0, err1
71 } 90 }
72 91
73 if r1 != 0 { 92 if r1 != 0 {
74 // parent; return PID 93 // parent; return PID
75 runtime_AfterFork() 94 runtime_AfterFork()
76 pid = int(r1) 95 pid = int(r1)
77 96
97 if sys.UidMappings != nil || sys.GidMappings != nil {
98 Close(p[0])
99 var err2 uintptr
iant 2014/10/01 22:27:18 Should probably declare err2 up above next to err1
mrunalp 2014/10/02 00:07:04 Done.
100 err2 = 0
101 err := writeUidGidMappings(pid, sys)
102 if err != nil {
103 err2 = uintptr(err.(Errno))
104 }
105 RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
mrunalp 2014/10/01 21:48:42 Again questions around error-handling here. There
iant 2014/10/01 22:27:18 Fortunately I don't think we have to worry about t
mrunalp 2014/10/02 00:07:04 Acknowledged.
106 Close(p[1])
107 }
108
78 if sys.Joinpgrp != 0 { 109 if sys.Joinpgrp != 0 {
79 // Place the child in the specified process group. 110 // Place the child in the specified process group.
80 RawSyscall(SYS_SETPGID, r1, uintptr(sys.Joinpgrp), 0) 111 RawSyscall(SYS_SETPGID, r1, uintptr(sys.Joinpgrp), 0)
81 } else if sys.Foreground || sys.Setpgid { 112 } else if sys.Foreground || sys.Setpgid {
82 // Place the child in a new process group. 113 // Place the child in a new process group.
83 RawSyscall(SYS_SETPGID, 0, 0, 0) 114 RawSyscall(SYS_SETPGID, 0, 0, 0)
84 115
85 if sys.Foreground { 116 if sys.Foreground {
86 // Set new foreground process group. 117 // Set new foreground process group.
87 RawSyscall(SYS_IOCTL, uintptr(Stdin), TIOCSPGRP, uintptr(unsafe.Pointer(&pid))) 118 RawSyscall(SYS_IOCTL, uintptr(Stdin), TIOCSPGRP, uintptr(unsafe.Pointer(&pid)))
88 } 119 }
89 } 120 }
90 121
91 return pid, 0 122 return pid, 0
92 } 123 }
93 124
94 // Fork succeeded, now in child. 125 // Fork succeeded, now in child.
95 126
127 // Wait for uid/gid mappings to be written.
128 if sys.UidMappings != nil || sys.GidMappings != nil {
129 if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(p[1]), 0, 0); err1 != 0 {
130 goto childerror
131 }
132 _, _, err2 := RawSyscall(SYS_READ, uintptr(p[0]), uintptr(unsafe.Pointer(&err1)), uintptr(1))
iant 2014/10/01 22:27:17 The parent is going to write unsafe.Sizeof(uintptr
mrunalp 2014/10/02 00:07:04 Ahh, I missed that. Thanks!
mrunalp 2014/10/02 00:07:04 Done.
133 if err2 != 0 {
134 err1 = err2
135 goto childerror
136 }
137 if err1 != 0 {
138 goto childerror
139 }
140 }
141
96 // Parent death signal 142 // Parent death signal
97 if sys.Pdeathsig != 0 { 143 if sys.Pdeathsig != 0 {
98 _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_PDEATHSIG, uintptr(sys.Pdeathsig), 0, 0, 0, 0) 144 _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_PDEATHSIG, uintptr(sys.Pdeathsig), 0, 0, 0, 0)
99 if err1 != 0 { 145 if err1 != 0 {
100 goto childerror 146 goto childerror
101 } 147 }
102 148
103 // Signal self if parent is already dead. This might cause a 149 // Signal self if parent is already dead. This might cause a
104 // duplicate signal in rare cases, but it won't matter when 150 // duplicate signal in rare cases, but it won't matter when
105 // using SIGKILL. 151 // using SIGKILL.
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
289 if err = Pipe(p); err != nil { 335 if err = Pipe(p); err != nil {
290 return 336 return
291 } 337 }
292 if _, err = fcntl(p[0], F_SETFD, FD_CLOEXEC); err != nil { 338 if _, err = fcntl(p[0], F_SETFD, FD_CLOEXEC); err != nil {
293 return 339 return
294 } 340 }
295 _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC) 341 _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC)
296 } 342 }
297 return 343 return
298 } 344 }
345
346 // writeIdMappings writes the user namespace uid or gid mappings to the specified path.
347 func writeIdMappings(path string, idMap []IdMap) error {
348 fd, err := Open(path, O_RDWR, 0)
349 if err != nil {
350 return err
351 }
352
353 data := ""
354 for _, im := range idMap {
355 data = data + itoa(im.ContainerId) + " " + itoa(im.HostId) + " " + itoa(im.Size) + "\n"
356 }
357
358 bytes, err := ByteSliceFromString(data)
359 if err != nil {
360 Close(fd)
361 return err
362 }
363
364 if _, err := Write(fd, bytes); err != nil {
365 Close(fd)
366 return err
367 }
368
369 if err := Close(fd); err != nil {
370 return err
371 }
372
373 return nil
374 }
375
376 // writeUidGidMappings writes uid/gid mappings for user namespaces for a process and it is called from the parent process.
iant 2014/10/01 22:27:18 Add a line break somewhere in the comment.
mrunalp 2014/10/02 00:07:04 Done.
377 func writeUidGidMappings(pid int, sys *SysProcAttr) error {
378 if sys.UidMappings != nil {
379 uidf := "/proc/" + itoa(pid) + "/uid_map"
380 if err := writeIdMappings(uidf, sys.UidMappings); err != nil {
381 return err
382 }
383 }
384
385 if sys.GidMappings != nil {
386 gidf := "/proc/" + itoa(pid) + "/gid_map"
387 if err := writeIdMappings(gidf, sys.GidMappings); err != nil {
388 return err
389 }
390 }
391
392 return nil
393 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b