Skip to content

Compiling a Cilk/C++ program with Cilkscale instrumentation prints LLVM IR to stderr #129

@ailiop

Description

@ailiop

Describe the bug

When I compile a Cilk/C++ program with Cilkscale instrumentation (-fcilktool=cilkscale), LLVM IR for my Cilk function is printed to stderr. Compilation seems to proceed OK otherwise: the binary runs and passes correctness tests, and Cilkscale outputs its work-span analysis report as expected.

The same is true when compiling with Cilkscale-benchmark instrumentation (-fcilktool=cilkscale-benchmark).

I do not see this behavior with Cilk/C code or if I use OpenCilk 1.0.

Expected behavior

No printing of LLVM IR when compiling a Cilk/C++ program with Cilkscale instrumentation.

OpenCilk version

  • Release version: 2.0
  • Distribution method: built from source (tags opencilk/v2.0 for all repos)

System information

  • OS: Ubuntu 20.04 (via WSL2 on Windows 10)
  • CPU: Intel Core i7-10875H

Steps to reproduce (include relevant output)

Minimal working example, hello.cpp:

#include <iostream>
#include <cilk/cilk.h>

// Writes the line "Hello world!" (with a trailing newline) to std::cout.
void hello() {
    std::cout << "Hello world!\n";
}

// Reproducer entry point: inside a single cilk_scope, calls hello() once via
// cilk_spawn and once as a plain call, then returns 0.
int main() {
    cilk_scope {
        cilk_spawn hello();  // spawned call to hello()
        hello();             // plain call in the continuation of the spawn
    }
    return 0;
}

Compile with Cilkscale instrumentation:

$ /opt/opencilk-2/bin/clang++ hello.cpp -fopencilk -fcilktool=cilkscale -o hello
Sync   sync within %syncreg, label %sync.continuehas unwind
; Function Attrs: mustprogress noinline norecurse optnone uwtable
define dso_local noundef i32 @main() #5 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
  %retval = alloca i32, align 4
  %syncreg = call token @llvm.syncregion.start()
  %exn.slot10 = alloca i8*, align 8
  %ehselector.slot11 = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  %0 = call token @llvm.tapir.runtime.start()
  br label %entry.split

entry.split:                                      ; preds = %entry
  %1 = call token @llvm.taskframe.create()
  %exn.slot3 = alloca i8*, align 8
  %ehselector.slot4 = alloca i32, align 4
  detach within %syncreg, label %det.achd, label %det.cont unwind label %lpad2

det.achd:                                         ; preds = %entry.split
  %exn.slot = alloca i8*, align 8
  %ehselector.slot = alloca i32, align 4
  call void @llvm.taskframe.use(token %1)
  invoke void @_Z5hellov()
          to label %invoke.cont unwind label %lpad

invoke.cont:                                      ; preds = %det.achd
  reattach within %syncreg, label %det.cont

det.cont:                                         ; preds = %entry.split, %invoke.cont
  invoke void @_Z5hellov()
          to label %invoke.cont12 unwind label %lpad9.csi-split.csi-split-lp

invoke.cont12:                                    ; preds = %det.cont
  sync within %syncreg, label %sync.continue

sync.continue:                                    ; preds = %invoke.cont12
  invoke void @llvm.sync.unwind(token %syncreg)
          to label %invoke.cont13 unwind label %lpad9.csi-split-lp

invoke.cont13:                                    ; preds = %sync.continue
  call void @llvm.tapir.runtime.end(token %0)
  ret i32 0

lpad:                                             ; preds = %det.achd
  %2 = landingpad { i8*, i32 }
          cleanup
  %3 = extractvalue { i8*, i32 } %2, 0
  store i8* %3, i8** %exn.slot, align 8
  %4 = extractvalue { i8*, i32 } %2, 1
  store i32 %4, i32* %ehselector.slot, align 4
  %exn = load i8*, i8** %exn.slot, align 8
  %sel = load i32, i32* %ehselector.slot, align 4
  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
  %lpad.val1 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
  invoke void @llvm.detached.rethrow.sl_p0i8i32s(token %syncreg, { i8*, i32 } %lpad.val1)
          to label %unreachable unwind label %lpad2

lpad2:                                            ; preds = %entry.split, %lpad
  %5 = landingpad { i8*, i32 }
          cleanup
  %6 = extractvalue { i8*, i32 } %5, 0
  store i8* %6, i8** %exn.slot3, align 8
  %7 = extractvalue { i8*, i32 } %5, 1
  store i32 %7, i32* %ehselector.slot4, align 4
  br label %ehcleanup

ehcleanup:                                        ; preds = %lpad2
  %exn5 = load i8*, i8** %exn.slot3, align 8
  %sel6 = load i32, i32* %ehselector.slot4, align 4
  %lpad.val7 = insertvalue { i8*, i32 } undef, i8* %exn5, 0
  %lpad.val8 = insertvalue { i8*, i32 } %lpad.val7, i32 %sel6, 1
  invoke void @llvm.taskframe.resume.sl_p0i8i32s(token %1, { i8*, i32 } %lpad.val8)
          to label %unreachable unwind label %lpad9.csi-split.csi-split

lpad9.csi-split-lp:                               ; preds = %sync.continue
  %lpad.csi-split-lp = landingpad { i8*, i32 }
          cleanup
  br label %lpad9

lpad9.csi-split.csi-split-lp:                     ; preds = %det.cont
  %lpad.csi-split-lp21 = landingpad { i8*, i32 }
          cleanup
  br label %lpad9.csi-split

lpad9.csi-split.csi-split:                        ; preds = %ehcleanup
  %lpad.csi-split22 = landingpad { i8*, i32 }
          cleanup
  br label %lpad9.csi-split

lpad9.csi-split:                                  ; preds = %lpad9.csi-split.csi-split, %lpad9.csi-split.csi-split-lp
  %lpad.phi23 = phi { i8*, i32 } [ %lpad.csi-split-lp21, %lpad9.csi-split.csi-split-lp ], [ %lpad.csi-split22, %lpad9.csi-split.csi-split ]
  br label %lpad9

lpad9:                                            ; preds = %lpad9.csi-split, %lpad9.csi-split-lp
  %lpad.phi = phi { i8*, i32 } [ %lpad.csi-split-lp, %lpad9.csi-split-lp ], [ %lpad.phi23, %lpad9.csi-split ]
  %8 = extractvalue { i8*, i32 } %lpad.phi, 0
  store i8* %8, i8** %exn.slot10, align 8
  %9 = extractvalue { i8*, i32 } %lpad.phi, 1
  store i32 %9, i32* %ehselector.slot11, align 4
  call void @llvm.tapir.runtime.end(token %0)
  br label %eh.resume

eh.resume:                                        ; preds = %lpad9
  %exn15 = load i8*, i8** %exn.slot10, align 8
  %sel16 = load i32, i32* %ehselector.slot11, align 4
  %lpad.val17 = insertvalue { i8*, i32 } undef, i8* %exn15, 0
  %lpad.val18 = insertvalue { i8*, i32 } %lpad.val17, i32 %sel16, 1
  resume { i8*, i32 } %lpad.val18

unreachable:                                      ; preds = %ehcleanup, %lpad
  unreachable
}

Run:

$ ./hello
Hello world!
Hello world!
tag,work (seconds),span (seconds),parallelism,burdened_span (seconds),burdened_parallelism
,0.0030673,0.0030097,1.01914,0.00301595,1.01703

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions