frast is a prototype / proof-of-concept of two things:
- An R-to-Fortran transpiler
- A just-in-time (JIT) compiler for R functions
It takes an R function and compiles it to fast machine code, by way of transpiling it to Fortran.
R functions are translated to Fortran subroutines, which are
then compiled as a stand-alone Fortran 2018 module. Compilation is
performed using gfortran. The compiled shared object is then dynamically
loaded using dyn.load(), and the compiled function in the loaded object
can then be evaluated using RFI::.ModernFortran.
Some potential next steps:
- compile w/ llvm-jit toolchain (ala llvmlite/numba) instead of gfortran.
- remove the need for explicit type manifests in R. (note-to-self: see if it's
easy to recycle the implementation in
package:memoise). A solid approach would be to add a check beforeevaling the R function, to check if a compiled version of that function matching the calling arguments signature is available. If not, launch a background task compiling (so it will be available next time the function is evaluated) but proceed with evaluating the R function as normal. This will require forcing of all promises, but that should be a small price to pay for the speed boost. Also, investigate if the signature matching in S4 or R7 is flexible enough for this, or if this would require rolling a custom implementation for dispatching + caching methods. - autogrenerate R wrappers for R package authors, integrate transpilation to Fortran with
R CMD buildso that packages users don't have to manage the generated Fortran files at all, while still giving package users the benefits of a package that contains (auto-generated, fast, AOT compiled) Fortran subroutines.
This is a basic example
library(frast)
library(RFI)
addone <- function(x) {
manifest(Var(x, "d", shape = ":"))
x = x + 1
}(ptr <- load_so(addone))
#> module mod_addone
#>
#> use iso_c_binding, only: c_int, c_double, c_double_complex, c_bool
#> implicit none
#>
#> contains
#>
#> subroutine addone(x) bind(c)
#> real(c_double) :: x(:)
#> x = x + 1
#> end subroutine addone
#>
#> end module mod_addone
#> <pointer: 0x7f9ce7d35120>
#> attr(,"class")
#> [1] "NativeSymbol"
RFI::.ModernFortran(ptr, array(1))
#> [[1]]
#> [1] 2
# .Fortran(ptr, array(1))Lets compile a convolve function.
convolve_r <- function(a, b) {
ab = double(length(a) + length(b) - 1)
for (i in seq_along(a))
for (j in seq_along(b))
ab[i + j - 1] = ab[i + j - 1] + a[i] * b[j]
ab
}
convolve_rf <- function(a, b, ab) {
manifest(
Var(i, "i"),
Var(j, "i"),
Var(a, "d", 1, modifiable = FALSE),
Var(b, "d", 1, modifiable = FALSE),
Var(ab, "d", 1, modifiable = TRUE)
)
for (i in seq_along(a))
for (j in seq_along(b))
ab[i + j - 1] = ab[i + j - 1] + a[i] * b[j]
}Here is what it looks like translated to Fortran:
frast:::translate(convolve_rf)
#> subroutine convolve_rf(a,b,ab) bind(c)
#> integer(c_int) :: i
#> integer(c_int) :: j
#> real(c_double), intent(in) :: a(:)
#> real(c_double), intent(in) :: b(:)
#> real(c_double), intent(in out) :: ab(:)
#> do i = 1, size(a)
#> do j = 1, size(b)
#> ab(i + j - 1) = ab(i + j - 1) + a(i) * b(j)
#> end do
#> end do
#> end subroutine convolve_rfand also compare it to some alternative approaches to see what the speedup is.
convolve_c_ <- inline::cfunction(
signature(
a = "double",
na = "integer",
b = "double",
nb = "integer",
ab = "double"
),
body = "
//void convolve(double *a, int *na, double *b, int *nb, double *ab)
{
int nab = *na + *nb - 1;
for(int i = 0; i < nab; i++)
ab[i] = 0.0;
for(int i = 0; i < *na; i++)
for(int j = 0; j < *nb; j++)
ab[i + j] += a[i] * b[j];
}", convention = ".C")
convolve_c <- function(a, b) {
convolve_c_(a, length(a), b, length(b),
double(length(a) + length(b) - 1))[[5L]]
}
convolve_r <- function(a, b) {
ab = double(length(a) + length(b) - 1)
for (i in seq_along(a))
for (j in seq_along(b))
ab[i + j - 1] = ab[i + j - 1] + a[i] * b[j]
ab
}
convolve_rf <- function(a, b, ab) {
manifest(
Var(i, "i"),
Var(j, "i"),
Var(a, "d", 1, modifiable = FALSE),
Var(b, "d", 1, modifiable = FALSE),
Var(ab, "d", 1, modifiable = TRUE)
)
for (i in seq_along(a))
for (j in seq_along(b))
ab[i + j - 1] = ab[i + j - 1] + a[i] * b[j]
}
ptr <- load_so(convolve_rf)
#> module mod_convolve_rf
#>
#> use iso_c_binding, only: c_int, c_double, c_double_complex, c_bool
#> implicit none
#>
#> contains
#>
#> subroutine convolve_rf(a,b,ab) bind(c)
#> integer(c_int) :: i
#> integer(c_int) :: j
#> real(c_double), intent(in) :: a(:)
#> real(c_double), intent(in) :: b(:)
#> real(c_double), intent(in out) :: ab(:)
#> do i = 1, size(a)
#> do j = 1, size(b)
#> ab(i + j - 1) = ab(i + j - 1) + a(i) * b(j)
#> end do
#> end do
#> end subroutine convolve_rf
#>
#> end module mod_convolve_rf
convolve_f <- function(a, b) {
.ModernFortran(ptr, a, b, double(length(a) + length(b) - 1),
DUP = FALSE)[[3L]]
}
a <- runif(1024)
b <- runif(32)
all.equal(convolve_r(a, b), convolve_f(a, b))
#> [1] TRUE
all.equal(convolve_r(a, b), convolve_c(a, b))
#> [1] TRUE
r <- bench::mark(convolve_f(a, b),
convolve_c(a, b),
convolve_r(a, b))plot(r)
#> Loading required namespace: tidyrsessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.2 (2020-06-22)
#> os Ubuntu 20.04.1 LTS
#> system x86_64, linux-gnu
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/New_York
#> date 2020-08-09
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.2)
#> beeswarm 0.2.3 2016-04-25 [1] CRAN (R 4.0.2)
#> bench 1.1.1 2020-01-13 [1] CRAN (R 4.0.2)
#> cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.2)
#> colorspace 1.4-1 2019-03-18 [1] CRAN (R 4.0.2)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.2)
#> digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.2)
#> dplyr 1.0.1 2020-07-31 [1] CRAN (R 4.0.2)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.2)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.2)
#> fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.2)
#> farver 2.0.3 2020-01-16 [1] CRAN (R 4.0.2)
#> frast * 0.0.0.9000 2020-08-09 [1] local
#> generics 0.0.2 2018-11-29 [1] CRAN (R 4.0.2)
#> ggbeeswarm 0.6.0 2017-08-07 [1] CRAN (R 4.0.2)
#> ggplot2 3.3.2 2020-06-19 [1] CRAN (R 4.0.2)
#> glue 1.4.1 2020-05-13 [1] CRAN (R 4.0.2)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.2)
#> htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.2)
#> inline 0.3.15 2018-05-18 [1] CRAN (R 4.0.2)
#> knitr 1.29 2020-06-23 [1] CRAN (R 4.0.2)
#> lifecycle 0.2.0 2020-03-06 [1] CRAN (R 4.0.2)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.2)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.2)
#> pillar 1.4.6 2020-07-10 [1] CRAN (R 4.0.2)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.2)
#> profmem 0.5.0 2018-01-30 [1] CRAN (R 4.0.2)
#> purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.2)
#> R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.2)
#> ragg 0.3.1.9000 2020-08-09 [1] Github (r-lib/ragg@2f081db)
#> RFI * 0.0.0.9000 2020-08-09 [1] local
#> rlang 0.4.7 2020-07-09 [1] CRAN (R 4.0.2)
#> rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.2)
#> scales 1.1.1 2020-05-11 [1] CRAN (R 4.0.2)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.2)
#> stringi 1.4.6 2020-02-17 [1] CRAN (R 4.0.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.2)
#> systemfonts 0.2.3 2020-06-09 [1] CRAN (R 4.0.2)
#> tibble 3.0.3 2020-07-10 [1] CRAN (R 4.0.2)
#> tidyr 1.1.1 2020-07-31 [1] CRAN (R 4.0.2)
#> tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.2)
#> vctrs 0.3.2 2020-07-15 [1] CRAN (R 4.0.2)
#> vipor 0.4.5 2017-03-22 [1] CRAN (R 4.0.2)
#> withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.2)
#> xfun 0.16 2020-07-24 [1] CRAN (R 4.0.2)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
#>
#> [1] /home/tomasz/.local/lib/R/site-library
#> [2] /home/tomasz/.local/lib/R/library
sessioninfo::platform_info()
#> setting value
#> version R version 4.0.2 (2020-06-22)
#> os Ubuntu 20.04.1 LTS
#> system x86_64, linux-gnu
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/New_York
#> date 2020-08-09