R’s C interface
from Advanced R (1st ed)
An incomplete review
C data structures*
* Organized in groups according to how they use memory
Atomic Vectors
Atomic Vectors
# zeroes(n_): returns an integer vector of length n_ filled with 0.
# The C body coerces n_ with asInteger(), allocates an INTSXP of that
# length, and clears its data with memset(). The new vector is PROTECTed
# while it is filled and UNPROTECTed just before it is returned.
zeroes <- cfunction(c(n_ = "integer"), '
int n = asInteger(n_);
SEXP out = PROTECT(allocVector(INTSXP, n));
memset(INTEGER(out), 0, n * sizeof(int));
UNPROTECT(1);
return out;
')
zeroes(10);
#> [1] 0 0 0 0 0 0 0 0 0 0
Atomic Vectors
# zeroes(n_): returns an integer vector of length n_ filled with 0.
# The C body coerces n_ with asInteger(), allocates an INTSXP of that
# length, and clears its data with memset(). The new vector is PROTECTed
# while it is filled and UNPROTECTed just before it is returned.
zeroes <- cfunction(c(n_ = "integer"), '
int n = asInteger(n_);
SEXP out = PROTECT(allocVector(INTSXP, n));
memset(INTEGER(out), 0, n * sizeof(int));
UNPROTECT(1);
return out;
')
zeroes(10);
#> [1] 0 0 0 0 0 0 0 0 0 0
Atomic Vectors
# zeroes(n_): returns an integer vector of length n_ filled with 0.
# The C body coerces n_ with asInteger(), allocates an INTSXP of that
# length, and clears its data with memset(). The new vector is PROTECTed
# while it is filled and UNPROTECTed just before it is returned.
zeroes <- cfunction(c(n_ = "integer"), '
int n = asInteger(n_);
SEXP out = PROTECT(allocVector(INTSXP, n));
memset(INTEGER(out), 0, n * sizeof(int));
UNPROTECT(1);
return out;
')
zeroes(10);
#> [1] 0 0 0 0 0 0 0 0 0 0
Atomic Vectors
# zeroes(n_): returns an integer vector of length n_ filled with 0.
# The C body coerces n_ with asInteger(), allocates an INTSXP of that
# length, and clears its data with memset(). The new vector is PROTECTed
# while it is filled and UNPROTECTed just before it is returned.
zeroes <- cfunction(c(n_ = "integer"), '
int n = asInteger(n_);
SEXP out = PROTECT(allocVector(INTSXP, n));
memset(INTEGER(out), 0, n * sizeof(int));
UNPROTECT(1);
return out;
')
zeroes(10);
#> [1] 0 0 0 0 0 0 0 0 0 0
Better use PROTECT()
than sorry
Applies to all SEXP types
Atomic Vectors
REAL(),
INTEGER(),
LOGICAL(),
COMPLEX(),
RAW() → allow you to access the stored data
# add_two(x): returns a new double vector holding x[i] + 2 for every element.
# The C body allocates a REALSXP of the same length as x, takes raw double
# pointers to the input and the output with REAL(), and fills the output in
# a single loop. The output is PROTECTed until just before it is returned.
add_two <- cfunction(c(x = "numeric"), "
int n = length(x);
double *px, *pout;
SEXP out = PROTECT(allocVector(REALSXP, n));
px = REAL(x);
pout = REAL(out);
for (int i = 0; i < n; i++) {
pout[i] = px[i] + 2;
}
UNPROTECT(1);
return out;
")
add_two(as.numeric(1:10))
#> [1] 3 4 5 6 7 8 9 10 11 12
Character vectors and lists
Character vectors and lists
# abc(): takes no arguments and returns the character vector
# c("a", "b", "c"). The C body allocates a length-3 STRSXP and stores one
# mkChar() string in each slot with SET_STRING_ELT(). The vector is
# PROTECTed while it is filled and UNPROTECTed before it is returned.
abc <- cfunction(NULL, '
SEXP out = PROTECT(allocVector(STRSXP, 3));
SET_STRING_ELT(out, 0, mkChar("a"));
SET_STRING_ELT(out, 1, mkChar("b"));
SET_STRING_ELT(out, 2, mkChar("c"));
UNPROTECT(1);
return out;
')
abc()
#> [1] "a" "b" "c"
Character vectors and lists
# abc(): takes no arguments and returns the character vector
# c("a", "b", "c"). The C body allocates a length-3 STRSXP and stores one
# mkChar() string in each slot with SET_STRING_ELT(). The vector is
# PROTECTed while it is filled and UNPROTECTed before it is returned.
abc <- cfunction(NULL, '
SEXP out = PROTECT(allocVector(STRSXP, 3));
SET_STRING_ELT(out, 0, mkChar("a"));
SET_STRING_ELT(out, 1, mkChar("b"));
SET_STRING_ELT(out, 2, mkChar("c"));
UNPROTECT(1);
return out;
')
abc()
#> [1] "a" "b" "c"
Character vectors and lists
# abc(): takes no arguments and returns the character vector
# c("a", "b", "c"). The C body allocates a length-3 STRSXP and stores one
# mkChar() string in each slot with SET_STRING_ELT(). The vector is
# PROTECTed while it is filled and UNPROTECTed before it is returned.
abc <- cfunction(NULL, '
SEXP out = PROTECT(allocVector(STRSXP, 3));
SET_STRING_ELT(out, 0, mkChar("a"));
SET_STRING_ELT(out, 1, mkChar("b"));
SET_STRING_ELT(out, 2, mkChar("c"));
UNPROTECT(1);
return out;
')
Coercing scalars
R → C
asLogical(x): LGLSXP -> int
asInteger(x): INTSXP -> int
asReal(x): REALSXP -> double
CHAR(asChar(x)): STRSXP -> const char*
C → R
ScalarLogical(x): int -> LGLSXP
ScalarInteger(x): int -> INTSXP
ScalarReal(x): double -> REALSXP
mkString(x): const char* -> STRSXP
use translateChar() instead
Character vectors and lists
# abc(): takes no arguments and returns the character vector
# c("a", "b", "c"). The C body allocates a length-3 STRSXP and stores one
# mkChar() string in each slot with SET_STRING_ELT(). The vector is
# PROTECTed while it is filled and UNPROTECTed before it is returned.
abc <- cfunction(NULL, '
SEXP out = PROTECT(allocVector(STRSXP, 3));
SET_STRING_ELT(out, 0, mkChar("a"));
SET_STRING_ELT(out, 1, mkChar("b"));
SET_STRING_ELT(out, 2, mkChar("c"));
UNPROTECT(1);
return out;
')
abc()
#> [1] "a" "b" "c"
Pairlists
Missing values
# is_na(x): element-wise missing-value test implemented in C.
# Returns a logical vector with one entry per element of x:
#   - logical / integer / character input: TRUE where the element equals the
#     type-specific sentinel (NA_LOGICAL, NA_INTEGER, NA_STRING);
#   - double input: the result of ISNA() for each element (per the R API
#     documentation ISNA() is true for NA only, not for NaN);
#   - any other type: NA for every element.
# TYPEOF(x) cannot change while x is scanned, so the type dispatch is done
# once and each branch runs its own tight loop over cached data pointers.
is_na <- cfunction(c(x = "ANY"), '
  int n = length(x);
  SEXP out = PROTECT(allocVector(LGLSXP, n));
  int *pout = LOGICAL(out);

  switch (TYPEOF(x)) {
  case LGLSXP: {
    const int *px = LOGICAL(x);
    for (int i = 0; i < n; i++) {
      pout[i] = (px[i] == NA_LOGICAL);
    }
    break;
  }
  case INTSXP: {
    const int *px = INTEGER(x);
    for (int i = 0; i < n; i++) {
      pout[i] = (px[i] == NA_INTEGER);
    }
    break;
  }
  case REALSXP: {
    const double *px = REAL(x);
    for (int i = 0; i < n; i++) {
      pout[i] = ISNA(px[i]);
    }
    break;
  }
  case STRSXP:
    /* Strings have no raw data pointer; compare each CHARSXP to the
       NA_STRING sentinel. */
    for (int i = 0; i < n; i++) {
      pout[i] = (STRING_ELT(x, i) == NA_STRING);
    }
    break;
  default:
    /* Unsupported type: the answer is unknown for every element. */
    for (int i = 0; i < n; i++) {
      pout[i] = NA_LOGICAL;
    }
  }

  UNPROTECT(1);
  return out;
')
is_na(c(NA, 1L))
#> [1] TRUE FALSE
is_na(c(NA, 1))
#> [1] TRUE FALSE
is_na(c(NA, "a"))
#> [1] TRUE FALSE
is_na(c(NA, TRUE))
#> [1] TRUE FALSE
Input validation
Usually done at the R side of the function
If done at the C side: TYPEOF() or the type-predicate helpers (isInteger(), isReal(), isLogical(), isString(), …)
Exercise:
Finding the C source code for a function
Exercise
> pryr::show_c_source(.Internal(mean()))
mean is implemented by do_summary with op = 1
# integer_mean(x): arithmetic mean of an integer vector, computed in C.
# Returns a length-1 double:
#   - NA as soon as any element is NA_INTEGER;
#   - sum / length otherwise (a zero-length vector gives 0.0 / 0, i.e. NaN).
# The sum is accumulated in a double, and the length is read with XLENGTH()
# into an R_xlen_t.
# Input is validated on the C side: INTEGER_ELT() is only meaningful for
# integer (or logical) storage, so any other type raises an R error instead
# of being read as if it were integer data.
integer_mean <- inline::cfunction(c(x = "SEXP"), '
  if (TYPEOF(x) != INTSXP && TYPEOF(x) != LGLSXP)
    error("integer_mean() requires an integer (or logical) vector");

  R_xlen_t n = XLENGTH(x);
  double s = 0.0;
  for (R_xlen_t i = 0; i < n; i++) {
    int xi = INTEGER_ELT(x, i);
    if (xi == NA_INTEGER)
      return ScalarReal(R_NaReal);
    s += xi;
  }
  /* s is already a double, so the division is done in floating point. */
  return ScalarReal(s / n);
')
> integer_mean(as.integer(c(2, 2, 4)))
[1] 2.666667
> mean(c(2, 2, 4))
[1] 2.666667