Skip to content

Commit 1c59be9

Browse files
committed
Performance improvement
1 parent 3c913eb commit 1c59be9

File tree

12 files changed

+197
-37
lines changed

12 files changed

+197
-37
lines changed

DESCRIPTION

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: diffusr
22
Type: Package
33
Title: Network Diffusion Algorithms
4-
Version: 0.2.1
4+
Version: 0.2.2
55
Date: 2018-04-20
66
Authors@R: person("Simon", "Dirmeier",
77
email = "simon.dirmeier@gmx.de",
@@ -27,7 +27,9 @@ Imports:
2727
methods,
2828
checkmate,
2929
matrixStats,
30-
sparseMatrixStats
30+
sparseMatrixStats,
31+
memuse,
32+
pryr
3133
Suggests:
3234
knitr,
3335
rmarkdown,

NAMESPACE

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,22 @@ importFrom(checkmate,assert_int)
1414
importFrom(checkmate,assert_integer)
1515
importFrom(checkmate,assert_logical)
1616
importFrom(checkmate,assert_number)
17+
importFrom(checkmate,assert_numeric)
1718
importFrom(checkmate,assert_vector)
1819
importFrom(checkmate,check_matrix)
1920
importFrom(checkmate,check_numeric)
2021
importFrom(checkmate,test_atomic_vector)
22+
importFrom(checkmate,test_logical)
2123
importFrom(checkmate,test_matrix)
2224
importFrom(checkmate,test_numeric)
2325
importFrom(igraph,components)
2426
importFrom(igraph,graph_from_adjacency_matrix)
2527
importFrom(matrixStats,colSums2)
28+
importFrom(memuse,Sys.meminfo)
29+
importFrom(memuse,Sys.swapinfo)
30+
importFrom(memuse,howbig)
31+
importFrom(methods,as)
2632
importFrom(methods,is)
33+
importFrom(pryr,object_size)
2734
importFrom(sparseMatrixStats,colAnyNAs)
2835
useDynLib(diffusr)

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# diffusr v0.2.2
2+
3+
* Bug fix
4+
* Performance improvement for sparse matrix
5+
16
# diffusr v0.2.1
27

38
* Allow sparse matrix as input

R/RcppExports.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ stoch_col_norm_ <- function(W) {
99
.Call('_diffusr_stoch_col_norm_', PACKAGE = 'diffusr', W)
1010
}
1111

12+
stoch_col_norm_s <- function(W) {
13+
.Call('_diffusr_stoch_col_norm_s', PACKAGE = 'diffusr', W)
14+
}
15+
1216
laplacian_ <- function(W) {
1317
.Call('_diffusr_laplacian_', PACKAGE = 'diffusr', W)
1418
}

R/mat_util.R

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,28 @@
2525
#' @param obj \code{\link[base]{matrix}} (or
2626
#' \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}},
2727
#' \code{\link[base]{vector}}) that is stochstically normalized
28+
#' @param no_r Do not use R for normalization
2829
#' @param ... additional params
2930
#' @return returns the normalized matrix/vector)
3031
#'
32+
#' @useDynLib diffusr
33+
#'
3134
#' @importFrom checkmate assert check_matrix test_numeric test_atomic_vector
35+
#' test_logical
36+
#' @importFrom memuse Sys.meminfo Sys.swapinfo howbig
37+
#' @importFrom pryr object_size
38+
#' @importFrom Rcpp sourceCpp
3239
#'
3340
#' @examples
3441
#' W <- matrix(abs(rnorm(10000)), 100, 100)
3542
#' stoch.W <- normalize.stochastic(W)
36-
normalize.stochastic <- function(obj, ...) {
43+
normalize.stochastic <- function(obj, no_r = NULL, ...) {
3744
is_matrix <- FALSE
3845
is_sparse <- FALSE
46+
if (!test_logical(no_r, len = 1, any.missing = FALSE, all.missing = FALSE,
47+
null.ok = FALSE)) {
48+
no_r <- FALSE
49+
}
3950
if (test_numeric(obj, lower = 0, finite = TRUE, any.missing = FALSE,
4051
all.missing = FALSE, null.ok = FALSE) &&
4152
test_atomic_vector(obj)) {
@@ -55,16 +66,54 @@ normalize.stochastic <- function(obj, ...) {
5566
is_matrix <- TRUE
5667
}
5768
if (is_matrix) {
58-
sums <- colSums3(obj, is_sparse)
59-
if (!all(.equals.double(sums, 1, .001))) {
60-
message("normalizing column vectors!")
61-
empt_col_val <- 1.0 / ncol(obj)
69+
if (no_r) {
70+
if (is_sparse) {
71+
obj <- as(stoch_col_norm_s(obj), "dgCMatrix")
72+
} else {
73+
obj <- stoch_col_norm_(obj)
74+
}
75+
} else {
76+
# check memory usage;
77+
# if there is a memory shortage, then call C function directly
78+
n <- as.numeric(ncol(obj))
79+
memory_usage <- Sys.meminfo()
80+
swap_usage <- Sys.swapinfo()
81+
free_ram <- memory_usage$freeram@size
82+
free_ram <- free_ram * switch(substring(memory_usage$freeram@unit, 1, 1),
83+
"B" = 1 / 1048576, "K" = 1 / 1024, "M" = 1,
84+
"G" = 1024, "T" = 1048576,
85+
.default = 1073741824)
86+
swap_ram <- swap_usage$freeswap@size
87+
swap_ram <- swap_ram * switch(substring(swap_usage$freeswap@unit, 1, 1),
88+
"B" = 1 / 1048576, "K" = 1 / 1024, "M" = 1,
89+
"G" = 1024, "T" = 1048576,
90+
.default = 1073741824)
91+
free_ram <- free_ram + swap_ram
92+
object_ram_p <- howbig(n, n, unit = "MiB")@size # size in practice
93+
object_ram_t <- as.numeric(object_size(obj)) / 1e6 # size in theory (MiB)
94+
95+
# if memory is bigger than the temporary variables, then use R
96+
if ((free_ram > object_ram_t * 4)) {
97+
sums <- colSums3(obj, is_sparse)
98+
if (!all(.equals.double(sums, 1, .001))) {
99+
message("normalizing column vectors!")
100+
empt_col_val <- 1.0 / n
62101

63-
obj <- obj / sums[col(obj)]
64-
# check if need wipe zeros
65-
zeros <- which(sums < empt_col_val)
66-
if (length(zeros)) {
67-
obj[, zeros] <- 0.00001
102+
obj <- obj / sums[col(obj)]
103+
# check if need wipe zeros
104+
zeros <- which(sums < 0.00001)
105+
if (length(zeros)) {
106+
obj[, zeros] <- empt_col_val
107+
}
108+
}
109+
} else if (free_ram < object_ram_p) {
110+
stop("You don't have sufficient memory to normalize. Required: ",
111+
round(object_ram_p / 1024, digits = 3), " GiB, but ",
112+
round(free_ram / 1024, digits = 3), " available.")
113+
} else {
114+
warning("You have just enough memory to normalize; consider ",
115+
"increasing your physical memory capacity in the future!")
116+
obj <- stoch_col_norm_s(obj)
68117
}
69118
}
70119
} else {
@@ -83,6 +132,8 @@ normalize.stochastic <- function(obj, ...) {
83132
#' @param ... additional params
84133
#' @return returns the Laplacian
85134
#'
135+
#' @useDynLib diffusr
136+
#'
86137
#' @importFrom checkmate assert check_matrix
87138
#' @importFrom Rcpp sourceCpp
88139
#'

R/mrw.R

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,9 @@
8080
#'
8181
#' @useDynLib diffusr
8282
#'
83-
#' @importFrom checkmate assert_number assert_int assert_logical test_numeric
84-
#' assert test_matrix check_numeric
83+
#' @importFrom checkmate assert_number assert_int assert_logical assert_numeric
84+
#' assert test_matrix check_numeric test_atomic_vector
85+
#' @importFrom methods as
8586
#' @importFrom Rcpp sourceCpp
8687
#'
8788
#' @examples
@@ -112,7 +113,7 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
112113
n_elements <- nrow(graph)
113114
if (is.dgCMatrix(graph)) {
114115
assert_dgCMatrix(graph)
115-
sparse <- TRUE
116+
sparse <- allow.ergodic <- TRUE
116117
} else {
117118
assert(
118119
test_matrix(graph, mode = "numeric", min.rows = 3, nrows = n_elements,
@@ -125,8 +126,9 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
125126
}
126127

127128
# convert p0 if p0 is vector
128-
if (test_numeric(p0, lower = 0, len = n_elements, finite = TRUE,
129-
any.missing = FALSE, all.missing = FALSE, null.ok = FALSE)) {
129+
if (test_atomic_vector(p0)) {
130+
assert_numeric(p0, lower = 0, len = n_elements, finite = TRUE,
131+
any.missing = FALSE, all.missing = FALSE, null.ok = FALSE)
130132
p0 <- as.matrix(p0)
131133
} else {
132134
assert(
@@ -150,6 +152,9 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
150152

151153
if (sparse) {
152154
# sparse matrix
155+
if (!is.dgCMatrix(stoch.graph)) {
156+
stoch.graph <- as(stoch.graph, "CsparseMatrix")
157+
}
153158
l <- mrwr_s(normalize.stochastic(p0),
154159
stoch.graph, r, thresh, niter, do.analytical)
155160
} else {

inst/include/diffusr_RcppExports.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,27 @@ namespace diffusr {
6767
return Rcpp::as<MatrixXd >(rcpp_result_gen);
6868
}
6969

70+
inline SpMat stoch_col_norm_s(const SpMat& W) {
71+
typedef SEXP(*Ptr_stoch_col_norm_s)(SEXP);
72+
static Ptr_stoch_col_norm_s p_stoch_col_norm_s = NULL;
73+
if (p_stoch_col_norm_s == NULL) {
74+
validateSignature("SpMat(*stoch_col_norm_s)(const SpMat&)");
75+
p_stoch_col_norm_s = (Ptr_stoch_col_norm_s)R_GetCCallable("diffusr", "_diffusr_stoch_col_norm_s");
76+
}
77+
RObject rcpp_result_gen;
78+
{
79+
RNGScope RCPP_rngScope_gen;
80+
rcpp_result_gen = p_stoch_col_norm_s(Shield<SEXP>(Rcpp::wrap(W)));
81+
}
82+
if (rcpp_result_gen.inherits("interrupted-error"))
83+
throw Rcpp::internal::InterruptedException();
84+
if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen))
85+
throw Rcpp::LongjumpException(rcpp_result_gen);
86+
if (rcpp_result_gen.inherits("try-error"))
87+
throw Rcpp::exception(Rcpp::as<std::string>(rcpp_result_gen).c_str());
88+
return Rcpp::as<SpMat >(rcpp_result_gen);
89+
}
90+
7091
inline MatrixXd laplacian_(const MatrixXd& W) {
7192
typedef SEXP(*Ptr_laplacian_)(SEXP);
7293
static Ptr_laplacian_ p_laplacian_ = NULL;

man/normalize.stochastic.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/RcppExports.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,40 @@ RcppExport SEXP _diffusr_stoch_col_norm_(SEXP WSEXP) {
8484
UNPROTECT(1);
8585
return rcpp_result_gen;
8686
}
87+
// stoch_col_norm_s
88+
SpMat stoch_col_norm_s(const SpMat& W);
89+
static SEXP _diffusr_stoch_col_norm_s_try(SEXP WSEXP) {
90+
BEGIN_RCPP
91+
Rcpp::RObject rcpp_result_gen;
92+
Rcpp::traits::input_parameter< const SpMat& >::type W(WSEXP);
93+
rcpp_result_gen = Rcpp::wrap(stoch_col_norm_s(W));
94+
return rcpp_result_gen;
95+
END_RCPP_RETURN_ERROR
96+
}
97+
RcppExport SEXP _diffusr_stoch_col_norm_s(SEXP WSEXP) {
98+
SEXP rcpp_result_gen;
99+
{
100+
Rcpp::RNGScope rcpp_rngScope_gen;
101+
rcpp_result_gen = PROTECT(_diffusr_stoch_col_norm_s_try(WSEXP));
102+
}
103+
Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error");
104+
if (rcpp_isInterrupt_gen) {
105+
UNPROTECT(1);
106+
Rf_onintr();
107+
}
108+
bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen);
109+
if (rcpp_isLongjump_gen) {
110+
Rcpp::internal::resumeJump(rcpp_result_gen);
111+
}
112+
Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error");
113+
if (rcpp_isError_gen) {
114+
SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen);
115+
UNPROTECT(1);
116+
Rf_error("%s", CHAR(rcpp_msgSEXP_gen));
117+
}
118+
UNPROTECT(1);
119+
return rcpp_result_gen;
120+
}
87121
// laplacian_
88122
MatrixXd laplacian_(const MatrixXd& W);
89123
static SEXP _diffusr_laplacian__try(SEXP WSEXP) {
@@ -445,6 +479,7 @@ static int _diffusr_RcppExport_validate(const char* sig) {
445479
if (signatures.empty()) {
446480
signatures.insert("MatrixXd(*heat_diffusion_)(const MatrixXd&,const MatrixXd&,const double)");
447481
signatures.insert("MatrixXd(*stoch_col_norm_)(const MatrixXd&)");
482+
signatures.insert("SpMat(*stoch_col_norm_s)(const SpMat&)");
448483
signatures.insert("MatrixXd(*laplacian_)(const MatrixXd&)");
449484
signatures.insert("MatrixXd(*laplacian_s)(const SpMat&)");
450485
signatures.insert("VectorXd(*node_degrees_)(const MatrixXd&)");
@@ -463,6 +498,7 @@ static int _diffusr_RcppExport_validate(const char* sig) {
463498
RcppExport SEXP _diffusr_RcppExport_registerCCallable() {
464499
R_RegisterCCallable("diffusr", "_diffusr_heat_diffusion_", (DL_FUNC)_diffusr_heat_diffusion__try);
465500
R_RegisterCCallable("diffusr", "_diffusr_stoch_col_norm_", (DL_FUNC)_diffusr_stoch_col_norm__try);
501+
R_RegisterCCallable("diffusr", "_diffusr_stoch_col_norm_s", (DL_FUNC)_diffusr_stoch_col_norm_s_try);
466502
R_RegisterCCallable("diffusr", "_diffusr_laplacian_", (DL_FUNC)_diffusr_laplacian__try);
467503
R_RegisterCCallable("diffusr", "_diffusr_laplacian_s", (DL_FUNC)_diffusr_laplacian_s_try);
468504
R_RegisterCCallable("diffusr", "_diffusr_node_degrees_", (DL_FUNC)_diffusr_node_degrees__try);
@@ -480,6 +516,7 @@ RcppExport SEXP _diffusr_RcppExport_registerCCallable() {
480516
static const R_CallMethodDef CallEntries[] = {
481517
{"_diffusr_heat_diffusion_", (DL_FUNC) &_diffusr_heat_diffusion_, 3},
482518
{"_diffusr_stoch_col_norm_", (DL_FUNC) &_diffusr_stoch_col_norm_, 1},
519+
{"_diffusr_stoch_col_norm_s", (DL_FUNC) &_diffusr_stoch_col_norm_s, 1},
483520
{"_diffusr_laplacian_", (DL_FUNC) &_diffusr_laplacian_, 1},
484521
{"_diffusr_laplacian_s", (DL_FUNC) &_diffusr_laplacian_s, 1},
485522
{"_diffusr_node_degrees_", (DL_FUNC) &_diffusr_node_degrees_, 1},

src/mat_util.cpp

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,27 +22,51 @@
2222

2323
#include "../inst/include/diffusr.h"
2424

25+
template <typename T>
26+
T stoch_col_norm_t(const T& W) {
27+
size_t n = W.rows();
28+
T res(n, n);
29+
VectorXd colsums = W.transpose() * VectorXd::Ones(n);
30+
//res.reserve((colsums * n).sum());
31+
const double empt_col_val = 1.0 / n;
32+
const double zero_col = 0.00001;
33+
34+
for (unsigned int i = 0; i < n; ++i) {
35+
if (colsums[i] <= zero_col) {
36+
for (size_t j = 0; j < n; ++j) {
37+
res.coeffRef(j, i) = empt_col_val;
38+
}
39+
} else {
40+
for (size_t j = 0; j < n; ++j) {
41+
res.coeffRef(j, i) = W.coeff(j, i) / colsums(i);
42+
}
43+
}
44+
}
45+
46+
return res;
47+
}
48+
2549
//' Column normalize a matrix, so that it is stochastic.
2650
//'
2751
//' @noRd
2852
//' @param W the adjacency matrix to be normalized
2953
//' @return returns the normalized matrix
3054
// [[Rcpp::interfaces(r, cpp)]]
3155
// [[Rcpp::export]]
32-
MatrixXd stoch_col_norm_(const MatrixXd& W) {
33-
MatrixXd res(W.rows(), W.cols());
34-
VectorXd colsums = W.colwise().sum();
35-
const double empt_col_val = 1.0 / W.cols();
36-
const double zero_col = 0.00001;
37-
#pragma omp parallel for
38-
for (unsigned int i = 0; i < W.cols(); ++i)
39-
{
40-
if (colsums[i] <= zero_col)
41-
res.col(i).fill(empt_col_val);
42-
else
43-
res.col(i) = W.col(i) / colsums(i);
44-
}
56+
MatrixXd stoch_col_norm_(const MatrixXd &W) {
57+
return stoch_col_norm_t(W);
58+
}
4559

60+
//' Column normalize a matrix, so that it is stochastic.
61+
//'
62+
//' @noRd
63+
//' @param W the adjacency matrix to be normalized
64+
//' @return returns the normalized matrix
65+
// [[Rcpp::interfaces(r, cpp)]]
66+
// [[Rcpp::export]]
67+
SpMat stoch_col_norm_s(const SpMat &W) {
68+
SpMat res = stoch_col_norm_t(W);
69+
res.makeCompressed();
4670
return res;
4771
}
4872

@@ -141,10 +165,11 @@ MatrixXd hub_normalize_t(const temp &W) {
141165

142166
#pragma omp parallel for
143167
for (unsigned int i = 0; i < n; ++i) {
168+
double mh;
144169
for (unsigned int j = 0; j < n; ++j) {
145170
if (W.coeff(i, j) != 0) {
146171
// Equivlant with: double mh = fc(node_degrees[i] / node_degrees[j]);
147-
double mh = node_degrees[i] / node_degrees[j];
172+
mh = node_degrees[i] / node_degrees[j];
148173
res.coeffRef(i, j) = min(1.0, mh) / node_degrees[i];
149174
}
150175
}

0 commit comments

Comments
 (0)