Skip to content

Commit 4bc7e09

Browse files
author
Pietro ANDREI
committed
move tmp funcs to devel
1 parent a1bd588 commit 4bc7e09

File tree

2 files changed

+0
-212
lines changed

2 files changed

+0
-212
lines changed

R/03_NB_based_cell_comparison.R

Lines changed: 0 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -195,136 +195,3 @@ nbCluster = function(seurat=NULL,n_clust=3:12,seed=347548){
195195
# }
196196
return(seurat)
197197
}
198-
199-
##Alternative function for NB clustering. To be commented...
200-
#test_cluster2 = function(seurat=NULL,k_range = 4:15,max_clust=20,seed=347548){
201-
# nnmat = KanData(seurat,'nnMat')$nnMat
202-
# tot_nn = nnmat[,'tot_nn']
203-
# tot_nn[tot_nn == 0] = 1
204-
# nnmat[,'tot_nn'] = NULL
205-
# nnmat = nnmat %>% dplyr::select_if(is.numeric)
206-
# scaled_nnmat = Matrix::Diagonal(x=(1/tot_nn),names = T) %*% as(as.matrix(nnmat),'CsparseMatrix')
207-
# rownames(scaled_nnmat) = rownames(nnmat)
208-
# scaled_nnmat = as.data.frame(as.matrix(scaled_nnmat))
209-
# clusters = lapply(k_range,function(x){
210-
# set.seed(seed)
211-
# clust = as.data.frame(kmeans(scaled_nnmat,centers=x,iter.max = 20)$cluster)
212-
# colnames(clust) = paste0('K_',x)
213-
# return(clust)
214-
# })
215-
# clusters = purrr::reduce(clusters,cbind) %>% mutate_all(as.factor)
216-
# set.seed(seed)
217-
# clusters = greed::greed(clusters,model=greed::Lca(),alg = greed::Seed(), K=max_clust)
218-
# clusters = greed::clustering(clusters)
219-
# names(clusters) = rownames(na.omit(scaled_nnmat))
220-
# clusters[setdiff(colnames(seurat),names(clusters))] = NA
221-
# seurat[["LCA_nbClust"]] = as.character(clusters[colnames(seurat)])
222-
# return(seurat)
223-
#}
224-
225-
226-
#' @title Calculate Shannon entropy of cell neighbourhood composition
227-
#' @name nb_entropy
228-
#' @description
229-
#' Given cell type composition of single-cell neighbourhood, calculate Shannon entropy associated with each neighbourhood
230-
#' @param seurat seurat object containing Kandinsky data slot with pre-computed neighbourhood composition matrix in `nnMat` slot
231-
#' @returns seurat object with new metadata variable reporting neighbourhood entropy for each cell
232-
#' @export
233-
nb_entropy = function(seurat){
234-
nnmat = KanData(seurat,'nnMat')$nnMat
235-
tot_nn = nnmat[,'tot_nn']
236-
tot_nn[tot_nn == 0] = 1
237-
nnmat[,'tot_nn'] = NULL
238-
nnmat = nnmat %>% dplyr::select_if(is.numeric)
239-
scaled_nnmat = Matrix::Diagonal(x=(1/tot_nn)) %*% as(as.matrix(nnmat),'CsparseMatrix')
240-
seurat$nb_entropy = apply(scaled_nnmat,1,function(x){
241-
logv = log2(x)
242-
logv = ifelse(logv == -Inf,0,logv)
243-
return(-sum(x*logv))})
244-
return(seurat)
245-
}
246-
247-
248-
#' @title Neighbourhood-based multiple linear regression of gene expression values
249-
#' @name nb_lm
250-
#' @description
251-
#' Fit a multiple linear regression for a gene of interest using neighbourhood cell count for each cell type as independent variables
252-
#'
253-
#' @details
254-
#' This function is based on fastLm() function from RcppEigen pakage. In case user wanted to include one or more confounfing (random) variables into the model,
255-
#' the function lmer() in instead used to fit a mixed effect model. In both cases, the output is a data frame reporting regression coefficient and pvalue for each independent variable.
256-
#'
257-
#' @param seurat a Seurat object containing Kandinsky data slot
258-
#' @param label character string specifying the name of the variable to be used for cell type annotation.
259-
#' @param which name of the cell type to consider for fitting linear models. Currently only one cell type is accepted as input
260-
#' @param features character vector containing gene identifiers to be fitted
261-
#' @param layer name of Seurat layer to use for gene expression data
262-
#' @param rand name of random variables to be included in a mixed effect model.
263-
#' @param minprop numeric, genes expressed in less than this proportion of cells will be excluded from the analysis
264-
#'
265-
#' @returns data frame reporting, for each gene, the regression coefficient and p-value associated with each cell type
266-
#' @export
267-
#'
268-
nb_lm = function(seurat,label=NULL,which=NULL,features=NULL,layer='data',rand=NULL,minprop=0.1){
269-
if(length(KanData(seurat,'nnMat'))>0){
270-
mat = KanData(seurat,'nnMat')$nnMat
271-
}else{
272-
mat = nnMat(seurat,label=label,return.seurat=F)
273-
}
274-
if(!is.null(rand)){
275-
rands = FetchData(seurat,rand)
276-
rands = rands[rownames(mat),,drop=F]
277-
mat[,colnames(rands)] = rands
278-
}
279-
label = label %||% KanData(seurat,'nnMat')$col.anno
280-
features = features %||% rownames(seurat)
281-
if(is.null(which)){
282-
warning('argument "which" not specified. All cells in the dataset will be considered as a unique class')
283-
}else{
284-
cells = rownames(seurat@meta.data[which(seurat@meta.data[[label]] %in% which),])
285-
mat = mat[cells,]
286-
}
287-
#Prepare gene expression data (keep only expressed genes)
288-
expr = LayerData(seurat,layer)
289-
expr = expr[,rownames(mat)]
290-
keep = which(rowSums(expr >0) >=round(ncol(expr)*minprop))
291-
expr = expr[keep,]
292-
features = intersect(features,rownames(expr))
293-
294-
message('testing ',length(features),' genes detected in at least ',round(ncol(expr)*minprop),' cells')
295-
expr = expr[features,]
296-
297-
#Prepare linear model formula
298-
labels = unique(as.character(seurat@meta.data[,c(label)]))
299-
labels = intersect(labels,colnames(mat))
300-
if(is.null(rand)){
301-
formula =paste0('RcppEigen::fastLm( scale(expr[x,]) ~ ',paste(paste0('mat[,"',labels,'"]'),collapse="+"),')')
302-
}else{
303-
formula =paste0('lmerTest::lmer( scale(expr[x,]) ~ ',paste(paste0('mat[,"',labels,'"]'),collapse="+"),'+',
304-
paste(paste0('(1|mat[,"',rand,'"])'),collapse="+"),',control=lme4::lmerControl(calc.derivs = FALSE,optCtrl = list(algorithm = "NLOPT_LN_BOBYQA")))')
305-
}
306-
if(is.null(rand)){
307-
message('Fitting linear models for ',length(features),' genes...')
308-
}else{
309-
message('Fitting linear mixed models (',paste(rand,collapse=','),' as random effect(s))',' for ',length(features),' genes...')
310-
}
311-
#Fit linear (mixed) model for all genes to be tested
312-
all_genes = lapply(features,function(x){
313-
flmmod = suppressMessages(eval(parse(text=formula)))
314-
if(is.null(rand)){
315-
res = as.data.frame(summary(flmmod)[[1]][-1,c(3,4)])
316-
}else{
317-
res = as.data.frame(stats::anova(flmmod)[,c(5,6)])
318-
}
319-
res$gene = x
320-
res$var = labels
321-
rownames(res) = NULL
322-
return(res)
323-
})
324-
#Combine results from all genes into a unique table
325-
all_genes = purrr::reduce(all_genes,rbind)
326-
colnames(all_genes)[c(1,2)] = c('stat','pval')
327-
colnames(all_genes)[4] = label
328-
message('...done!')
329-
return(all_genes)
330-
}

R/06_Utils.R

Lines changed: 0 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -263,54 +263,6 @@ resample_cells = function(seurat=NULL,label='cell_types',spatial=T,maxcells=1000
263263
}
264264
}
265265

266-
267-
#' @title subsample neighbour network links
268-
#' @name subsample_nb
269-
#' @description
270-
#' Given a Kandinsky neighbour network in a listw format, this function randomly subset
271-
#' the number of neighbours assigned to each cell up to a user-defined maximum number of neighbours.
272-
#' @details
273-
#' In order to be compatible with most of Kandinsky functions (due to its dependency on spdep package), the resulting
274-
#' neighbour matrix must be symmetric even after the subsetting. Therefore, an additional step to ensure matrix symmetry
275-
#' is applied after the subsetting, and for this reason some cells might still have a number of neighbours higher than the user-provided threshold.
276-
#'
277-
#' @param seurat a Seurat object containing Kandinsky data slot
278-
#' @param exp_links numeric, maximum number of expected neighbours per cell after the subsetting
279-
#' @param seed numeric, random seed for reproducibility
280-
#' @returns updated seurat object network in a listw format
281-
#'
282-
#' @export
283-
subsample_nb = function(seurat=NULL,exp_links=6,seed=347548){
284-
mat= as(KanData(seurat,'nb'),'CsparseMatrix')
285-
#if(inherits(listw,'listw')){data='listw';listw=as(listw,'CsparseMatrix')}
286-
avg_size= median(Matrix::rowSums(mat))
287-
message('Current median number of neighbour links per cell is ',avg_size,'. Trying to reduce to ',exp_links,'...')
288-
#ratio = (exp_links/avg_size)
289-
#prop = (1-ratio/2)
290-
set.seed(seed)
291-
colN <- diff(mat@p)
292-
indx <- cbind(mat@i+1,rep(seq_along(colN),colN))
293-
indx = cbind(indx,1:length(mat@x))
294-
indx = as.data.frame(indx)
295-
indx = dplyr::group_by(indx,.data[["V2"]]) %>%
296-
dplyr::mutate(size=ifelse(dplyr::n()>(exp_links/2),(exp_links/2),dplyr::n())) %>%
297-
dplyr::filter(.data[["V1"]] %in% sample(.data[["V1"]],size=unique(.data[["size"]]))) %>%
298-
dplyr::ungroup() %>%
299-
dplyr::select(.data[["V3"]])
300-
301-
#mat@x[sample(length(mat@x),size=round(length(mat@x)*prop))] = 0
302-
mat@x[setdiff(1:length(mat@x),indx$V3)]=0
303-
mat = Matrix::drop0(mat)
304-
message('Making neighbour matrix symmetric...')
305-
mat = mat + Matrix::t(mat)
306-
mat@x[mat@x>1]=1
307-
new_avg_size = median(Matrix::rowSums(mat))
308-
message('Returning updated neighour network with a median number of ',new_avg_size,' links per cell')
309-
KanData(seurat,'nb') = suppressWarnings(spdep::mat2listw(mat,row.names=rownames(mat),style = 'B',zero.policy=T))
310-
return(seurat)
311-
}
312-
313-
314266
#' @name global_univ_spatcor
315267
#' @title Compute global Moran's I spatial autocorrelation statistic
316268
#' @param seurat a Seurat object containing Kandinsky data (`KanData()`)
@@ -428,37 +380,6 @@ get_nbcounts = function(seurat=NULL,label=NULL,which=NULL){
428380
return(nb)
429381
}
430382

431-
#' @title Calculate gene-specific contamination score
432-
#' @name gene_contamination_score
433-
#' @description
434-
#' For each gene expressed within a dataset, given a cell type of interest,
435-
#' this function will calculate a contamination score expressed as the fold change between the average expression of each cell belonging to the defined cell type
436-
#' and the average expression of their neighbouring cells defined by Kandinsky belonging to different cell types. A high or low contamination score indicates a lower or higher expression of a gene within a cell than its neighbouring cells, respectively.
437-
#'
438-
#' @param seurat Seurat object containing Kandinsky data slot
439-
#' @param label character string indicating meta data variable containing cell type annotation
440-
#' @param which character vector indicating for which cell type the gene contamination scores will be calculated
441-
#' @returns named vector of contamination scores, with each score named with its corresponding gene symbol
442-
#' @export
443-
gene_contamination_score = function(seurat=NULL,label=NULL,which=NULL){
444-
envmat = get_nbcounts(seurat,label,which)
445-
if(length(KanData(seurat,'nnMat'))>0){
446-
tot_nn = KanData(seurat,'nnMat')$nnMat[,'tot_nn']
447-
}else{
448-
tot_nn = nnMat(seurat,label=label,return.seurat=F)[,'tot_nn']
449-
}
450-
if(!is.null(label) & !is.null(which)){
451-
tot_nn = tot_nn[seurat@meta.data[[label]] %in% which]
452-
selfmat = Matrix::rowMeans(LayerData(seurat,'counts')[,seurat@meta.data[[label]] %in% which])
453-
}else{
454-
selfmat = Matrix::rowMeans(LayerData(seurat,'counts'))
455-
}
456-
envmat = Matrix::t(Matrix::t(envmat) %*% Matrix::Diagonal(x=1/tot_nn))
457-
envmat = Matrix::colMeans(envmat)
458-
scores = setNames(nm=rownames(seurat),object=envmat/selfmat)
459-
return(scores)
460-
}
461-
462383
#################################
463384
###########COSMX UTILS###########
464385
#################################

0 commit comments

Comments
 (0)