Visium空间转录组分析流程

# Analysis, visualization, and integration of spatial datasets with Seurat at https://satijalab.org/seurat/articles/spatial_vignette.html
# set path ------------------------------------------
rm(list=ls());options(stringsAsFactors=FALSE)
project_dir <- rprojroot::find_rstudio_root_file()
raw_dir <- paste0(project_dir,"/raw-data/")
process_dir <- paste0(project_dir,"/process-data/")
output_dir <- paste0(project_dir,"/output-results/")
temp_dir <- paste0(project_dir,"/temp-data/")
bin_dir <- paste0(project_dir,"/bin/")
script_dir <- paste0(project_dir,"/workflow-scripts/")
work_dir <- output_dir
setwd(work_dir)

# library and set default parameter  ----------------
library(tidyverse)
library(Seurat)
library(SeuratData) #devtools::install_github('satijalab/seurat-data')
library(patchwork)
library(edgeR)
library(SingleCellExperiment)
prefix <- "Seurat-3-"
output_prefix <- paste0(output_dir,prefix)

创建一些函数

# functions -------------
run_clustering <- function(data, resolution = 0.1, npcs=30) {
    
  data <- SCTransform(data, assay = "Spatial", verbose = TRUE)
  data <- RunPCA(data, assay = "SCT", verbose = TRUE)
  data <- FindNeighbors(data, reduction = "pca", dims = 1:npcs)
  data <- FindClusters(data, verbose = TRUE, resolution=resolution)
  data <- RunUMAP(data, dims = 1:npcs)
  return(data)
}

pooling_seu <- function(seu_obj,assay="SCT") {
  cell_metadata <- seu_obj@meta.data
  
  # create sudo id by random sample into 3 sets from each patients
  cell_metadata$sudo_sample_id <- NA
  samples <- unique(cell_metadata$orig.ident)
  nrg <- 3
  for(i in c(1:length(samples))){
    set.seed(i)
    temp_sample_idx <- which(cell_metadata$orig.ident==samples[i])
    sample_sudo_idx <- sample(c(1:nrg),length(temp_sample_idx),replace=T,prob=rep(1/nrg,nrg)) # random sample into 3 sets
    cell_metadata[temp_sample_idx,'sudo_sample_id'] <- paste(samples[i],sample_sudo_idx,sep='_')
  }
  sce <- SingleCellExperiment(assays = list(counts = GetAssayData(seu_obj,slot="count",assay=assay)), colData = cell_metadata)
  summed <- scuttle::aggregateAcrossCells(sce, id=colData(sce)[,'sudo_sample_id']) # aggregate counts by id
  return(summed)
}

dga_treat <- function(DGEList,ref_level=NA,set_regulate=TRUE){
  DGEList$samples$lib.size <- colSums(DGEList$counts)
  if(is.na(ref_level)){
    DGEList$samples$group <- relevel(DGEList$samples$group, ref = levels(DGEList$samples$group)[1] ) 
  } else{
    DGEList$samples$group <- relevel(DGEList$samples$group, ref = ref_level)
  }
  keep <- filterByExpr(DGEList, group=DGEList$samples$group)   #set your own threshold
  table(keep)
  DGEList <- DGEList[keep,, keep.lib.sizes=FALSE]
  DGEList <- calcNormFactors(DGEList)
  # DE
  design <- model.matrix(~DGEList$samples$group)
  DGEList <- estimateDisp(DGEList, design, robust=TRUE)
  fit <- glmQLFit(DGEList, design, robust=TRUE)
  #lrt <- glmQLFTest(fit, coef=ncol(fit$design))
  lrt <- glmTreat(fit, coef=ncol(fit$design), lfc=log2(1.2))
  print(table(decideTests(lrt)))
  res <- topTags(lrt,n = nrow(DGEList))
  if(set_regulate){
    res$table$regulate <- "Normal"
    res$table$regulate[res$table$logFC>0 & res$table$FDR<0.05] <- "Up"
    res$table$regulate[res$table$logFC<0 & res$table$FDR<0.05] <- "Down"
    res$table <- res$table[order(res$table$FDR),]
  }
  return(res)
}

plot_rctd <- function(RCTD,seu_obj, image_name="VLP43_kidney_A1",r=3){
  stopifnot("RCTD"%in% class(RCTD))
  stopifnot("FindClusters"%in% names(seu_obj@commands))
  norm_weights <- normalize_weights(RCTD@results$weights) # normalize the cell type proportions to sum to 1.
  query <- subset(seu_obj,cells = row.names(RCTD@spatialRNA@coords))
  pos <- RCTD@spatialRNA@coords
  annot <- query$seurat_clusters
  p <- vizAllTopics(as.matrix(norm_weights), pos,
                    #topicCols = qualitative_hcl(n=ncol(norm_weights),palette="Dynamic"),
                    topicCols = brewer.pal(n=ncol(norm_weights),name = "Paired"),
                    groups = annot,
                    group_cols = rainbow(length(levels(annot))),
                    #group_cols = rainbow_hcl(length(levels(annot))),
                    r=r,lwd = 0.1) + labs(title=image_name) +
    #ggplot2::guides(colour = "none") +
    theme(plot.background = ggplot2::element_rect(fill = "white"),plot.title = element_text(hjust = 0.5,size=15))
  return(p)
}

Visium空间转录组技术

当前时代的空间转录组技术大致分为五类方向:激光显微切割(laser capture microdissection,LCM),单分子荧光原位杂交(single molecular fluorescent in situ hybridization, smFISH),靶向原位测序(In situ sequencing,ISS),原位阵列捕获(In situ array capture, Array) 和 其他非成像技术(No imaging)。

目前商业用的最广的还是Visium平台,因此我们以Visium数据为例进行演示,进行如聚类,差异表达,反卷积等分析。使用的工具是Seurat,spacexr等包。在单细胞下游分析中,Seurat是一个非常强大的R包,不仅有强大的社区、详细文档说明还有很多内置数据。这里以Analysis, visualization, and integration of spatial datasets with Seurat 数据为例进行空间转录组分析。

1. 数据探索

查看测序数据质量,是否符合生物学背景。如果质量不好,需要进行质量控制。stxBrain已经是过滤后的数据,我们不需要进行质量控制。

#InstallData("stxBrain") # 如果由于网络问题,安装失败。可以将url放在浏览器中下载tar.gz文件,然后本地安装。
brain <- LoadData("stxBrain", type = "anterior1")

# Data preprocessing: Normalization 
plot1 <- VlnPlot(brain, features = "nCount_Spatial", pt.size = 0.1) + NoLegend()
plot2 <- SpatialFeaturePlot(brain, features = "nCount_Spatial") + theme(legend.position = "right")
wrap_plots(plot1, plot2)
# 斑点上分子计数的变化不仅是技术上的问题,而且还取决于组织的解剖结构。
# 例如,神经元(例如皮质白质)耗竭的组织区域可再现地显示出较低的分子数。


png

2. 聚类

和单细胞数据类似,我们需要对数据进行聚类。也可以使用run_clustering函数。

brain <- SCTransform(brain, assay = "Spatial", verbose = FALSE)
# Gene expression visualization
SpatialFeaturePlot(brain, features = c("Hpca", "Ttr"))
p1 <- SpatialFeaturePlot(brain, features = "Ttr", pt.size.factor = 1)
p2 <- SpatialFeaturePlot(brain, features = "Ttr", alpha = c(0.1, 1))
p1 + p2

# Dimensionality reduction, clustering 
brain <- RunPCA(brain, assay = "SCT", verbose = FALSE)
brain <- FindNeighbors(brain, reduction = "pca", dims = 1:30)
brain <- FindClusters(brain, verbose = FALSE)
brain <- RunUMAP(brain, reduction = "pca", dims = 1:30)

# visualization
p1 <- DimPlot(brain, reduction = "umap", label = TRUE)
p2 <- SpatialDimPlot(brain, label = TRUE, label.size = 3)
p1 + p2

png
png

png

3. 检测亚群特异和空间特异差异基因

对不同亚群进行差异分析,对不同空间分布中的细胞进行基因差异分析。

# Detecting spatially-variable features 
de_markers <- FindMarkers(brain, ident.1 = 5, ident.2 = 6)
head(de_markers)
SpatialFeaturePlot(object = brain, features = rownames(de_markers)[1:3], alpha = c(0.1, 1), ncol = 3)
A data.frame: 6 * 5
p_valavg_log2FCpct.1pct.2p_val_adj
<dbl><dbl><dbl><dbl><dbl>
Calb26.427214e-69 3.3368741.0000.5371.135560e-64
Camk2n11.519204e-68-2.4503881.0001.0002.684130e-64
Nrgn1.573095e-68-3.2298260.9711.0002.779344e-64
Stx1a2.104119e-68-2.2383530.7841.0003.717558e-64
Nptxr9.820482e-68-1.9182930.9421.0001.735083e-63
Lingo12.124777e-67-1.9132810.8801.0003.754056e-63

png

#在没有预注释的情况下搜索表现出空间模式的特征
brain <- FindSpatiallyVariableFeatures(brain, assay = "SCT", features = VariableFeatures(brain)[1:1000],selection.method = "moransi") # moransi mothed, markvariogram too slow
#top.features <- head(SpatiallyVariableFeatures(brain, selection.method = "moransi"), 6)
top.features <- c("Calb2","Gng4","Ttr","S100a5","Nrgn","Doc2g")
SpatialFeaturePlot(brain, features = top.features, ncol = 3, alpha = c(0.1, 1))

Computing Moran's I

png

4. 检测分组特异差异基因

由于此数据没有分组信息,我们导入另一个数据进行演示。由于直接对分组细胞进行差异分析会造成假阳性,这里我们用psudo-bulk差异分析

brain <- readRDS("/media/zhangfeng/myData/projects/sp-aging/output-results/70-visium-brain-cluster.RData")
# DEG group--------
table(brain@meta.data$group)
summed <- pooling_seu(brain)
class(summed)
head(summed@colData)
is_ribo<-grepl('Rps|Rpl',row.names(summed))
is_mito<-grepl('mt-',row.names(summed))
y <- DGEList(counts(summed)[(!is_ribo)&(!is_mito),], samples=colData(summed)$sudo_sample_id,group=colData(summed)$group) # DE between types
res <- dga_treat(DGEList=y,ref_level = "young")
head(res$table)

​ aged young ​ 10715 11479

DataFrame with 6 rows and 11 columns
            orig.ident nCount_Spatial nFeature_Spatial       group nCount_SCT
           <character>      <numeric>        <integer> <character>  <numeric>
VLP40_1A_1    VLP40_1A             NA               NA       young         NA
VLP40_1A_2    VLP40_1A             NA               NA       young         NA
VLP40_1A_3    VLP40_1A             NA               NA       young         NA
VLP40_1B_1    VLP40_1B             NA               NA       young         NA
VLP40_1B_2    VLP40_1B             NA               NA       young         NA
VLP40_1B_3    VLP40_1B             NA               NA       young         NA
           nFeature_SCT SCT_snn_res.0.1 seurat_clusters sudo_sample_id
              <integer>        <factor>        <factor>    <character>
VLP40_1A_1           NA              NA              NA     VLP40_1A_1
VLP40_1A_2           NA              NA              NA     VLP40_1A_2
VLP40_1A_3           NA              NA              NA     VLP40_1A_3
VLP40_1B_1           NA              NA              NA     VLP40_1B_1
VLP40_1B_2           NA              NA              NA     VLP40_1B_2
VLP40_1B_3           NA              NA              NA     VLP40_1B_3
                   ids    ncells
           <character> <integer>
VLP40_1A_1  VLP40_1A_1      1023
VLP40_1A_2  VLP40_1A_2      1041
VLP40_1A_3  VLP40_1A_3      1025
VLP40_1B_1  VLP40_1B_1       915
VLP40_1B_2  VLP40_1B_2       915
VLP40_1B_3  VLP40_1B_3       865
-1     0     1 
 87 14105   203 
A data.frame: 6 * 6
logFCunshrunk.logFClogCPMPValueFDRregulate
<dbl><dbl><dbl><dbl><dbl><chr>
Lyz22.68419112.68540735.3260102.446786e-172.992338e-13Up
C4b3.00826763.00881546.8199375.386843e-172.992338e-13Up
Itgax3.85976753.91292631.1720326.236203e-172.992338e-13Up
Cd521.38916821.38992044.5415121.229941e-154.426248e-12Up
Pcdhb90.92958530.93013494.3054682.580643e-157.429671e-12Up
Fcgr2b1.46224171.46505952.7512711.811064e-144.032808e-11Up

5. Deconvolution分析

由于Visium平台的spot不是单细胞分辨率,一般有1-10个细胞,因此我们需要对其进行注释,分析不同细胞的比例。这里使用spacexr包进行分析。

reference <- readRDS('/media/zhangfeng/myData/projects/sp-aging/output-results/4-brain-SCRef.rds')
# control sample
spatial_controls <- RCTD_replicates(seu_obj = brain[,brain$group=="young"])
RCTD_controls <- create.RCTD.replicates(spatial_controls$spatialRNA_replicates, reference, spatial_controls$samples, max_cores = 20)
RCTD_controls <- run.RCTD.replicates(RCTD_controls, doublet_mode = 'full')

# treatment samples
spatial_treats <- RCTD_replicates(seu_obj = brain[,brain$group=="aged"])
RCTD_treats <- create.RCTD.replicates(spatial_treats$spatialRNA_replicates, reference, spatial_treats$samples, max_cores = 20)
RCTD_treats <- run.RCTD.replicates(RCTD_treats, doublet_mode = 'full')

# add into seurat
spot_weight <- combine_weight(RCTD_controls,RCTD_treats)
meta_data <- brain@meta.data %>% rownames_to_column("spot_id") %>% 
  left_join(spot_weight,by="spot_id")
all(meta_data$spot_id==colnames(brain))  
meta_data <- column_to_rownames(meta_data,"spot_id")
brain <- AddMetaData(brain,metadata = meta_data)
saveRDS(brain,file = "/media/zhangfeng/myData/projects/sp-aging/output-results/70-visium-brain-RCTD.RData"))
brain <- readRDS("/media/zhangfeng/myData/projects/sp-aging/output-results/70-visium-brain-RCTD.RData")
head(brain@meta.data)
A data.frame: 6 * 17
orig.identnCount_SpatialnFeature_SpatialgroupnCount_SCTnFeature_SCTSCT_snn_res.0.1seurat_clustersAstrocytesEpendymalImmuneMicrogliaNeuronsOligosPeripheralGliaVascularmax_cell
<chr><dbl><int><chr><dbl><int><fct><fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr>
AAACAAGTATCTCCCA-1_1_1_1VLP40_1A 84213160young153353514330.191436110.00061589160.00498279050.0041975300.32362100.4330522000.0114504420.030644049Oligos
AAACACCAATAACTGC-1_1_1_1VLP40_1A230065649young168625507000.112935160.00138901130.00066171610.0117983060.80541240.0620500250.0020484770.003704872Neurons
AAACAGAGCGACTCCT-1_1_1_1VLP40_1A251316224young167825926000.048719260.00730105560.00263298750.0093575060.91326060.0048276190.0023132840.011587694Neurons
AAACAGCTTTCAGAAG-1_1_1_1VLP40_1A359937074young162985253000.059817600.00192471940.00010266600.0137292420.89182540.0072850130.0029664180.022348983Neurons
AAACAGGGTCTATATT-1_1_1_1VLP40_1A377387220young161635196000.057772060.00146488000.00091530180.0078765700.90050690.0239660300.0006755340.006822758Neurons
AAACATTTCCCGGATT-1_1_1_1VLP40_1A149014544young154604544330.132741070.00260310030.00015585200.0067282770.60666840.2186121930.0107173930.021773681Neurons

6. 绘图

slice_id <- names(RCTD_controls@group_ids)[1]
myRCTD_full <- RCTD_controls@RCTD.reps[[1]]
p_brain <- plot_rctd(myRCTD_full,brain,image_name = slice_id)+ggtitle(slice_id)+
    theme(plot.title=element_text(hjust = 0.5))+labs(fill="Cell_Type",color="Cluster")+
    guides(colour = "none")
p_brain