## ----setup, echo=FALSE,error=FALSE,message=FALSE-------------------------
# Directory holding the pre-built BRCA data set and the cached MCIA result.
datadir <- "/data/IntPathAnalysis/BRCA/"

library(knitcitations)
library(bibtex)
library(knitr)
library(AnnotationDbi)
library(org.Hs.eg.db)

# Bibliography file read from the current working directory.
allbib <- read.bibtex(file.path(".", "bioc.bib"))

# NOTE(review): the option names and the cache directory were corrupted in the
# copy this was recovered from; "fig.path" / "cache.path" and "cache-local/"
# are the most plausible reading -- confirm against the original document.
opts_chunk$set(cache = TRUE, fig.path = "figure/", cache.path = "cache-local/")

## ---- loadBRCA ------------------------------------------------------------
# NOTE(review): the file name was recovered from corrupted text (it appeared
# with spaces around "+"); confirm it matches the file on disk.
load(file.path(datadir, "BRCA_TCGA_79_filtered_matchedNames_Basal+LuminalA.RData"))
summary(BRCA_TCGA_79)

## ----codeToCreateDataset, echo=FALSE, eval=FALSE, message=FALSE----------
# NOTE(review): the commented-out statements of this non-evaluated chunk were
# recovered from corrupted text; identifiers, file names and arguments are a
# best-effort reading and must be confirmed before the chunk is ever run.
## # Files were downloaded using wget from https://tcga-data.nci.nih.gov/docs/publications/brca_2012/
##
## # Read each file
## miRNA<-read.table("BRCA.348.mimat.txt", header=TRUE, sep=",", row.names=1)
## miRNAprecursor<-read.table("BRCA.348.precursor.txt", header=TRUE, sep=",", row.names=1)
## RNAseq<-read.table("BRCA.exp.348.med.txt", sep="\t", header=TRUE, row.names=1, quote="\"")
## Methyl<-read.table("BRCA.Methylation.574probes.802.txt", sep="\t", header=TRUE)
## GISTIC<-read.table("brca_scna_all_thresholded.by_genes.txt", sep="\t", header=TRUE)
## RPPA<-read.table("rppaData-403Samp-171Ab-Trimmed.txt", sep="\t", header=TRUE, row.names=1)
##
## # Create a vector of the names of these data matrices
## rawData<-ls()
##
## # In the GISTIC data, the first 3 columns are gene annotation. Ignore these.
## GISTIC_fData= GISTIC[,1:3]
## GISTIC= GISTIC[,-c(1:3)]
##
## # Create a list of sampleNames for each dataset
## colNames<-lapply(rawData, function(x) cbind(colnames(get(x)), substr(colnames(get(x)),1,12)))
## names(colNames)=rawData
##
## # Get the list of intersecting sampleNames
## BRCA_348<-list(intersect(colNames[["RPPA"]][,2], colNames[["miRNAprecursor"]][,2]))
##
##
## # Filter each dataset to the 348 intersecting samples
## for (i in rawData) {
##   colNames[[i]]<-colNames[[i]][colNames[[i]][,2]%in%BRCA_348[[1]],]
##   rownames(colNames[[i]])<-colNames[[i]][,2]
##   colNames[[i]]<-colNames[[i]][BRCA_348[[1]],]
## }
##
## rm(i)
##
## # Filter the datasets
## BRCA_TCGA<-list(GISTIC=GISTIC[,colNames[["GISTIC"]][,1]],
##                 miRNA=miRNA[,colNames[["miRNA"]][,1]],
##                 miRNAprecursor=miRNAprecursor[,
# NOTE(review): the commented-out statements below belong to a non-evaluated
# chunk and were recovered from corrupted text; identifiers, string values and
# file names are a best-effort reading -- confirm against the original source.
## colNames[["miRNAprecursor"]][,1]],
##                 RNAseq=RNAseq[,colNames[["RNAseq"]][,1]],
##                 Methyl=Methyl[,colNames[["Methyl"]][,1]],
##                 RPPA=RPPA[,colNames[["RPPA"]][,1]])
##
##
# NOTE(review): the next four statements were badly scrambled (the source text
# read "BRCA_TCGA$LOH= BRCA_TCGA$LOH, ... # BRCA_TCGA$GISTIC ...
# BRCA_TCGA$CNA= BRCA_TCGA$CNA,2, function(x) ..."); the right-hand sides of
# the first and third lines are a guess.
## BRCA_TCGA$LOH= BRCA_TCGA$GISTIC
## BRCA_TCGA$LOH= apply(BRCA_TCGA$LOH, 2, function(x) ifelse(x==-1, 1,0))
## BRCA_TCGA$CNA= BRCA_TCGA$GISTIC
## BRCA_TCGA$CNA= apply(BRCA_TCGA$CNA,2, function(x) ifelse(x==-1, 0,x))
##
## lapply(BRCA_TCGA, dim)
## lapply(BRCA_TCGA, function(x) sum(is.na(x)))
##
##
## Thres=0.9
# NOTE(review): the following statement is truncated in the recovered text and
# is reproduced verbatim; restore it from the original source.
## xx<-apply(BRCA_TCGA$LOH, 1, function(x) sum(x==0) stantile(xx)[2]
## BRCA_TCGA$RNAseq<-BRCA_TCGA$RNAseq[xx,]
## ####### Get NA and 0
## tt<-apply(BRCA_TCGA$RNAseq,2, function(x) ifelse(is.na(x), 10^-6,x))
## tt<-apply(tt,2, function(x) ifelse(x==0, 10^-6,x))
## BRCA_TCGA$RNAseq<-tt
##
## lapply(BRCA_TCGA, function(x) sum(is.na(x)))
## lapply(BRCA_TCGA, dim)
##
## save(BRCA_TCGA, file="BRCA_TCGA_348_filtered.RData")
##
##
## system("wget http://www.nature.com/nature/journal/v490/n7418/extref/nature11412-s2.zip")
## system("unzip nature11412-s2.zip")
## system("xls2csv nature11412-s2/Supplementary Tables 1-4.xls")
## clin= read.csv("Supplementary Table 1.csv", header=TRUE, row.names=1, skip=1)
## gsub("\\.", "-", BRCA_348[[1]])%in%rownames(clin)
## BRCA_TCGA$clin<-clin[gsub("\\.", "-", BRCA_348[[1]]),]
##
## save(BRCA_TCGA, file="BRCA_TCGA_348_filtered.RData")
##
##
## # Make all of the sample names the same (for omicade4) and save the sample name data
## lapply(BRCA_TCGA[1:7], function(x) all(substr(colnames(x),1,12)==BRCA_348[[1]]))
## for (i in 1:7) colnames(BRCA_TCGA[[i]])<-gsub("\\.", "-", BRCA_348[[1]])
## colNames<-lapply(colNames, function(x) cbind(x, gsub("\\.", "-", BRCA_348[[1]])))
## for (i in colNames) rownames(i)<-rownames(BRCA_TCGA$clin)
## BRCA_TCGA$sampleNameInfo<-colNames
## save(BRCA_TCGA, file="BRCA_TCGA_348_filtered_matchedNames.RData")
##
## ############
## lum<-BRCA_TCGA$clin$RPPA.Clusters=="LumA" & BRCA_TCGA$clin$PAM50.mRNA=="Luminal A" & BRCA_TCGA$clin$ER.Status=="Positive"
## basal<-BRCA_TCGA$clin$RPPA.Clusters=="Basal" & BRCA_TCGA$clin$PAM50.mRNA=="Basal" & BRCA_TCGA$clin$ER.Status
# NOTE(review): the commented-out statements down to the PCA chunk are the end
# of a non-evaluated chunk and were recovered from corrupted text; confirm the
# identifiers and string values against the original source.
## =="Negative"
## table(basal)
## table(lum)
## table(basal|lum)
## BRCA_TCGA_79<-lapply(BRCA_TCGA[1:7], function(x) return(x[,rownames(BRCA_TCGA$clin)[basal|lum]]))
## BRCA_TCGA_79$clin<-BRCA_TCGA$clin[colnames(BRCA_TCGA_79[[1]]),]
## save(BRCA_TCGA_79, file="BRCA_TCGA_79_filtered_matchedNames_Basal+LuminalA.RData")

## ---- PCA -------------------------------------------------------------------------------
library(ade4)
# scannf = FALSE with nf = 5 keeps five axes without prompting for a number.
BRCApca <- dudi.pca(BRCA_TCGA_79$RNAseq, scannf = FALSE, nf = 5)
print(BRCApca)

## ----pcasumm-------------------------------------------------------------
summary(BRCApca)

## ----plotcoa,message=FALSE, warning=FALSE--------------------------------
library(made4)
# Rebuild the class factor from its character values before passing it on.
cl <- as.character(BRCA_TCGA_79$clin$PAM50.mRNA)
BRCAord <- ord(BRCA_TCGA_79$RNAseq, classvec = factor(cl))
summary(BRCAord$ord)
# One "T" label per column of the RNAseq table instead of a hard-coded 79.
plot(BRCAord, nlab = 3, arraylabels = rep("T", ncol(BRCA_TCGA_79$RNAseq)))

## ----plotarrays2, message=FALSE-----------------------------------------
par(mfrow = c(2, 1))
plotarrays(BRCAord$ord$co, classvec = BRCA_TCGA_79$clin$PAM50.mRNA)
plotgenes(BRCAord, n = 5, col = "red")

## ----topgenes------------------------------------------------------------
ax1 <- topgenes(BRCAord, axis = 1, n = 5)

## ----BRCAciaplot, echo=FALSE, warning=FALSE------------------------------
# Coinertia analysis of the RNAseq and RPPA tables.
BRCAcia <- cia(BRCA_TCGA_79$RNAseq, BRCA_TCGA_79$RPPA)
BRCAcia$coinertia
plot(BRCAcia, classvec = BRCA_TCGA_79$clin$PAM50.mRNA, nlab = 3, clab = 0,
     cpoint = 3)

## ----ciaMoreCocircle-----------------------------------------------------
par(mfrow = c(1, 2))
s.corcircle(BRCAcia$coinertia$aX)
s.corcircle(BRCAcia$coinertia$aY)

## ----mcia, cache=TRUE, eval=FALSE----------------------------------------
## # Check that there are no zero or low variant low count rows.
## for (i in 1:7) BRCA_TCGA_79[[i]]<-BRCA_TCGA_79[[i]][!apply(BRCA_TCGA_79[[i]],1, sum)==min(BRCA_TCGA_79[[i]]),]
##
## library(omicade4)
## mcia79<-mcia(BRCA_TCGA_79[1:7])
## save(mcia79, file="mciaRes.RData")

## ----loadmCia, echo=FALSE------------------------------------------------
# library() stops immediately if the package is missing; require() would only
# return FALSE and let the script fail later.
library(omicade4)
# Loads the saved MCIA result (the commented code above saves it as `mcia79`).
load(file.path(datadir, "mciaRes.RData"))

## ----plotmcia, warning=FALSE, message=FALSE------------------------------
plot(mcia79, axes = 1:2, sample.lab = FALSE, sample.legend = FALSE,
     phenovec = as.numeric(BRCA_TCGA_79$clin$PAM50.mRNA), gene.nlab = 2,
     df.color = c("navy", "cyan", "magenta", "red4", "brown", "yellow",
                  "orange"),
     df.pch = 2:8)

## ----rv, echo=FALSE------------------------------------------------------
# RV to reference. Computes the RV coefficient between the representations of
# individual cases ($Tl1) and the synthetic variables (reference, $SynVar).
#
# see RV.rtest
# Thanks Pierre Bady (Lyon)
#
# Arguments:
#   m    object inheriting from class "mcoa"; the components TL, Tl1, SynVar
#        and cov2 are read.
#   ...  accepted for compatibility, currently ignored.
# Value:
#   numeric vector with one coefficient per block found in m$TL[, 1] (in sorted
#   block order), named with row.names(m$cov2).
# Errors:
#   stops with "non convenient data" when m is not an "mcoa" object.
RV.mcoa <- function(m, ...) {
  if (!inherits(m, "mcoa")) stop("non convenient data")

  block_id <- m$TL[, 1]
  blo <- sort(unique(block_id))

  # Centre the columns, then divide by sum(d^4)^0.25 (d = singular values) so
  # that the sum of the fourth powers of the singular values equals 1.
  normalise <- function(tab) {
    centred <- scale(tab, scale = FALSE)
    as.matrix(centred / (sum(svd(centred)$d^4)^0.25))
  }

  # The reference table does not depend on the block: normalise it once.
  X <- normalise(m$SynVar)

  res <- vapply(seq_along(blo), function(i) {
    # Select rows by the actual block label rather than by the loop counter, so
    # block ids that are not exactly 1..n are still matched.
    Y <- normalise(m$Tl1[block_id == blo[i], , drop = FALSE])
    sum(svd(t(X) %*% Y)$d^2)
  }, numeric(1))

  names(res) <- row.names(m$cov2)
  res
}

## ----rvm-----------------------------------------------------------------
RV.mcoa(mcia79$mcoa)

## ----rvm2----------------------------------------------------------------
mcia79$mcoa$RV

## ----assesingMCIAcov-----------------------------------------------------
mcia79$mcoa$cov2
plot(mcia79$mcoa$cov2, xlab = "pseudoeig 1", ylab = "pseudoeig 2", pch = 19,
     col = "red")
text(mcia79$mcoa$cov2, labels = rownames(mcia79$mcoa$cov2), cex = 0.8, adj = 0)

## ----assessingMCIA, echo=FALSE, eval=FALSE-------------------------------
## mcia79$mcoa$lambda
## plot(mcia79$mcoa$lambda)

## ----mciaAxes, message=FALSE, warning=FALSE,fig.keep='all'---------------
mcia79$mcoa$Tax
# dev.off() raises an error when only the null device is open, so close the
# current graphics device only if there is one.
if (dev.cur() > 1L) dev.off()
par(mfrow = c(4, 2))
# One correlation circle per group of rows, grouped by the first three
# characters of the row names of Tax.
xx <- by(mcia79$mcoa$Tax, substr(rownames(mcia79$mcoa$Tax), 1, 3), s.corcircle)

## ----samplescores--------------------------------------------------------
#plotarrays(mcia79$mcoa$SynVar, classvec=BRCA_TCGA_79$clin$PAM50.mRNA)
# `cpoint` is spelled out in full; the abbreviated `cpoi` only worked through
# partial argument matching.
kplot(mcia79$mcoa, mfrow = c(3, 4), clab = .8, csub = 3, cpoint = 3)

## ----genescores----------------------------------------------------------
summary(mcia79$mcoa$axis)
par(mfrow = c(1, 2))
# Scores on each of the first two axes, split by the first column of TC.
plot(mcia79$mcoa$axis[, 1] ~ factor(mcia79$mcoa$TC[, 1]), col = 1:7,
     names = names(mcia79$coa), ylab = "Gene Scores PC1", xlab = "", las = 2)
plot(mcia79$mcoa$axis[, 2] ~ factor(mcia79$mcoa$TC[, 1]), col = 1:7,
     names = names(mcia79$coa), ylab = "Gene Scores PC2", xlab = "", las = 2)

## ----features------------------------------------------------------------
# The ten rows with the smallest scores on the first axis.
mcia79$mcoa$axis[order(mcia79$mcoa$axis[, 1]), ][1:10, 1, drop = FALSE]

## Dataset suffix
# Index every row of cov2 instead of assuming exactly seven datasets.
cbind(seq_len(nrow(mcia79$mcoa$cov2)), rownames(mcia79$mcoa$cov2))

## ----catData-------------------------------------------------------------
## To "concatenate" data,
# Drop the last two characters of each name.
# NOTE(review): presumably these are the dataset suffix listed above -- this
# only holds while the suffix is exactly two characters long; confirm.
mm <- function(x) substr(x, 1, nchar(x) - 2)
ids <- mm(rownames(mcia79$mcoa$axis))

# Whilst it would be great to have all of our data mapped to genome
# co-ordinates and really take the union of everything, to keep it simple, I
# will only look at the Gene Symbols (RNAseq, RPPA)
library(HGNChelper)
library(Biobase)
idsFix <- checkGeneSymbols(ids)

## Get PC1
idsPC <- cbind(idsFix, PC = mcia79$mcoa$axis)

GOs <- select(org.Hs.eg.db, columns = "GO", keytype = "SYMBOL",
              keys = idsPC$Suggested.Symbol)

# Get coordinates for GO terms: for each GO id, the column means of the two
# axis scores over the rows whose suggested symbol maps to that id.
res <- tapply(GOs$SYMBOL, GOs$GO, function(Syms) {
  colMeans(idsPC[idsPC$Suggested.Symbol %in% Syms, c("PC.Axis1", "PC.Axis2")])
})
res <- do.call(rbind, res)
res[1:2, ]

plot(mcia79$mcoa$axis, col = as.numeric(mcia79$mcoa$TC[, 1]),
     pch = as.numeric(mcia79$mcoa$TC[, 1]))
tt <- topgenes(res, n = 5)
points(res[tt, ], pch = 19, col = "gray")
text(res[tt, ], labels = tt, cex = 0.8, adj = 0)

## ----gsva, eval=FALSE----------------------------------------------------
## #Exclude NA
# Keep only the rows of idsPC whose Suggested.Symbol is not NA.
# NOTE(review): this is the only active statement here; every statement after
# it is commented out. It looks like it belongs to the same non-evaluated gsva
# section and lost its "## " marker -- confirm whether it should be commented
# out as well.
idsPC<-idsPC[!is.na(idsPC$Suggested.Symbol),]
##
## #Reduce to GeneSymbol and MCIA score, taking max
## idsPC1<-tapply(idsPC$PC.Axis1, idsPC$Suggested.Symbol,max)
## idsPC2<-tapply(idsPC$PC.Axis2, idsPC$Suggested.Symbol,max)
## if( !all(names(idsPC1)==names(idsPC2))) stop()
## idsPCs<-cbind(idsPC1, idsPC2)
##
##
## # The "built-in" gsva library are mapped to entrezIDs so map symbols
## entrezPC1<-select(org.Hs.eg.db, columns="ENTREZID",keytype="SYMBOL",keys=rownames(idsPCs))
## entrezPC1<-entrezPC1[!duplicated(entrezPC1[,1]),]
## table(rownames(idsPCs)==entrezPC1[,1])
## rownames(idsPCs)= entrezPC1[,2]
##
## # Run any enrichment test with gsva
## require(GSVA)
## require(GSVAdata)
## gsva(idsPCs, c2BroadSets)