## ----设置,回声=假--------------------------------------------选项(ERROR = TRACEBACT)库(LEARDBICOCTIONER)SET.SEED(123L)StopIFNOT(Biocinstaller :: Biocverse()==“3.0”)## ----风格,Echo = False,结果='ASIS'--------------------------生物天际:: Markdown()knitr :: Opts_chunk$ set(tidy = false)## ---- cvn-setup-1,回声= false -------------------------------------------------- - destdir < - “〜/ bigdata”如果(!file.exists(destdir))dir.create(destdir)## ---- CNV-Setup-2,消息= false ---------------------------------------------set path/to/download/directory, e.g., ## destdir <- "~/bam/copynumber" stopifnot(file.exists(destdir)) bamFiles <- file.path(destdir, c("tumorA.chr4.bam", "normalA.chr4.bam")) urls <- paste0("http://s3.amazonaws.com/copy-number-analysis/", basename(bamFiles)) for (i in seq_along(bamFiles)) if (!file.exists(bamFiles[i])) { download.file(urls[i], bamFiles[i]) download.file(paste0(urls[i], ".bai"), paste0(bamFiles[i], ".bai")) } ## ----cnv-workflow, message=FALSE----------------------------------------- ## 1. Load the cn.mops package suppressPackageStartupMessages({ library(cn.mops) }) ## 2. We can bin and count the reads reads_gr <- getReadCountsFromBAM(BAMFiles = bamFiles, sampleNames = c("tumor", "normal"), refSeqName = "chr4", WL = 10000, mode = "unpaired") ## 3. Noramlization ## We need a special normalization because the tumor has many large CNVs X <- normalizeGenome(reads_gr, normType="poisson") ## 4. Detect cnv's ref_analysis <- referencecn.mops(X[,1], X[,2], norm=0, I = c(0.025, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 8, 16, 32, 64), classes = paste0("CN", c(0:8, 16, 32, 64, 128)), segAlgorithm="DNAcopy") resCNMOPS <- calcIntegerCopyNumbers(ref_analysis) ## 5. Visualize the cnv's segplot(resCNMOPS) ## ----cnv-regions--------------------------------------------------------- human_cn <- cnvr(resCNMOPS) human_cn ## ----cnv-annotate-txdb, message=FALSE------------------------------------ library(TxDb.Hsapiens.UCSC.hg19.knownGene) ## subset to work with only chr4 txdb <- keepSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene, "chr4") genes0 <- genes(txdb) ## 'unlist' so that each range is associated with a single gene identifier idx <- rep(seq_along(genes0), elementLengths(genes0$gene_id)) genes <- granges(genes0)[idx] genes$gene_id = unlist(genes0$gene_id) ## ----cnv-annotate-------------------------------------------------------- olaps <- findOverlaps(genes, human_cn, type="within") idx <- factor(subjectHits(olaps), levels=seq_len(subjectLength(olaps))) human_cn$gene_ids <- splitAsList(genes$gene_id[queryHits(olaps)], idx) human_cn ## ----cvn-session-info---------------------------------------------------- restoreSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene) sessionInfo()