## ----样式,eval = true,echo = false,结果='asis'---------------------------------------------------------------------------------------------------------------- options(max.print=1000) BiocStyle::latex() library(knitr) opts_chunk$set(cache=TRUE, tidy=FALSE) ## ----packages, eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE---------------------- suppressPackageStartupMessages({ library(org.Hs.eg.db) library(txdb.hsapiens.ucsc.hg19.nowngene)库(bsgenome.hsapiens.ucsc.hg19)库(Genomicranges)库(biomart)库(rtracklayer)库(rtracklayer)})## ----------------------------------------------------------------------------------------------------------------------------------------------- <-c(“ ensg00000130720”,“ ensg0000010103257”,“ ensg00000156414”,“ ensg000001444644”,“ ensg00000159307”,“ ENSG000001444485”)------------------------------------------------------------------------------------------------------------------------- library(org.Hs.eg.db) keytypes(org.Hs.eg.db) columns(org.Hs.eg.db) cols <- c("SYMBOL", "GENENAME") select(org.Hs.eg.db, keys=ensids, columns=cols, keytype="ENSEMBL") ## ----biomaRt1, eval=FALSE, results="hide"----------------------------------------------- # ## NEEDS INTERNET ACCESS !! # library(biomaRt) # head(listMarts(), 3) ## list the marts # head(listDatasets(useMart("ensembl")), 3) ## mart datasets # ensembl <- ## fully specified mart # useMart("ensembl", dataset = "hsapiens_gene_ensembl") # # head(listFilters(ensembl), 3) ## filters # myFilter <- "chromosome_name" # substr(filterOptions(myFilter, ensembl), 1, 50) ## return values # myValues <- c("21", "22") # head(listAttributes(ensembl), 3) ## attributes # myAttributes <- c("ensembl_gene_id","chromosome_name") # # ## assemble and query the mart # res <- getBM(attributes = myAttributes, filters = myFilter, # values = myValues, mart = ensembl) ## ----symbol-to-entrez------------------------------------------------------------------- library(org.Hs.eg.db) eid <- select(org.Hs.eg.db, "BRCA1", "ENTREZID", "SYMBOL")[["ENTREZID"]] ## ----entrez-to-tx----------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene txid <- select(txdb, eid, "TXNAME", "GENEID")[["TXNAME"]] ## ----tx-to-cds-coords------------------------------------------------------------------- cds <- cdsBy(txdb, by="tx", use.names=TRUE) brca1cds <- cds[names(cds) %in% txid] class(brca1cds) length(brca1cds) brca1cds[[1]] # exons in cds cdswidth <- width(brca1cds) # width of each exon all((sum(cdswidth) %% 3) == 0) # sum within cds, modulus 3 ## ----cds-to-seq------------------------------------------------------------------------- library(BSgenome.Hsapiens.UCSC.hg19) genome <- BSgenome.Hsapiens.UCSC.hg19 tx_seq <- extractTranscriptSeqs(genome, brca1cds) tx_seq ## ----introns---------------------------------------------------------------------------- introns <- psetdiff(range(brca1cds), brca1cds) ## ----intron-seqs------------------------------------------------------------------------ seq <- getSeq(genome, introns) names(seq) seq[["uc010whl.2"]] # 21 introns ## ----rtracklayer-roi-------------------------------------------------------------------- library(GenomicRanges) roi <- GRanges("chr10", IRanges(92106877, 112106876, names="ENSG00000099194")) ## ----rtracklayer-session---------------------------------------------------------------- library(rtracklayer) session <- browserSession() ## ----rtracklayer-marks------------------------------------------------------------------ trackName <- "wgEncodeRegTfbsClusteredV2" tableName <- "wgEncodeRegTfbsClusteredV2" trFactor <- "ERalpha_a" ucscTable <- getTable(ucscTableQuery(session, track=trackName, range=roi, table=tableName, name=trFactor)) ## ----rtracklayer-plot, fig.height=3----------------------------------------------------- plot(score ~ chromStart, ucscTable, pch="+") abline(v=start(roi) + (end(roi) - start(roi) + 1) / 2, col="blue")