# http://geossdev.med.virginia.edu/research/teaching/2011/R/source-R-tutorial-Day2-bioconductor.R
# http://www.ncbi.nlm.nih.gov/geo/geo2r/?acc=GSE10784
# http://www2.warwick.ac.uk/fac/sci/moac/people/students/peter_cock/r/geo/
# Version info: R 2.12.1, Biobase 2.12.1, GEOquery 2.18.0, limma 3.8.1
# R scripts generated Thu Feb 23 16:57:44 EST 2012
# Unable to generate script analyzing differential expression.
# Invalid input: at least two groups of samples should be selected.
################################################################
# Boxplot for selected GEO samples
library(Biobase)
library(GEOquery)
# Load the GSE10784 series matrix from GEO. The result is handled as a list
# that may hold more than one element (one per platform).
gset <- getGEO("GSE10784", GSEMatrix = TRUE)

# When several elements come back, select the one whose name mentions GPL1261;
# otherwise use the only element. Choosing the index and extracting the element
# are two separate statements -- written on a single line they do not parse.
if (length(gset) > 1) {
  idx <- grep("GPL1261", names(gset))
} else {
  idx <- 1
}
gset <- gset[[idx]]

# Download the GPL1261 platform file into the current directory and load it.
gpl1261 <- getGEO("GPL1261", destdir = ".")

# Show the probe ID and corresponding gene symbol for the first ten rows.
Table(gpl1261)[1:10, c("ID", "Gene.Symbol")]
# Example output:
#   ID         Gene.Symbol
# 1 1415670_at Copg
# 2 1415671_at Atp6v0d1
# Draw a boxplot of the expression values, one box per sample.

# Open a new device whose width grows with the second dimension of gset
# (the sample count), then size the bottom margin from the longest sample
# name so the vertical axis labels (las = 2) fit. These are two separate
# calls; fused on one line they do not parse.
dev.new(width = 4 + dim(gset)[[2]] / 5, height = 6)
par(mar = c(2 + round(max(nchar(sampleNames(gset))) / 2), 4, 2, 1))

title <- paste0("GSE10784", "/", annotation(gset), " selected samples")
boxplot(exprs(gset), boxwex = 0.7, notch = TRUE, main = title,
        outline = FALSE, las = 2)

# The legend needs a character vector of group labels. This script was
# generated without sample groups, so `labels` is normally undefined and the
# name resolves to the base function labels(), which legend() cannot draw.
# Only add the legend when a character `labels` vector has been defined.
if (is.character(labels)) {
  legend("topleft", labels, fill = palette(), bty = "n")
}
# Quick look at the loaded data set.
# Identifiers of the first ten features:
featureNames(gset)[seq_len(10)]
# Names of all samples:
sampleNames(gset)
# Phenotype (sample annotation) table:
pData(gset)
# Read a raw CEL file and compute RMA expression values.
# ReadAffy() and rma() are provided by the Bioconductor affy package, which
# is not attached anywhere else in this script, so attach it here.
library(affy)

# A single sample: the CEL file was downloaded from GEO, untarred and
# unzipped, and is read via a relative path.
gsm272325 <- ReadAffy("GSM272325.CEL")

# Reference:
# http://www.biostat.iupui.edu/~XiaochunLi/Portugal/Biocon_lab1/Biocon_lab1.pdf
# rma() background-corrects, normalizes, and summarizes the probe-level data
# into an expression measure for each probe set (gene) on each array that was
# read (only one array here, not six as in the referenced lab). The
# expression values are on the log base 2 scale.
eset <- rma(gsm272325)

# Expression matrix and its dimensions.
e <- exprs(eset)
dim(e)
# (Blog page footer captured with the script:)
# No comments:
# Post a Comment