Thursday, February 23, 2012

GEO samples

http://geossdev.med.virginia.edu/research/teaching/2011/R/source-R-tutorial-Day2-bioconductor.R
http://www.ncbi.nlm.nih.gov/geo/geo2r/?acc=GSE10784
http://www2.warwick.ac.uk/fac/sci/moac/people/students/peter_cock/r/geo/

# Version info: R 2.12.1, Biobase 2.12.1, GEOquery 2.18.0, limma 3.8.1
# R scripts generated Thu Feb 23 16:57:44 EST 2012

# Unable to generate script analyzing differential expression.
# Invalid input: at least two groups of samples should be selected.

################################################################
# Boxplot for selected GEO samples
library(Biobase)
library(GEOquery)
library(affy)

# Load series and platform data from GEO.
gset <- getGEO("GSE10784", GSEMatrix = TRUE)

# getGEO() returned a list here; if it holds more than one entry, pick the one
# whose name mentions platform GPL1261, otherwise take the single entry.
if (length(gset) > 1) {
  idx <- grep("GPL1261", attr(gset, "names"))
} else {
  idx <- 1
}
gset <- gset[[idx]]

 #Download GPL file, put it in the current directory, and load it:
gpl1261 <- getGEO("GPL1261", destdir = ".")

# Preview the first ten probes: probe ID next to its gene symbol.
Table(gpl1261)[seq_len(10), c("ID", "Gene.Symbol")]
# Example output:
#   ID          Gene.Symbol
# 1 1415670_at  Copg
# 2 1415671_at  Atp6v0d1

 # set parameters and draw the plot
# Open a device whose width grows with the number of samples, and enlarge the
# bottom margin according to the longest sample name (labels are drawn
# perpendicular to the axis via las = 2).
dev.new(width = 4 + dim(gset)[[2]] / 5, height = 6)
par(mar = c(2 + round(max(nchar(sampleNames(gset))) / 2), 4, 2, 1))

title <- paste0("GSE10784", "/", annotation(gset), " selected samples")
boxplot(exprs(gset), boxwex = 0.7, notch = TRUE, main = title,
        outline = FALSE, las = 2)

# This script never defines a `labels` vector (no sample groups were selected),
# so the bare name would resolve to the base function `labels` and legend()
# would fail. Only draw the legend when a character vector `labels` exists.
if (exists("labels", mode = "character")) {
  legend("topleft", labels, fill = palette(), bty = "n")
}
# First ten feature identifiers of the expression set
featureNames(gset)[seq_len(10)]

# Names of the samples in the expression set
sampleNames(gset)

# Phenotype (per-sample annotation) data
pData(gset)

 # read CEL files
# One sample's CEL file, downloaded from GEO and then untarred and unzipped.
# ReadAffy() and rma() are provided by the affy package.
cel_file <- "GSM272325.CEL"
# Fail early with a clear message if the file is not in the working directory.
stopifnot(file.exists(cel_file))
gsm272325 <- ReadAffy(filenames = cel_file)

# Reference:
# http://www.biostat.iupui.edu/~XiaochunLi/Portugal/Biocon_lab1/Biocon_lab1.pdf
# rma() background-corrects, normalizes, and summarizes the probe-level data
# into an expression measure for each probe set (gene). Only the single array
# read above is processed here. The expression values are in log base 2 scale.
eset <- rma(gsm272325)
e <- exprs(eset)
dim(e)

No comments: