1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
| setwd("path to\\GSE65212")
sample<-read.csv("sample.csv") TNBC<-sample$Sample_geo_accession[1:55] normal<-sample$Sample_geo_accession[56:66] group_list<-c(rep('TNBC',55),rep('Normal',11))
GSE65212<-read.table('GSE65212_series_matrix.txt.gz', sep='\t',quote = "",fill = T, comment.char="!",header=T) colname<-colnames(GSE65212) colname<-gsub("\\.","",colname) colname<-gsub("X","",colname) colnames(GSE65212)<-colname GSE65212_TNBC_normal<-GSE65212[,c(TNBC,normal)] rownames(GSE65212_TNBC_normal)<-GSE65212$ID_REF rownames(GSE65212_TNBC_normal)<-gsub('\\"',"",rownames(GSE65212_TNBC_normal))
GPL14877<-read.table('GPL14877_HGU133Plus2_Hs_ENTREZG_mapping.txt.gz', sep='\t',quote = "",fill = T, comment.char="!",header=T) table(rownames(GSE65212_TNBC_normal)%in%GPL14877$Probe.Set.Name) exprSet<-GSE65212_TNBC_normal
exprSet<-GSE65212_TNBC_normal table(rownames(GSE65212_TNBC_normal)%in%GPL14877$Probe.Set.Name) dim(exprSet) exprSet<-exprSet[rownames(exprSet)%in%GPL14877$Probe.Set.Name,] dim(exprSet) GPL14877<-GPL14877[match(rownames(exprSet),GPL14877$Probe.Set.Name),] head(GPL14877) exprSet[1:5,1:5] tmp<-by(exprSet,GPL14877$Affy.Probe.Set.Name,function(x)rownames(x)[which.max(rowMeans(x))]) probes<-as.character(tmp) dim(exprSet) exprSet<-exprSet[rownames(exprSet)%in%probes,] dim(exprSet) rownames(exprSet)<-GPL14877[match(rownames(exprSet),GPL14877$Probe.Set.Name),7] exprSet[1:5,1:5]
GPL570<-read.table("GPL570-55999.txt",sep='\t',quote = "",fill = T, comment.char="#",header=T) GPL570<-GPL570[,c(1,11)] table(GPL14877$Affy.Probe.Set.Name%in%GPL570$ID)
table(rownames(exprSet)%in%GPL570$ID) dim(exprSet) head(GPL570) GPL570<-GPL570[match(rownames(exprSet),GPL570$ID),] head(GPL570) exprSet[1:5,1:5] tmp<-by(exprSet,GPL570$Gene.Symbol,function(x)rownames(x)[which.max(rowMeans(x))]) probes<-as.character(tmp) dim(exprSet) exprSet<-exprSet[rownames(exprSet)%in%probes,] dim(exprSet) rownames(exprSet)<-GPL570[match(rownames(exprSet),GPL570$ID),2] exprSet[1:5,1:5]
suppressMessages(library(limma)) design<-model.matrix(~0+factor(group_list)) colnames(design)<-levels(factor(group_list)) rownames(design)<-colnames(exprSet) design contrast.matrix<-makeContrasts(paste0(unique(group_list),collapse="-"),levels = design) contrast.matrix fit<-lmFit(exprSet,design) fit2<-contrasts.fit(fit,contrast.matrix) fit2<-eBayes(fit2) tempOutput<-topTable(fit2,coef = 1,n=Inf) nrDEG=na.omit(tempOutput) head(nrDEG) test<-nrDEG test$id<-rownames(nrDEG)
topT <- as.data.frame(nrDEG) logFC_Cutoff<-with(topT,mean(abs(logFC))+2*sd(abs(logFC)))
select_gene=(abs(nrDEG$logFC)>=logFC_Cutoff)&(nrDEG$adj.P.Val<0.05) select_gene.sig=nrDEG[select_gene,] write.csv(select_gene.sig,'GSE65212_TNBC_Normal_select_gene.sig.csv')
|