# Consensus clustering

# Install BiocManager (from CRAN) only when it is not already available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install the Bioconductor packages this script loads with library():
# ConsensusClusterPlus (the clustering code) and ALL (the example data set).
# Only packages that are currently missing are installed, so re-running the
# script does not trigger a reinstall every time. Wrapped in local() so the
# helper variables do not leak into the global workspace.
local({
  needed <- c("ConsensusClusterPlus", "ALL")
  is_installed <- vapply(needed, requireNamespace, logical(1), quietly = TRUE)
  if (!all(is_installed)) {
    BiocManager::install(needed[!is_installed])
  }
})

# Load the ALL example data set and pull out its expression matrix.
library(ALL)
data("ALL")
d <- exprs(ALL)

# Peek at the top-left corner of the matrix.
d[seq_len(5), seq_len(5)]
# Dimensions: 12625 genes (rows) x 128 patients (columns)

## ABOUT apply() ##
# apply(dataset, MARGIN, function)
# MARGIN: 1 = rows, 2 = columns, c(1, 2) = rows and columns
# sweep(dataset, MARGIN, statistic, FUN = "-" or FUN = "+")
# subtracts (FUN = "-", the default) or adds (FUN = "+") the statistic
# example) sweep(dataset, 1, apply(dataset, 1, median))
# this subtracts each row's median from that row


# Variability score per gene: the median absolute deviation (MAD) of each row.
mads <- apply(d, 1, mad)

# Keep the 5000 genes with the largest MAD.
# - na.last = NA drops genes whose MAD is NA, so they cannot end up at the
#   front of the reversed ordering and be mistaken for the most variable genes.
# - head() caps the selection at the number of genes available, so a matrix
#   with fewer than 5000 rows does not produce NA row indices.
# - drop = FALSE keeps the result a matrix even if only one gene remains.
d2 <- d[head(rev(order(mads, na.last = NA)), 5000), , drop = FALSE]

# Center each gene by subtracting its row median (sweep's default FUN is "-").
d3 <- sweep(d2, 1, apply(d2, 1, median, na.rm = TRUE))
# If further normalization is necessary, do it here.

library(ConsensusClusterPlus)

# Title handed to ConsensusClusterPlus (and reused by calcICL below); here it
# is the path of the session's temporary directory.
title <- tempdir()

results <- ConsensusClusterPlus(
  d3,
  maxK = 6,              # largest number of clusters evaluated (usually ~20)
  reps = 50,             # number of resamplings (usually ~1000)
  pItem = 0.8,           # proportion of items (samples) drawn per resampling
  pFeature = 1,          # proportion of features (genes) drawn per resampling
  title = title,
  clusterAlg = "hc",     # "hc" = hierarchical, "pam" = partitioning around
                         # medoids, "km" = k-means
  distance = "pearson",  # "pearson" = 1 - Pearson, "spearman" = 1 - Spearman,
                         # "euclidean" = Euclidean distance
  seed = 1262118388.71279
)
# Note: the package manual's example also passes plot = "png"; that argument
# was removed here.

# Each element of `results` holds the output for one cluster count k;
# the lines below look at k = 2.

# consensusMatrix: the consensus matrix (first five rows and columns shown).
results[[2]]$consensusMatrix[seq_len(5), seq_len(5)]

# consensusTree: an hclust object.
results[[2]]$consensusTree

# consensusClass: the sample classifications (first five shown).
results[[2]]$consensusClass[seq_len(5)]

# Run calcICL on the clustering results, reusing the same title as above.
icl <- calcICL(results, title = title)

# Show the clusterConsensus element in full.
icl$clusterConsensus

# Show the first five rows of the itemConsensus element.
icl$itemConsensus[seq_len(5), ]

# (Blog footer) If you liked this article, would you consider supporting the author?