#datadm<-read.table('/home/pierre/.hippasos/corpus_agir_CHDS_16/fileACTtemp.csv', header=TRUE,sep=';', quote='\"',row.names = 1, na.strings = 'NA') library(cluster) #dissmat<-daisy(dataact, metric = 'gower', stand = FALSE) #chd<-diana(dissmat,diss=TRUE,) #height<-chd$height #sortheight<-sort(height,decreasing=TRUE) FindBestCluster<-function (x,Max=15) { i<-1 j<-1 ListClasseOk<-list() while (i < Max) { if (x[i]==1){ while (x[i]==1) { i<-i+1 } ListClasseOk[[j]]<-i j<-j+1 } if (x[i]==x[i+1]) { i<-i+1 } else { ListClasseOk[[j]]<-i+1 i<-i+1 j<-j+1 } } unlist(ListClasseOk) } #BestCLusterNb<-FindBestCluster(sortheight) #classes<-as.data.frame(cutree(as.hclust(chd), k=6))[,1] #datadm<-cbind(datadm,classes) #clusplot(datadm,classes,shade=TRUE,color=TRUE,labels=4) BuildContTable<- function (x) { afctable<-NULL for (i in 1:(ncol(x)-1)) { coltable<-table(x[,i],x$classes) afctable<-rbind(afctable,coltable) } afctable } PrintProfile<- function(dataclasse,profileactlist,profileetlist,antiproact,antiproet,clusternb,profileout,antiproout,profilesuplist=NULL,antiprosup=NULL) { prolist<-list() profile<-matrix(,0,6) antipro<-matrix(,0,6) cltot<-as.data.frame(dataclasse[dataclasse[,ncol(dataclasse)]!=0,]) cltot<-as.data.frame(as.character(cltot[,ncol(cltot)])) tot<-nrow(cltot) classes<-as.data.frame(as.character(dataclasse[,ncol(dataclasse)])) classes.s<-as.data.frame(summary(cltot[,1],maxsum=500)) profile<-rbind(profile,c('***','nb classes',clusternb,'***','','')) antipro<-rbind(antipro,c('***','nb classes',clusternb,'***','','')) for(i in 1:clusternb) { profile<-rbind(profile,c('**','classe',i,'**','','')) nbind<-classes.s[which(rownames(classes.s)==i),1] pr<-round((nbind/tot)*100,digits=2) profile<-rbind(profile,c('****',nbind,tot,pr,'****','')) if (length(profileactlist[[1]][[i]])!=0){ profile<-rbind(profile,as.matrix(profileactlist[[1]][[i]])) } if (!is.null(profilesuplist)) { profile<-rbind(profile,c('*****','*','*','*','*','*')) if (length(profilesuplist[[1]][[i]])!=0) { profile<-rbind(profile,as.matrix(profilesuplist[[1]][[i]])) } } if (!is.null(profileetlist)) { profile<-rbind(profile,c('*','*','*','*','*','*')) if (length(profileetlist[[1]][[i]])!=0) { profile<-rbind(profile,as.matrix(profileetlist[[1]][[i]])) } } antipro<-rbind(antipro,c('**','classe',i,'**','','')) antipro<-rbind(antipro,c('****',nbind,tot,pr,'****','')) if (length(antiproact[[1]][[i]])!=0) { antipro<-rbind(antipro,as.matrix(antiproact[[1]][[i]])) } if (!is.null(profilesuplist)) { antipro<-rbind(antipro,c('*****','*','*','*','*','*')) if (length(antiprosup[[1]][[i]])!=0) { antipro<-rbind(antipro,as.matrix(antiprosup[[1]][[i]])) } } if (!is.null(profileetlist)) { antipro<-rbind(antipro,c('*','*','*','*','*','*')) if (length(antiproet[[1]][[i]])!=0) { antipro<-rbind(antipro,as.matrix(antiproet[[1]][[i]])) } } } write.csv2(profile,file=profileout,row.names=FALSE) write.csv2(antipro,file=antiproout,row.names=FALSE) } AddCorrelationOk<-function(afc) { rowcoord<-afc$rowcoord colcoord<-afc$colcoord factor <- ncol(rowcoord) hypo<-function(rowcoord,ligne) { somme<-0 for (i in 1:factor) { somme<-somme+(rowcoord[ligne,i])^2 } sqrt(somme) } cor<-function(d,hypo) { d/hypo } CompCrl<-function(rowcol) { out<-rowcol for (i in 1:factor) { for(ligne in 1:nrow(rowcol)) { out[ligne,i]<-cor(rowcol[ligne,i],hypo(rowcol,ligne)) } } out } afc$rowcrl<-CompCrl(rowcoord) afc$colcrl<-CompCrl(colcoord) afc } AsLexico<- function(x) { x<-as.matrix(x) sumcol<-colSums(x) sumrow<-rowSums(x) tot<-sum(sumrow) tablesqr<-x tablep<-x mod.names<-rownames(x) #problem exemple aurelia for (classe in 1:ncol(x)) { print(classe) for (ligne in 1:nrow(x)) { conttable<-matrix(0,2,2) conttable[1,1]<-as.numeric(x[ligne,classe]) conttable[1,2]<-sumrow[ligne]-conttable[1,1] conttable[2,1]<-sumcol[classe]-conttable[1,1] conttable[2,2]<-tot-sumrow[ligne]-conttable[2,1] chiresult<-chisq.test(conttable,correct=TRUE) if (is.na(chiresult$p.value)) { chiresult$p.value<-1 chiresult$statistic<-0 } obsv<-chiresult$expected pval<-as.character(format(chiresult$p.value,scientific=TRUE)) spval<-strsplit(pval,'e') if (is.na(spval)) { print(spval) } if (conttable[1,1]>obsv[1,1]) { tablep[ligne,classe]<-as.numeric(spval[[1]][2])*(-1) tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3) } else if (conttable[1,1]obsv[1,1]) { as.numeric(spval[[1]][2])*(-1) } else if (tb[1,1]obsv[1,1]) { chiresult$p.value } else if (tb[1,1]obsv[1,1]) { chiresult$statistic } else if (tb[1,1] cont[1,1]) { chiresult$statistic <- chiresult$statistic * -1 } chi2[tg,classe] <- chiresult$statistic pchi2[tg,classe] <- chiresult$p.value } } res <- list(chi2 = chi2, pchi2 = pchi2) } BuildProf<- function(x,dataclasse,clusternb,lim=2) { #### #r.names<-rownames(x) #x<-as.matrix(x) #rownames(x)<-r.names #### #nuce<-nrow(dataclasse) sumcol<-paste(NULL,1:nrow(x)) colclasse<-dataclasse[,ncol(dataclasse)] nuce <- length(which(colclasse != 0)) # for (i in 1:nrow(x)) { # sumcol[i]<-sum(x[i,]) # } # afctablesum<-cbind(x,sumcol) afctablesum <- cbind(x, rowSums(x)) #dataclasse<-as.data.frame(dataclasse[dataclasse[,ncol(dataclasse)]!=0,]) dataclasse<-as.matrix(dataclasse[dataclasse[,ncol(dataclasse)]!=0,]) tablesqr<-x tablep<-x x<-afctablesum listprofile<-list() listantipro<-list() mod.names<-rownames(x) prof<-list() aprof<-list() lnbligne<-matrix() for (classe in 1:clusternb) { lnbligne[classe]<-length(colclasse[colclasse==classe]) prof[[classe]]<-data.frame() aprof[[classe]]<-data.frame() } for (ligne in 1:nrow(x)) { for (classe in 1:clusternb) { nbligneclasse<-lnbligne[classe] conttable<-data.frame() conttable[1,1]<-as.numeric(x[ligne,classe]) conttable[1,2]<-as.numeric(as.vector(x[ligne,ncol(x)]))-as.numeric(x[ligne,classe]) conttable[2,1]<-nbligneclasse-as.numeric(x[ligne,classe]) conttable[2,2]<-nrow(dataclasse)-as.numeric(as.vector(x[ligne,ncol(x)]))-conttable[2,1] chiresult<-chisq.test(conttable,correct=FALSE) if (is.na(chiresult$p.value)) { chiresult$p.value<-1 chiresult$statistic<-0 china=TRUE } obsv<-chiresult$expected tablep[ligne,classe]<-round(chiresult$p.value,digits=3) #tablep[ligne,classe]<-format(chiresult$p.value, scientific=TRUE) if (chiresult$statistic>=lim) { if (conttable[1,1]>obsv[1,1]) { tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3) prof[[classe]][nrow(prof[[classe]])+1,1]<-as.numeric(x[ligne,classe]) prof[[classe]][nrow(prof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)])) prof[[classe]][nrow(prof[[classe]]),3]<-round((as.numeric(as.vector(x[ligne,classe]))/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2) prof[[classe]][nrow(prof[[classe]]),4]<-round(chiresult$statistic,digits=2) prof[[classe]][nrow(prof[[classe]]),5]<-mod.names[ligne] prof[[classe]][nrow(prof[[classe]]),6]<-chiresult$p.value } else if (conttable[1,1]obsv[1,1]) { tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3) } else if (conttable[1,1]=lim) { if (conttable[1,1]>obsv[1,1]) { tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3) prof[[classe]][nrow(prof[[classe]])+1,1]<-as.numeric(x[ligne,classe]) prof[[classe]][nrow(prof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)])) prof[[classe]][nrow(prof[[classe]]),3]<-round((as.numeric(as.vector(x[ligne,classe]))/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2) prof[[classe]][nrow(prof[[classe]]),4]<-round(chiresult$statistic,digits=2) prof[[classe]][nrow(prof[[classe]]),5]<-mod.names[ligne] prof[[classe]][nrow(prof[[classe]]),6]<-chiresult$p.value } else if (conttable[1,1]obsv[1,1]) { tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3) } else if (conttable[1,1]