...+++...
[iramuteq] / Rscripts / simiold.R
1 #############################################################
2 makesimi<-function(dm){
3         a<-dm
4         m<-matrix(0,ncol(dm),ncol(dm))
5         rownames(m)<-colnames(a)
6         colnames(m)<-colnames(a)
7         eff<-colSums(a)
8         for (col in 1:(ncol(a)-1)){
9                 for (colc in (col+1):ncol(a)){
10                         ta<-table(a[,col],a[,colc])
11                         if (ncol(ta)==1 & colnames(ta)[1]=='0') {
12                                 ta<-cbind(ta,'1'=c(0,0))
13                         } else if (ncol(ta)==1 & colnames(ta)[1]=='1') {
14                                 ta<-cbind('0'=c(0,0),t)
15                         } else if (nrow(ta)==1 & rownames(ta)[1]=='0'){
16                                 ta<-rbind(ta,'1'=c(0,0))
17                         } else if (nrow(ta)==1 & rownames(ta)[1]=='1') {
18                                 ta<-rbind('0'=c(0,0),ta)
19                         }
20                         #m[colc,col]<-length(which((a[,col]==1) & (a[,colc]==1))) 
21                         m[colc,col]<-ta[2,2]
22                         m[col,colc]<-m[colc,col]
23                 }
24         }
25         out<-list(mat=m,eff=eff)
26
27
28 makejac<-function(dm){
29         a<-dm
30         m<-matrix(0,ncol(dm),ncol(dm))
31         rownames(m)<-colnames(a)
32         colnames(m)<-colnames(a)
33         eff<-colSums(a)
34         for (col in 1:(ncol(a)-1)){
35                 for (colc in (col+1):ncol(a)){
36                         ta<-table(a[,col],a[,colc])
37                         if (ncol(ta)==1 & colnames(ta)[1]=='0') {
38                                 ta<-cbind(ta,'1'=c(0,0))
39                         } else if (ncol(ta)==1 & colnames(ta)[1]=='1') {
40                                 ta<-cbind('0'=c(0,0),t)
41                         } else if (nrow(ta)==1 & rownames(ta)[1]=='0'){
42                                 ta<-rbind(ta,'1'=c(0,0))
43                         } else if (nrow(ta)==1 & rownames(ta)[1]=='1') {
44                                 ta<-rbind('0'=c(0,0),ta)
45                         }
46                         m[colc,col]<-(ta[2,2]/(ta[1,2]+ta[2,1]+ta[2,2]))*100
47                         #m[colc,col]<-(length(which((a[,col]==1) & (a[,colc]==1)))/(eff[col]+eff[colc]-length(which((a[,col]==1) & (a[,colc]==1)))))*100
48                         m[col,colc]<-m[colc,col]
49                 }
50         }
51         out<-list(mat=m,eff=eff)
52 }
53
54 makesimipond<-function(dm) {
55         a<-dm
56         m<-matrix(0,ncol(dm),ncol(dm))
57         rownames(m)<-colnames(dm)
58         colnames(m)<-colnames(dm)
59         eff<-colSums(a)
60         #a<-t(a)
61         #print(a)
62         #lt<-list()
63         for (col in 1:(ncol(a)-1)){
64                 for (colc in (col+1):ncol(a)){
65                         m[colc,col]<-length(which((a[,col]>1) & (a[,colc]>1)))
66                         m[col,colc]<-m[colc,col]
67                 }
68         }
69         out<-list(mat=m,eff=eff)
70 }
71
72 BuildProf01<-function(x,classes) {
73         #x : donnees en 0/1
74         #classes : classes de chaque lignes de x
75         dm<-cbind(x,cl=classes)
76         clnb=length(summary(as.data.frame(as.character(classes)),max=100))
77         print(clnb)
78         print(summary(as.data.frame(as.character(classes)),max=100))
79         mat<-matrix(0,ncol(x),clnb)
80         #print(mat)
81         rownames(mat)<-colnames(x)
82         for (i in 1:clnb) {
83                 dtmp<-dm[which(dm$cl==i),]
84                 for (j in 1:(ncol(dtmp)-1)) {
85                         #print(rownames(dtmp[j,]))
86                         mat[j,i]<-sum(dtmp[,j])
87                 }
88         }
89         mat
90 }
91 ###################################################################
92
93 source('/home/pierre/workspace/iramuteq/Rscripts/chdfunct.R')
94 #info<-read.csv2(file('/home/pierre/fac/etudiant/crepin/simi/info01.csv',encoding='latin1'),row.names=1)
95 #cinf<-ncol(info)
96 #biblio<-read.csv2(file('/home/pierre/fac/etudiant/crepin/simi/biblio01.csv',encoding='latin1'),row.names=1)
97 #cbib<-ncol(biblio)
98 #rechdoc<-read.csv2(file('/home/pierre/fac/etudiant/crepin/simi/recherdoc01.csv',encoding='latin1'),row.names=1)
99 #crech<-ncol(rechdoc)
100
101 ######################@@
102 #MV
103 #########################
104 #chaud<-read.csv2('/home/pierre/fac/etudiant/sab/Chaudronnier_pour_MV01.csv',row.names=1)
105 ##cchaud<-ncol(chaud)
106 #op<-read.csv2('/home/pierre/fac/etudiant/sab/ocn_pour_mv01.csv',row.names=1)
107 ##cop<-ncol(op)
108 #soud<-read.csv2('/home/pierre/fac/etudiant/sab/soudeur_pour_mv01.csv',row.names=1)
109 ##csoud<-ncol(soud)
110 #indus<-read.csv2('/home/pierre/fac/etudiant/sab/metier_indus01.csv',row.names=1)
111 ##cmindus<-ncol(mindus)
112 #infor<-read.csv2('/home/pierre/fac/etudiant/sab/Informaticien_pour_MV01.csv',row.names=1)
113 ##cinfor<-ncol(infor)
114 #trav<-read.csv2('/home/pierre/fac/etudiant/sab/travail_pour_mv01.csv',row.names=1)
115 ##ctrav<-ncol(trav)
116 #tot<-cbind(chaud,op)
117 #tot<-cbind(tot,soud)
118 #tot<-cbind(tot,indus)
119 ##
120 #list_data<-list('a'=chaud, 'b'=soud,'c'=infor,'d'=trav)
121 #tot<-cbind(chaud,op)
122 #tot<-chaud
123 ##tot<-cbind(tot,infor)
124 ##tot<-cbind(tot,trav)
125 #mv<-read.csv2('/home/pierre/fac/etudiant/sab/chaud_soud_opn_metierindus01.csv')
126 #mv<-mv[,-ncol(mv)]
127 #grp<-read.csv2('/home/pierre/fac/etudiant/sab/grp2.csv')
128 #tab<-tot
129
130 print('passe ici')
131 #tot<-mv[,-ncol(mv)]
132 #tot<-trav
133 #################################################
134 #AGIR
135 #ag<-read.csv2('/home/pierre/workspace/iramuteq/corpus/tableau_agir2_recod_alc_entree_pourpassage_temp_AlcesteQuest_1/Act01.csv')
136 #cag<-ncol(ag)
137 #load('/home/pierre/workspace/iramuteq/corpus/tableau_agir2_recod_alc_entree_pourpassage_temp_AlcesteQuest_1/RData.RData')
138
139 ###############################################################
140 #Vero
141 #tot<-read.csv2('/home/pierre/fac/etudiant/crepin/simi/simitot01.csv')
142 #grp<-vector(mode='integer',length=ncol(tot))
143 #grp[1:54]<-1
144 #grp[55:108]<-2
145 #grp[109:162]<-3
146 #prof<-BuildProf01(tot,grp[,1])
147 ##prof<-prof[-nrow(prof),]
148 #outp<-AsLexico(prof)
149 #chistabletot<-outp[[2]]
150
151 ###################################################################
152 #Steph
153
154 #tot<-read.csv2('/home/pierre/fac/etudiant/netto/simi-27-03-09/prof_inf_ecole_priv01.csv')
155 #tot<-tot[,-ncol(tot)]
156 #grp<-vector(mode='integer',length=nrow(tot))
157 #grp[1:245]<-1
158 #grp[246:490]<-2
159 #prof<-BuildProf01(tot,grp)
160 #print(nrow(prof))
161 ##print(prof)
162 #outp<-AsLexico(prof)
163 #chistabletot<-outp[[2]]
164
165 #tot<-read.csv2('/home/pierre/fac/etudiant/netto/simi-27-03-09/etu_inf_pro_priv01.csv')
166 #print(tot[,ncol(tot)])
167 #tot<-tot[,-ncol(tot)]
168 #grp<-vector(mode='integer',length=nrow(tot))
169 #grp[1:175]<-1
170 #grp[176:350]<-2
171 #prof<-BuildProf01(tot,grp)
172 #outp<-AsLexico(prof)
173 #chistabletot<-outp[[2]]
174
175 #tot<-read.csv2('/home/pierre/fac/etudiant/netto/simi-27-03-09/prof_etu_inf_pro01.csv')
176 #print(tot[,ncol(tot)])
177 #tot<-tot[,-ncol(tot)]
178 #grp<-vector(mode='integer',length=nrow(tot))
179 #grp[1:245]<-1
180 #grp[246:419]<-2
181 #prof<-BuildProf01(tot,grp)
182 #outp<-AsLexico(prof)
183 #chistabletot<-outp[[2]]
184
185 #######################################################
186 #grp ideal 2008
187 #tot<-read.csv2('/home/pierre/workspace/iramuteq/corpus/grpiedal2008_01.csv')
188 #print(tot[,ncol(tot)])
189 #grp<-vector(mode='integer',length=ncol(tot))
190 #grp[1:10]<-1
191 #grp[11:20]<-2
192 #prof<-BuildProf01(tot,grp)
193 #outp<-AsLexico(prof)
194 #chistabletot<-outp[[2]]
195
196 #ministre<-read.csv2('/home/pierre/workspace/iramuteq/corpus/Ministres_AFCUCI_3/TableCont.csv',header=TRUE)
197 #eff<-colSums(ministre)
198 #ministre<-ministre[,-which(eff<100)]
199 #
200 ##ministre<-(ministre/colSums(ministre))*1000
201 #print(ncol(ministre))
202 #pm<-t(ministre)
203 #outp<-AsLexico(pm)
204 #chistabletot<-outp[[2]]
205 #ministre<-read.csv2('/home/pierre/workspace/iramuteq/corpus/Ministres_Alceste_1/TableUc1.csv',header=TRUE)
206 #load('/home/pierre/workspace/iramuteq/corpus/Ministres_Alceste_1/RData.RData')
207 #ministre<-ministre[,-which(eff<200)]
208 #uce<-read.csv2('/home/pierre/workspace/iramuteq/corpus/Ministres_Alceste_1/listeUCE1.csv')
209 #uc<-matrix(0,nrow(ministre),1)
210 #uc<-list()
211 #print(uce)
212 #print(nrow(n1))
213 #for (i in 1:nrow(uce)) {
214 #    uc[[as.integer(uce[i,2])+1]]<-n1[i,ncol(n1)]
215 #}
216 #print(nrow(ministre))
217 #grp<-unlist(uc)
218 #prof<-BuildProf01(ag,n1[,ncol(n1)])
219 #outp<-AsLexico(prof)
220 #chistabletot<-outp[[2]]
221
222 #######################
223 #Steph
224 #########################
225 #priv<-read.csv2('/home/pierre/workspace/iramuteq/corpus/simi/info_priv01.csv')
226 #pro<-read.csv2('/home/pierre/workspace/iramuteq/corpus/simi/info_pro01.csv')
227 #cpriv<-ncol(priv)
228 #tot<-cbind(priv,pro)
229 #tot<-read.csv2('/home/pierre/workspace/iramuteq/corpus/simi/priv_pro_ens01.csv')
230 #print('tot')
231 #print(ncol(tot))
232
233
234 #grp<-vector(mode='integer',length=nrow(tot))
235 #grp[1:245]<-1
236 #grp[246:nrow(tot)]<-2
237 #grp[491:635]<-3
238 #prof<-BuildProf01(tot,grp)
239 #prof<-prof[,-4]
240 #print(prof)
241 #print(nrow(prof))
242 #outp<-AsLexico(prof)
243 #chistabletot<-outp[[2]]
244 #cs<-colSums(prof)
245 #prof<-prof/cs
246 #l<-vector(mode='integer',length=nrow(prof))
247 #for (line in 1:nrow(prof)) {
248 #    l[line]<-as.integer(which.max(prof[line,1:2]))
249 #}
250 print('lecture')
251 #chistabletot<-chistabletot[1:ncol(ministre),]
252 #print(nrow(chistabletot))
253 #sc<-colSums(as.matrix(ministre))
254 #print(length(sc))
255 #outc<-which(sc>0)
256 #print(length(outc))
257 #ministre<-ministre[,outc]
258 #print(ncol(ministre))
259 #prof<-BuildProf01(ministre,grp)
260 #outp<-AsLexico(prof)
261 #chistabletot<-outp[[2]]
262 #chistabletot<-chistabletot[outc,]
263 #print(nrow(chistabletot))
264 #ave<-read.csv2('/home/pierre/workspace/iramuteq/corpus/Avenir_Alceste_1/TableUc1.csv',header=TRUE)
265 #load('/home/pierre/workspace/iramuteq/corpus/Avenir_Alceste_1/RData.RData')    
266 #chistabletot<-chistabletot[1:cag,]
267
268 #print(grp)
269 #grp<-as.character(grp[,1])
270 #titre<-c("chaudronnier","soudeur","informaticien","travail")
271 #count<-0
272 #split.screen(c(2,2))
273 #for (tab in list_data) {
274 #count<-count+1
275 #screen(count)
276 #par(cex=0.7)
277 #grp<-rbind(grp,grp)
278 #grp<-rbind(grp,grp)
279 #print(ncol(tab))
280
281 #####################################################################
282 #suede enfance en danger
283 tot<-read.csv2('/home/pierre/fac/suede/resultats/cat_asso1_01.csv',row.names=1)
284
285 #prof<-BuildProf01(tab,grp[,1])
286 #outp<-AsLexico(prof)
287 #chistabletot<-outp[[2]]
288
289 #print('liste des classes')
290 #listclasse<-vector(mode='integer',length=ncol(tab))
291 #for (line in 1:nrow(chistabletot)) {
292 #    if (max(chistabletot[line,])>0) {
293 #        listclasse[line]<-as.vector(which.max(chistabletot[line,]))
294 #    } else {
295 #        listclasse[line]<-3
296 #    }
297 #}
298 #classes<-listclasse
299 #classes<-classes[1:cag]
300 #print(classes)
301 #tot<-cbind(info,biblio)
302 #tot<-cbind(tot,rechdoc)
303 listclasse<-vector(mode='integer',length=ncol(dt))
304 for (line in 1:nrow(chistabletot)) {
305         if ((max(chistabletot[line,])>3.84) || (min(prof[line,])==0)) {
306                 if (max(chistabletot[line,])>3.84) {
307                         listclasse[line]<-as.vector(which.max(chistabletot[line,]))
308                 }
309                 if (min(prof[line,])==0) {
310                         print('zero')
311                         listclasse[line]<-as.vector(which.max(prof[line,]))
312                 }
313         } else {
314                 listclasse[line]<-3
315         }
316 }
317
318 print('matrice de similitude')
319 mindus<-makesimi(tot)
320 m<-mindus$m
321 eff<-mindus$eff
322
323 #mateff<-makesimipond(ministre)
324 #mateff<-makesimi(tot)  
325 #mateff<-makejac(tot)
326 #m<-mateff$mat
327 #eff<-mateff$eff
328 #m<-as.matrix(dist(t(ministre),method='binary',upper=TRUE, diag=TRUE))
329 #m<-as.matrix(simil(ministre), method='Jaccard', upper=TRUE, diag=TRUE, by_rows=FALSE)
330 #print(nrow(m))
331 #print(ncol(m))
332 #print(length(colnames(ministre)))
333 #colnames(m)<-colnames(ministre)
334 #rownames(m)<-colnames(ministre)
335 #eff<-colSums(ministre)
336 #mateffp<-makesimi(ministre)
337 #mateffp<-makesimipond(ministre)
338 #m<-mateffp$mat
339 #eff<-mateffp$eff
340
341 #matave<-makesimi(ministre)
342 #m<-matave$m
343 #eff<-matave$eff
344
345 print('couleur')
346 #rain<-rainbow(3)
347 #append(rain,'black')
348 rain<-c("green","red","white")#"yellow","pink","black")#green","blue","red","black")
349 vcol<-vector(mode='integer',length=length(eff))
350 vcolb<-vector(mode='integer',length=length(eff))
351 #classes<-classes[1:93]
352 #classes<-grp
353 #for (i in 1:length(eff)){
354 #    vcol[i]<-rain[as.integer(classes[i])]
355 #       vcolb[i]<-"black"
356 #}
357 #ll<-which(effp>=0.5)
358 #for (i in which(effp>=0.5)) {
359 #       if (i<=10) {
360 #               vcol[i]<-"blue"
361 #       } else {
362 #               vcol[i]<-"pink"
363 #       }
364 #}
365 #print('length(vcol)')
366 #print(length(vcol))
367 #vcol[1:cchaud]<-rain[1]
368 #vcol[(cchaud+1):(cchaud+cop)]<-rain[2]
369 #vcol[(cchaud+cop+1):length(eff)]<-rain[3]
370 #print(classes)
371 print('premier graph')
372 library(igraph)
373 #sink('graph.txt')
374 g1<-graph.adjacency(m,mode="lower",weighted=TRUE)
375
376 #maxtree<-maxgraph(g1)
377 #plot(maxtree, layout=layout.circle)
378 #lo<-layout.circle(g1)
379 eff<-(eff/max(eff))
380 weori<-get.edge.attribute(g1,'weight')
381 #tdel<-which(weori<3)
382 we<-(weori/max(weori))*4
383 print('arbre maximum')
384 invw<-1/weori
385 E(g1)$weight<-invw
386 g3<-minimum.spanning.tree(g1)
387 E(g3)$weight<-1/E(g3)$weight
388 #print(E(g3)$weight)
389 #sink()
390 #g3<-g1
391 wev<-eff*20
392 wee<-(E(g3)$weight/max(E(g3)$weight))*10
393 weori<-E(g3)$weight
394 print('layout')
395 #lo<-layout.kamada.kawai(g3,dim=3)
396 lo<-layout.fruchterman.reingold(g3,dim=3)
397 print('lo')
398 #print(nrow(lo))
399 #lo<-layout.sphere(g3)
400 #lo<-cbind(lo,eff)
401 vsize<-vector(mode='integer',length=nrow(lo))
402 #print(we)
403 #ecount(g1)
404 #g2<-simplify(g1)
405 #g2<-delete.edges(g2,tdel-1)
406 #tmax<-clusters(g1)
407 #print(tmax)
408 #we<-we[-tdel]
409 #weori<-weori[-tdel]
410 #plot(g1,vertex.label=colnames(m),edge.width=get.edge.attribute(g1,'weight'),layout=layout.circle,vertex.shape='none')
411 #igraph.par('print.edge.attributes',TRUE)
412 #plot(g2,vertex.label=colnames(m),vertex.size=vsize,vertex.color=vcol,edge.width=we,layout=lo)#,vertex.shape='none')#,edge.label=weori)
413 #rglplot(g3,vertex.label=colnames(m),edge.width=we,vertex.size=vsize,vertex.label.color=vcol,layout=lo,vertex.shape='none')#,edge.label=weori)
414 print('plot')
415 tkplot(g3,vertex.label=colnames(m),edge.width=wee,vertex.size=wev,vertex.label.color="black",layout=lo)#,vcolb vertex.label.dist=1)#,vertex.shape='none')#,edge.label=weori)
416 #},vertex.color=vcol
417 #rgl.bg(sphere=FALSE,color=c("black","white"))
418 #vertex.color=vcol,vertex.label.color=vcol,edge.label=weori,
419 #rgl.viewpoint(zoom=0.6)
420 #movie3d(spin3d(axis=c(0,1,0),rpm=6),10,dir='/home/pierre/workspace/iramuteq/corpus/',movie="vero_cooc_explo",clean=TRUE,convert=TRUE,fps=20)
421 #tkplot(g1,vertex.label=colnames(m),layout=layout.circle,vertex.shape='rectangle')
422 #rglplot(g1,vertex.label=colnames(m),layout=layout.circle)
423 #g1<-graph(m)
424 #tkplot(g1)
425
426
427
428
429
430 #Ministre
431 autre :
432 dm<-read.csv2('classe_mod.csv',row.names=1,header = FALSE)
433 load('RData.RData')
434 #------------------------------------------------------
435 #selectionner
436 chimax<-as.matrix(apply(chistabletot,1,max))
437 chimax<-as.matrix(chimax[,1][1:nrow(dm)])
438 chimax<-cbind(chimax,1:nrow(dm))
439 order_chi<-as.matrix(chimax[order(chimax[,1],decreasing = TRUE),])
440 elim<-which(rownames(dm) == 'faire')
441 dm<-dm[-elim,]
442 dm<-dm[chimax[,2][1:300],]
443 #-------------------------------------------------------
444 limit<-nrow(dm)
445 distm<-dist(dm,diag=TRUE)
446 distm<-as.matrix(distm)
447 g1<-graph.adjacency(distm,mode='lower',weighted=TRUE)
448 g1<-minimum.spanning.tree(g1)
449 lo<-layout.kamada.kawai(g1,dim=3)
450 mc<-rainbow(ncol(chistabletot))
451 chistabletot<-chistabletot[-elim,]
452 cc<-vector()
453 for (i in 1:limit) {
454         cc<-append(cc,which.max(chistabletot[i,]))
455 }
456 cc<-mc[cc]
457 mass<-rowSums(dm)/100
458 rglplot(g1,vertex.label = rownames(dm),vertex.label.color=cc,vertex.color=cc,vertex.size = mass, layout=lo)
459
460
461
462 autre :
463 dm<-read.csv2('enseignant_simi_opi.csv',row.names=1,na.string='')
464 mat<-matrix(0,ncol=ncol(dm),nrow=ncol(dm))
465 for (i in 1:(ncol(dm)-1)) {
466         for (j in (i+1):ncol(dm)){
467         tab<-table(dm[,i],dm[,j])
468         chi<-chisq.test(tab)
469         mat[i,j]<-chi$statistic
470         mat[j,i]<-chi$statistic
471         }
472 }
473 mat<-ifelse(mat>3.84,mat,0)
474 mat<-ifelse(is.na(mat),0,mat)
475 mat<-ifelse(is.infinite(mat),0,mat)
476 cs<-colSums(mat)
477 tovire<-which(cs==0)
478 if (!is.integer(tovire)){
479 mat<-mat[-tovire,]
480 mat<-mat[,-tovire]
481 }
482 cn<-colnames(dm)
483 if (!is.integer(tovire)) cn<-cn[-tovire]
484 g1<-graph.adjacency(mat,mode='lower',weighted=TRUE)
485 lo<-layout.fruchterman.reingold(g1,dim=2)
486 wei<-E(g1)$weight
487 plot(g1,vertex.label=cn,vertex.size=0.1,edge.width=wei,edge.label=round(wei,2),layout=lo)
488
489
490
491 levels.n<-mj$levels.n
492 levelnames<-mj$levelnames
493 cn<-mj$colnames
494 count<-0
495 for (j in 1:ncol(dm)) {
496         for (i in 1:levels.n[j]) {
497                 count<-count+1
498                 print(paste(cn[j],'\\.',sep=''))
499                 levelnames[count]<-gsub(paste(cn[j],'\\.',sep=''),'',levelnames[count])
500         }
501 }
502