medite.MediteAppli.test.ijcai

41 """ Génère un texte synthétique à partir d'un texte donné. 42 On connait également l'alignement entre les 2 textes"""

def __init__(self, texte='ABC', tx=None, seed=None, moves=True, moveOnly=False):
    """Store the source text and the rates of each modification.

    texte    -- source text the modified text is derived from
    tx       -- rate given to insertion, deletion, replacement and move;
                None selects 0.05
    seed     -- value passed to random.seed; None selects 222
    moves    -- when false, the move rate is forced to 0
    moveOnly -- when true, the insertion, deletion and replacement rates
                are forced to 0.0
    """
    self.t1 = texte
    self.len_t1 = len(texte)
    self.moves = moves
    self.moveOnly = moveOnly

    rate = 0.05 if tx is None else tx
    self.TxIns = self.TxSup = self.TxRemp = self.TxMov = rate
    if not moves:
        self.TxMov = 0
    if moveOnly:
        self.TxIns = self.TxSup = self.TxRemp = 0.0
    # whatever is not modified stays invariant
    self.TxInv = 1 - (self.TxIns + self.TxSup + self.TxRemp + self.TxMov)

    # seeds the module-level generator shared by the whole process
    random.seed(222 if seed is None else seed)

    self.taille_bloc = 100
    self.sep = """ .!\r,\n:\t;-?"'`’()"""

    # Gaussian distribution for the size of the modifying sequences
    self.distrib_taille = 'GAUSS'
    self.mu = 15
    self.variance = 10

72

def getModifiedText(self):
    """Run the two generation passes one after the other.

    The three values returned by _getModifiedTextLN are passed, unchanged
    and in the same order, to _getModifiedTextLN2. Nothing is returned.
    """
    positions, nb_positions, caracteres = self._getModifiedTextLN()
    self._getModifiedTextLN2(positions, nb_positions, caracteres)

76

def _getModifiedText1(self):
    """Build the edited character list and the per-position operation codes.

    Returns a tuple (lPos, len_lPos, lTexte):
    lPos     -- one operation code per position: 0 untouched, 1 inserted,
                2 deleted, 3 replaced, 4 source of a moved block,
                5 destination of a moved block
    len_lPos -- length of lPos, maintained incrementally
    lTexte   -- one character per position; deleted positions hold '*' and
                move sources hold '#'

    NOTE(review): nothing here guards against running out of untouched
    positions; the position searches below would then never terminate.
    """
    # number of characters still to modify, for each operation
    NbIns = int(self.TxIns * self.len_t1) ; NbSup = int(self.TxSup * self.len_t1)
    NbRemp = int(self.TxRemp * self.len_t1) ; NbMov = int(self.TxMov * self.len_t1)
    # characters of the new text, deleted characters included
    lTexte = list(self.t1)
    # operation applied at each position across the two texts
    lPos = [0 for i in xrange(self.len_t1)]
    len_lPos = self.len_t1
    # each pass applies at most one block of every operation that still has budget
    while NbIns > 0 or NbSup > 0 or NbRemp > 0 or NbMov > 0:
        assert len(lPos) == len(lTexte) == len_lPos
        if NbIns > 0: #INS = 1
            # look for an untouched position
            pos = random.randint(0,len_lPos-1)
            while lPos[pos] != 0:
                pos = random.randint(0,len_lPos-1)
            # size of the inserted block, capped by the remaining budget
            nb = min(random.randint(1,self.taille_bloc),NbIns)
            # stop as soon as the block would overlap another block
            while nb > 0 and pos < len_lPos and lPos[pos] == 0:
                lPos.insert(pos,1)
                # pick an inserted character (code 65..122) that differs
                # from the character currently at this position
                ch = random.randint(65,122)
                while ch == ord(lTexte[pos]): ch = random.randint(65,122)
                char = chr(ch)
                lTexte.insert(pos, char)
                len_lPos += 1 ; NbIns -= 1
                nb -= 1 ; pos += 1

        if NbSup > 0: # SUP = 2
            pos = random.randint(0,len_lPos-1)
            while lPos[pos] != 0:
                pos = random.randint(0,len_lPos-1)
            nb = min(random.randint(1,self.taille_bloc),NbSup)
            while nb > 0 and pos < len_lPos and lPos[pos] == 0:
                lPos[pos] = 2
                # the deleted character keeps its slot, shown as '*'
                lTexte[pos] = '*'
                NbSup -= 1
                nb -= 1 ; pos += 1

        if NbRemp > 0: # REMP = 3
            pos = random.randint(0,len_lPos-1)
            while lPos[pos] != 0:
                pos = random.randint(0,len_lPos-1)
            nb = min(random.randint(1,self.taille_bloc),NbRemp)
            while nb > 0 and pos < len_lPos and lPos[pos] == 0:
                lPos[pos] = 3
                # replacement character differs from the one it replaces
                ch = random.randint(65,122)
                while ch == ord(lTexte[pos]): ch = random.randint(65,122)
                char = chr(ch)
                lTexte[pos] = char
                NbRemp -= 1
                nb -= 1 ; pos += 1

        if NbMov > 0: # MOV = 4 / 5
            nb = min(random.randint(1,self.taille_bloc),NbMov)
            pos1 = random.randint(0,len_lPos-1)
            # look for a source position whose nb following slots are all untouched
            # NOTE(review): randint(10, len_lPos-10) raises ValueError when
            # len_lPos < 20 -- confirm short texts never reach this branch
            pos1OK = False
            while not pos1OK:
                bu = pos1
                t = numpy.array(lPos[pos1:pos1+nb])
                if pos1+nb < len_lPos and numpy.sum(t) == 0: pos1OK = True
                else:
                    pos1 = random.randint(0,len_lPos-1)
                    while bu == pos1: pos1 = random.randint(10,len_lPos-10)
                assert pos1OK or bu != pos1

            # same search for the destination; it is validated before the
            # source block is marked, so the two ranges are not checked
            # against each other
            pos2 = random.randint(0,len_lPos-1)
            pos2OK = False
            while not pos2OK:
                bu = pos2
                t = numpy.array(lPos[pos2:pos2+nb])
                if pos2+nb < len_lPos and numpy.sum(t) == 0: pos2OK = True
                else:
                    pos2 = random.randint(0,len_lPos-1)
                    while bu == pos2: pos2 = random.randint(10,len_lPos-10)
                assert pos2OK or bu != pos2
            # mark the source block, then insert its copy at the destination
            lPos[pos1:pos1+nb] = [4] * nb
            lPos[pos2:pos2] = [5] * nb
            t1 = lTexte[pos1:pos1+nb]
            lTexte[pos1:pos1+nb] = ['#'] * nb
            lTexte[pos2:pos2] = t1
            len_lPos += nb ; NbMov -= nb
    return lPos, len_lPos, lTexte

189

def _getModifiedText2(self, lPos, len_lPos, lTexte):
    """Derive the modified text and both operation arrays from the edit lists.

    lPos     -- operation code per position (0 invariant, 1 inserted,
                2 deleted, 3 replaced, 4 move source, 5 move destination)
    len_lPos -- number of positions to read from lPos and lTexte
    lTexte   -- character per position

    Sets three attributes and returns nothing:
    self.t2     -- the modified text
    self.t1ToT2 -- int8 array of length len_t1, operation applied to each
                   character of text 1
    self.t2ToT1 -- int8 array of length len_t1, operation that produced
                   each character of text 2

    Reading stops once len_t1 characters of text 2 have been produced, so
    text 2 never grows past the length of text 1.
    """
    t2 = []
    compt_t1 = compt_t2 = 0
    # operations applied to text 1 to obtain text 2
    self.t1ToT2 = numpy.zeros(self.len_t1, numpy.int8)
    # operations applied to text 2 to obtain text 1
    self.t2ToT1 = numpy.zeros(self.len_t1, numpy.int8)
    # range works identically on Python 2 and 3, unlike xrange
    for pos in range(len_lPos):
        if compt_t2 >= self.len_t1:
            break
        assert compt_t1 < self.len_t1
        assert compt_t2 < self.len_t1, (compt_t2, '/', self.len_t1, '/', pos, '/', len_lPos)
        operation = lPos[pos]
        char = lTexte[pos]
        if operation == 0:      # invariant: present in both texts
            t2.append(char)
            compt_t1 += 1
            compt_t2 += 1
        elif operation == 1:    # insertion: only in text 2
            t2.append(char)
            self.t2ToT1[compt_t2] = 1
            compt_t2 += 1
        elif operation == 2:    # deletion: only in text 1
            self.t1ToT2[compt_t1] = 2
            compt_t1 += 1
        elif operation == 3:    # replacement: one character on each side
            t2.append(char)
            self.t1ToT2[compt_t1] = 3
            self.t2ToT1[compt_t2] = 3
            compt_t1 += 1
            compt_t2 += 1
        elif operation == 4:    # move, source side (text 1)
            self.t1ToT2[compt_t1] = 4
            compt_t1 += 1
        elif operation == 5:    # move, destination side (text 2)
            t2.append(char)
            self.t2ToT1[compt_t2] = 5
            compt_t2 += 1
    self.t2 = ''.join(t2)

def _printFile(self, name, string):
    """Write string to MediteAppli/test/<name> under the current directory.

    name   -- file name, joined to <cwd>/MediteAppli/test
    string -- content written to the file, replacing any previous content

    Returns nothing. The directory is not created here; open() raises if it
    does not exist.
    """
    path = os.path.join(os.getcwd(), "MediteAppli", "test", name)
    # the with-block closes the file even when write() raises
    with open(path, 'w') as f:
        f.write(string)

472

def ecartType(self, population):
    """Return the population standard deviation (divisor n) of population.

    population -- iterable of numbers; it is copied first, so one-shot
                  iterators and generators are accepted

    Raises ZeroDivisionError when population is empty.
    """
    valeurs = list(population)
    n = len(valeurs)
    mean = math.fsum(valeurs) / n

    # Corrected two-pass variance: the sum of the deviations would be 0 in
    # exact arithmetic; subtracting its square compensates the rounding
    # error made on the mean.
    ecarts = [x - mean for x in valeurs]
    sum2 = math.fsum(e * e for e in ecarts)
    sumc = math.fsum(ecarts)
    variance = (sum2 - sumc * sumc / n) / n
    # rounding can leave a tiny negative value, which sqrt would reject
    return math.sqrt(max(variance, 0.0))

494

def test_alignSynthetic(self):
    """Run the synthetic alignment benchmark, one alignment at a time, and save the results.

    For every (file, rate) pair of the hard-coded lists below, one synthetic
    text is generated per seed and aligned through _mainTest. The scores of
    each run are accumulated on self, and self is pickled after each run.
    Once all the seeds of a pair are done, the averages are logged and one
    row is appended to res_<algo>.csv and to res2_<algo>.csv.

    When the instance already has an attribute f (a previous run that
    crashed), every run up to and including the stored (f, tx, seed) triple
    is skipped and the accumulators found on self are reused.
    """
    tdf = test_data.TestDataFactory()
    td = tdf.getTestData(test_data.Althusser, "1")

    planTravail = Donnees.planTravail.PlanTravail('Traduction', 'Traduction',
        'pVS', td.file1, 'pVC', td.file2, Donnees.planTravail.Parametres(1,
        td.getp2()*25, td.getp3()*1000, td.getpcarOuMot(), td.getpcaseSensitive(),
        td.getpseparatorSensivitive(), td.getpdiacriticSensitive()))
    # NOTE(review): this file name starts with a space (' logRes_'), unlike
    # 'logRes1_' below -- kept unchanged, confirm whether it is intended
    logRes = logging.FileHandler(filename=os.path.join(os.getcwd(), 'log',' logRes_'+algo+'.txt'), mode='a')
    logRes1 = logging.FileHandler(filename=os.path.join(os.getcwd(), 'log', 'logRes1_'+algo+'.txt'), mode='a')

    # A missing CSV file gets the column names as its first line. An explicit
    # existence test replaces the former bare except around file(), which
    # also swallowed every unrelated error.
    nom_csv1 = os.path.join(os.getcwd(), 'res_'+algo+'.csv')
    if not os.path.exists(nom_csv1):
        with open(nom_csv1, 'a') as sortie:
            sortie.write("nom_fich;tx_modif;nb_test;dep;x;y;z;x\';z\';y1;y2;y3;y4;y5;z0;z1;z2;l_inv;nb_inv1;nb_inv2;l_sup;nb_sup;l_ins;nb_ins;l_remp;nb_remp_1;nb_remp_2;l_dep;nb_dep1;nd_dep2;l_texte1;l_texte2;tps;precision;precision pond\n")
    nom_csv2 = os.path.join(os.getcwd(), 'res2_'+algo+'.csv')
    if not os.path.exists(nom_csv2):
        # The last header line starts with ';' (front_bloc_sep and FM used
        # to be fused into one column name) and lists Rapp before Prec, the
        # order in which the rows below write those values.
        with open(nom_csv2, 'a') as sortie:
            sortie.write("nom_fich;tx_modif;nb_test;dep;"+
                "Pri;etPri;Moy;etMoy;Sep;etSep;Sim;etSim;Tps;etTps;"+
                "FMPond;etFMPond;RappPond;etRappPond;PrecPond;etPrecPond;"+
                "Pri1;etPri1;Pri2;etPri2;Pri3;etPri3;"+
                "y1;y2;y3;y4;y5;ety1;ety2;ety3;ety4;ety5;"+
                "l_inv;nb_inv1;nb_inv2;l_sup;nb_sup;l_ins;nb_ins;l_remp;nb_remp_1;nb_remp_2;l_dep;nb_dep1;nd_dep2;l_texte1;l_texte2;front_bloc;front_bloc_sep" +
                ";FM;etFM;Rapp;etRapp;Prec;etPrec\n")

    logging.getLogger('').addHandler(logRes)
    logRes.setLevel(logging.WARNING)
    logRes.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logRes1.setLevel(logging.WARNING)
    logRes1.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))

    # The three parameter lists are hard-coded and edited by hand; the batch
    # cannot set them.
    lFile = ['ClaudeBernard']
    lTaux = [0.01]
    lSeed = [1]
    arrondi = 2

    def liste_csv(valeurs):
        # "[a, b]" -> "a; b": drop the brackets, switch to the CSV separator
        return str(valeurs)[1:-1].replace(',', ';')

    def moyenne(cumul):
        return str(round(cumul/self.nb, arrondi))

    def moyenne_pct(cumul):
        return str(round(100*cumul/self.nb, arrondi))

    def ecart_pct(valeurs):
        return str(round(100 * self.ecartType(valeurs), arrondi))

    var = vars(self)
    if 'f' in var:  # resuming a previous run that crashed
        reprise = True
        f_backup = self.f
        tx_backup = self.tx
        seed_backup = self.seed
    else:
        reprise = False
    next_iteration = False

    for self.f in lFile:
        if next_iteration or not reprise: pass
        elif reprise and self.f != f_backup: continue
        for self.tx in lTaux:
            if next_iteration or not reprise: pass
            elif reprise and self.tx != tx_backup: continue
            if next_iteration or not reprise:
                # fresh (file, rate) pair: reset every accumulator
                self.cumMoy = self.cumMoyPond = self.cumTemps = self.nb = 0.0
                self.cumRappMoy = self.cumRappMoyPond = self.cumFMMoy = self.cumFMMoyPond = 0.0
                self.cumX = self.cumY = self.cumZ = self.cumSim = 0.0
                self.cumPri1 = self.cumPri2 = self.cumPri3 = self.cumSep = 0.0
                self.listeMoy = [] ; self.listeMoyPond = []
                self.listeRappMoy = [] ; self.listeRappMoyPond = []
                self.listeFMMoy = [] ; self.listeFMMoyPond = []
                self.listeSim = [] ; self.listeTemps = []
                self.listeObjPri = [] ; self.listeObjMoy = [] ; self.listeObjSep = []
                self.listeObjPri1 = [] ; self.listeObjPri2 = [] ; self.listeObjPri3 = []
                self.listeObjY1 = [] ; self.listeObjY2 = [] ; self.listeObjY3 = [] ; self.listeObjY4 = [] ; self.listeObjY5 = []
                self.cumConfusionMatrice = numpy.zeros((5, 5), float)
                self.cumXprime = 0.0
                self.cumLY = [0.0]*5
                self.cumLZ = [0.0]*3
                self.cumInv = [0.0]*3
                self.cumSup = [0.0]*2
                self.cumIns = [0.0]*2
                self.cumRemp = [0.0]*3
                self.cumDep = [0.0]*3
                self.cumLTexte1 = 0.0
                self.cumLTexte2 = 0.0
                self.cumTps = 0.0
                self.cumFront_bloc = 0.0
                self.cumFront_bloc_sep = 0.0
            for self.seed in lSeed:
                if next_iteration and reprise:
                    # first run after the last completed one: resume here
                    reprise = False
                    next_iteration = False
                elif reprise and self.seed != seed_backup: continue
                elif reprise and not next_iteration:
                    # this is the last run that completed, move to the next
                    next_iteration = True ; continue
                logging.warning("========================")
                logging.warning("Iteration "+str(lSeed.index(self.seed)+1)+ " / "+str(len(lFile)*len(lTaux)*len(lSeed)))
                logging.warning("=====================")
                # plain attribute lookup instead of eval() on a built string
                td = tdf.getTestData(getattr(test_data, self.f), "1")
                synData = SyntheticData(td.gettexte1(), self.tx, self.seed, moveOnly=False)
                log = logging.FileHandler(filename=os.path.join(os.getcwd(), 'log', 'log_'+self.f+'_'+str(self.tx)+'_mv'+str(synData.moves)+'_'+algo+'_'+str(self.seed)+'.txt'), mode='w')
                log.setLevel(5)
                log.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
                logging.getLogger('').addHandler(log)
                try:
                    logging.debug('===================================================')
                    logging.debug('log fichier='+self.f+' / Taux ins/del/remp='+str(self.tx)+' / seed='+str(self.seed)+' / moves='+str(synData.moves))
                    logging.debug('cararctères générés au hasard par blocs de 1 à 25')
                    moy, moyPond, temps, confusionMatrice, x, y, z, sim, xPrime, listeY, listeZ, dico,tps, (pri1,pri2,pri3), sep, Rappmoy, RappmoyPond, fmes, fmesPond = self._mainTest(td, planTravail, synData)
                    logging.debug('===================================================\n\n')
                finally:
                    # detach the per-run log file even when the run fails
                    logging.getLogger('').removeHandler(log)
                    log.close()

                self.cumTps += tps
                self.cumMoy += moy ; self.cumMoyPond += moyPond
                self.cumRappMoy += Rappmoy ; self.cumRappMoyPond += RappmoyPond
                self.cumFMMoy += fmes ; self.cumFMMoyPond += fmesPond
                self.cumTemps += temps ; self.nb += 1
                self.cumX += x ; self.cumY += y ; self.cumZ += z ; self.cumSim += sim
                self.cumPri1 += pri1 ; self.cumPri2 += pri2 ; self.cumPri3 += pri3
                self.cumSep += sep
                self.cumXprime += xPrime

                # add the current run to the previous ones, element by element
                # (named loop variables: in Python 2 the former "x" leaked out
                # of the comprehension and overwrote the local x)
                self.cumLY = [a + b for a, b in zip(self.cumLY, listeY)]
                self.cumLZ = [a + b for a, b in zip(self.cumLZ, listeZ)]
                self.cumInv = [a + b for a, b in zip(self.cumInv, dico['inv'])]
                self.cumSup = [a + b for a, b in zip(self.cumSup, dico['sup'])]
                self.cumIns = [a + b for a, b in zip(self.cumIns, dico['ins'])]
                self.cumRemp = [a + b for a, b in zip(self.cumRemp, dico['remp'])]
                self.cumDep = [a + b for a, b in zip(self.cumDep, dico['dep'])]
                self.cumLTexte1 += dico['lTexte1']
                self.cumLTexte2 += dico['lTexte2']
                self.cumFront_bloc += dico['front'][0]
                self.cumFront_bloc_sep += dico['front'][1]
                numpy.add(self.cumConfusionMatrice, confusionMatrice, self.cumConfusionMatrice)
                cumPri = (1.0/3)*self.cumPri1 + (1.0/3)*self.cumPri2 + (1.0/3)*self.cumPri3
                moyPri = round(100 * cumPri / self.nb,arrondi)
                moyY = round(100 * self.cumY / self.nb,arrondi)
                moySep = round(100 * self.cumSep / self.nb,arrondi)
                moySim = round((1.0/3)*moyPri + (1.0/3)*moyY + (1.0/3)*moySep, arrondi)

                Pri = (1.0/3)*pri1 + (1.0/3)*pri2 + (1.0/3)*pri3
                self.listeMoy.append(moy) ; self.listeMoyPond.append(moyPond)
                self.listeRappMoy.append(Rappmoy) ; self.listeRappMoyPond.append(RappmoyPond)
                self.listeFMMoy.append(fmes) ; self.listeFMMoyPond.append(fmesPond)

                self.listeSim.append(sim) ; self.listeTemps.append(temps)
                self.listeObjPri.append(Pri) ; self.listeObjMoy.append(y) ; self.listeObjSep.append(sep)
                self.listeObjPri1.append(pri1) ; self.listeObjPri2.append(pri2) ; self.listeObjPri3.append(pri3)
                self.listeObjY1.append(listeY[0]) ; self.listeObjY2.append(listeY[1]) ; self.listeObjY3.append(listeY[2])
                self.listeObjY4.append(listeY[3]) ; self.listeObjY5.append(listeY[4])

                # checkpoint: the whole test object is saved after every run
                with open(os.path.join(os.getcwd(), 'test_MediteAppli_'+algo+'.pkl'), 'wb') as output:
                    cPickle.dump(self, output)

            if not reprise:  # log the accumulated results of this (file, rate) pair
                m = int((self.cumTemps/self.nb)/60) ; s = (self.cumTemps/self.nb)-(m*60)
                # the same summary goes to both result logs
                resume = [
                    'log fichier='+self.f+' / Taux ins/del/remp='+str(self.tx)+' / moves='+str(synData.moves),
                    'moyenne des moyennes = %f' % (100*self.cumMoy/self.nb),
                    'moyenne des moyennes pondérées = %f' % (100*self.cumMoyPond/self.nb),
                    'moyenne des temps = '+str(self.cumTemps/self.nb)+' = '+str(m)+' mn '+str(s)+' s',
                    'moyenne des matrice de confusion = '+str(self.cumConfusionMatrice/self.nb),
                    'moyenne des x = '+str(100*self.cumX/self.nb)+', des y = '+str(100*self.cumY/self.nb)+', des z = '+str(100*self.cumZ/self.nb),
                    'moyenne des sim 0.5x+0.35y+0.15z = '+str(100*self.cumSim/self.nb),
                    "moyenne des X\'= "+str(100*self.cumXprime/self.nb),
                    "moyenne des PRI = "+str(moyPri),
                    "moyenne des SEP = "+str(moySep),
                    "moyenne des SIM = "+str(moySim),
                ]
                for ligne in resume:
                    logging.warning(ligne)
                logging.warning('===================================================\n\n')
                logRes.flush()
                # switch the root logger from logRes to logRes1 for the detailed report
                logging.getLogger('').removeHandler(logRes)
                logging.getLogger('').addHandler(logRes1)
                for ligne in resume:
                    logging.warning(ligne)

                afficheY = [round(100*v/self.nb,arrondi) for v in self.cumLY]
                logging.warning('moyenne des y1 = '+str(afficheY[0])+', moyenne des y2 = '+str(afficheY[1])+', moyenne des y3 = '+str(afficheY[2])+', moyenne des y4 = '+str(afficheY[3])+', moyenne des y5 = '+str(afficheY[4]))
                afficheZ = [round(100*v/self.nb,arrondi) for v in self.cumLZ]
                logging.warning('moyenne des z0 = '+str(afficheZ[0])+', moyenne des z1 = '+str(afficheZ[1])+', moyenne des z2 = '+str(afficheZ[2]))
                afficheInv = [round(v/self.nb,arrondi) for v in self.cumInv]
                logging.warning('Invariants : longueur moyenne = '+str(afficheInv[0])+', nombre moyen de blocs pour le texte 1 = '+str(afficheInv[1])+', nombre moyen de blocs pour le texte2 = '+str(afficheInv[2]))
                afficheSup = [round(v/self.nb,arrondi) for v in self.cumSup]
                logging.warning('Supprimés : longueur moyenne = '+str(afficheSup[0])+', nombre moyen de blocs = '+str(afficheSup[1]))
                afficheIns = [round(v/self.nb,arrondi) for v in self.cumIns]
                logging.warning('Inserés : longueur moyenne = '+str(afficheIns[0])+', nombre moyen de blocs = '+str(afficheIns[1]))
                afficheRemp = [round(v/self.nb,arrondi) for v in self.cumRemp]
                logging.warning('Remplacés : longueur moyenne = '+str(afficheRemp[0])+', nombre moyen de blocs pour le texte1 = '+str(afficheRemp[1])+', nombre moyen de blocs pour le texte2 = '+str(afficheRemp[2]))
                afficheDep = [round(v/self.nb,arrondi) for v in self.cumDep]
                logging.warning('Déplacés : longueur moyenne = '+str(afficheDep[0])+', nombre moyen de blocs pour le texte1 = '+str(afficheDep[1])+', nombre moyen de blocs pour le texte2 = '+str(afficheDep[2]))
                logging.warning('longueur texte1 '+str(self.cumLTexte1/self.nb))
                logging.warning('longueur texte2 '+str(self.cumLTexte2/self.nb))
                zPrime = str(1.0/3*(afficheZ[0]+afficheZ[1]+afficheZ[2]))
                logging.warning("moyenne des z\' = "+zPrime)

                logging.warning('===================================================\n\n')
                logging.getLogger('').removeHandler(logRes1)
                logRes1.flush()
                logging.getLogger('').addHandler(logRes)

                # one row per (file, rate) pair, same column order as the header
                strCsv = ';'.join([
                    self.f, str(self.tx), str(len(lSeed)), str(synData.moves),
                    str(100*self.cumX/self.nb),
                    str(100*self.cumY/self.nb), str(100*self.cumZ/self.nb),
                    str(100*self.cumXprime/self.nb),
                    zPrime,
                    liste_csv(afficheY),
                    liste_csv(afficheZ),
                    liste_csv(afficheInv),
                    liste_csv(afficheSup),
                    liste_csv(afficheIns),
                    liste_csv(afficheRemp),
                    liste_csv(afficheDep),
                    str(self.cumLTexte1/self.nb), str(self.cumLTexte2/self.nb),
                    str(self.cumTps/self.nb), str(100*self.cumMoy/self.nb),
                    str(100*self.cumMoyPond/self.nb),
                ])
                with open(nom_csv1, 'a') as sortie:
                    sortie.write(strCsv+';\n')

                # second file: every average is followed by its standard deviation
                strCsv2 = ';'.join([
                    self.f, str(self.tx), str(len(lSeed)), str(synData.moves),
                    str(moyPri), ecart_pct(self.listeObjPri),
                    str(moyY), ecart_pct(self.listeObjMoy),
                    str(moySep), ecart_pct(self.listeObjSep),
                    str(moySim), ecart_pct(self.listeSim),
                    moyenne(self.cumTps),
                    str(round(self.ecartType(self.listeTemps),arrondi)),
                    moyenne_pct(self.cumFMMoyPond), ecart_pct(self.listeFMMoyPond),
                    moyenne_pct(self.cumRappMoyPond), ecart_pct(self.listeRappMoyPond),
                    moyenne_pct(self.cumMoyPond), ecart_pct(self.listeMoyPond),
                    moyenne_pct(self.cumPri1), ecart_pct(self.listeObjPri1),
                    moyenne_pct(self.cumPri2), ecart_pct(self.listeObjPri2),
                    moyenne_pct(self.cumPri3), ecart_pct(self.listeObjPri3),
                    liste_csv(afficheY),
                    ecart_pct(self.listeObjY1),
                    ecart_pct(self.listeObjY2),
                    ecart_pct(self.listeObjY3),
                    ecart_pct(self.listeObjY4),
                    ecart_pct(self.listeObjY5),
                    liste_csv(afficheInv),
                    liste_csv(afficheSup),
                    liste_csv(afficheIns),
                    liste_csv(afficheRemp),
                    liste_csv(afficheDep),
                    moyenne(self.cumLTexte1),
                    moyenne(self.cumLTexte2),
                    moyenne(self.cumFront_bloc),
                    moyenne(self.cumFront_bloc_sep),
                    moyenne_pct(self.cumFMMoy), ecart_pct(self.listeFMMoy),
                    moyenne_pct(self.cumRappMoy), ecart_pct(self.listeRappMoy),
                    moyenne_pct(self.cumMoy), ecart_pct(self.listeMoy),
                ])
                with open(nom_csv2, 'a') as sortie:
                    sortie.write(strCsv2+';\n')

795

def test_alignSyntheticLeraningPoids(self):
    """Run the synthetic alignments for every weight triple and save the results.

    For every (file, rate, coefficient triple) of the hard-coded lists and
    of generateTupleCoeff(), one synthetic text is generated per seed and
    aligned through _mainTest with that triple. self is pickled after each
    run. Once all the seeds of a triple are done, the averages are logged
    and one line is appended to logPoids.txt.

    When the instance already has an attribute f (a previous run that
    crashed), every run up to and including the stored
    (f, tx, coeff, seed) combination is skipped.
    """
    tdf = test_data.TestDataFactory()
    td = tdf.getTestData(test_data.Leroux,"1")

    planTravail = Donnees.planTravail.PlanTravail('Traduction', 'Traduction',
        'pVS', td.file1, 'pVC', td.file2, Donnees.planTravail.Parametres(1,
        td.getp2()*25,td.getp3()*1000,td.getpcarOuMot(),td.getpcaseSensitive(),
        td.getpseparatorSensivitive(),td.getpdiacriticSensitive()))
    logRes = logging.FileHandler(filename=os.path.join(os.getcwd(),'logRes.txt'),mode='a')
    logRes.setLevel(logging.WARNING)
    logRes.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logging.getLogger('').addHandler(logRes)
    outPoids = open(os.path.join(os.getcwd(),'logPoids.txt'),'a')
    try:
        # hard-coded experiment parameters, edited by hand
        lFile = ['Althusser']
        lTaux = [0.1]
        lSeed = [9,33,222,2228, 4750]
        lCoeff = self.generateTupleCoeff()
        self.pos_lCoeff = 0
        var = vars(self)
        if 'f' in var:  # resuming a previous run that crashed
            reprise = True ; f_backup = self.f ; tx_backup = self.tx
            seed_backup = self.seed ; coeff_backup = self.coeff
        else:
            reprise = False
        next_iteration = False
        for self.f in lFile:
            if next_iteration or not reprise: pass
            elif reprise and self.f != f_backup: continue
            for self.tx in lTaux:
                if next_iteration or not reprise: pass
                elif reprise and self.tx != tx_backup: continue
                for self.coeff in lCoeff:
                    if next_iteration or not reprise: pass
                    elif reprise and self.coeff != coeff_backup: continue

                    if next_iteration or not reprise:
                        # fresh coefficient triple: reset the accumulators
                        self.cumMoy = self.cumMoyPond = self.cumTemps = self.nb = 0.0
                        self.cumX = self.cumY = self.cumZ = self.cumSim = 0.0
                        self.cumConfusionMatrice = numpy.zeros((5,5),float)
                    for self.seed in lSeed:
                        if next_iteration and reprise:
                            # first run after the last completed one: resume here
                            reprise = False ; next_iteration = False
                        elif reprise and self.seed != seed_backup: continue
                        elif reprise and not next_iteration:
                            # this is the last run that completed, move to the next
                            next_iteration = True ; continue

                        # plain attribute lookup instead of eval() on a built string
                        td = tdf.getTestData(getattr(test_data, self.f),"1")
                        synData = SyntheticData(td.gettexte1(), self.tx, self.seed)
                        log = logging.FileHandler(filename=os.path.join(os.getcwd(),'log_'+self.f+'_'+str(self.tx)+'_'+str(self.seed)+'_mv'+str(synData.moves)+'.txt'),mode='w')
                        log.setLevel(5)
                        log.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
                        logging.getLogger('').addHandler(log)
                        try:
                            logging.debug('===================================================')
                            logging.debug('log fichier='+self.f+' / Taux ins/del/remp='+str(self.tx)+
                                          ' / seed='+str(self.seed)+' / moves='+str(synData.moves)+' / coeff='+str(self.coeff))
                            logging.debug('chararctères générés au hasard par blocs de 1 à 25')
                            # _mainTest returns 19 values; only the first eight
                            # are used here. Unpacking the whole result into
                            # eight names raised ValueError.
                            resultat = self._mainTest(td,planTravail,synData, self.coeff)
                            moy,moyPond,temps,confusionMatrice,x,y,z,sim = resultat[:8]
                            logging.debug('===================================================\n\n')
                        finally:
                            # detach the per-run log file even when the run fails
                            logging.getLogger('').removeHandler(log)
                            log.close()
                        self.cumMoy += moy ; self.cumMoyPond += moyPond ; self.cumTemps += temps ; self.nb += 1
                        self.cumX += x ; self.cumY += y ; self.cumZ += z ; self.cumSim += sim
                        numpy.add(self.cumConfusionMatrice,confusionMatrice, self.cumConfusionMatrice)
                        # checkpoint: the whole test object is saved after every run
                        with open(os.path.join(os.getcwd(),'test_MediteAppli.pkl'), 'wb') as output:
                            cPickle.dump(self, output)

                    if not reprise:  # log the accumulated results of this triple
                        logging.warning('log fichier='+self.f+' / Taux ins/del/remp='+str(self.tx)+' / moves='+str(synData.moves)+' / coeff='+str(self.coeff))
                        logging.warning('moyenne des moyennes = %f',100*self.cumMoy/self.nb)
                        logging.warning('moyenne des moyennes pondérées = %f',100*self.cumMoyPond/self.nb)
                        m = int((self.cumTemps/self.nb)/60) ; s = (self.cumTemps/self.nb)-(m*60)
                        logging.warning('moyenne des temps = '+str(self.cumTemps/self.nb)+' = '+str(m)+' mn '+str(s)+' s')
                        logging.warning('moyenne des matrice de confusion = '+str(self.cumConfusionMatrice/self.nb))
                        logging.warning('moyenne des x = '+str(100*self.cumX/self.nb)+', des y = '+str(100*self.cumY/self.nb)+', des z = '+str(100*self.cumZ/self.nb))
                        logging.warning('moyenne des sim 0.5x+0.35y+0.15z = '+str(100*self.cumSim/self.nb))
                        logging.warning('===================================================\n\n')
                        logRes.flush()
                        moyMoy = (100*self.cumMoy/self.nb + 100*self.cumMoyPond/self.nb) / 2
                        poids = str(self.coeff[0])+';'+str(self.coeff[1])+';'+str(self.coeff[2])+';'+str(100*self.cumMoy/self.nb)+';'+str(100*self.cumMoyPond/self.nb)+';'+str(moyMoy)+';'+str(self.cumTemps/self.nb)+'\n'
                        outPoids.write(poids)
                        outPoids.flush()
    finally:
        # close the weights file even when a run raises
        outPoids.close()

883

def generateTupleCoeff(self, granularite=0.1, nbCoeff=3):
    """Return every triple of multiples of 0.05 in [0, 1] whose sum is 1.

    The 231 triples are ordered by decreasing first, then decreasing second
    coefficient, from (1.0, 0.0, 0.0) down to (0.0, 0.0, 1.0). The number of
    triples is also printed on standard output.

    granularite, nbCoeff -- accepted but not used: the step is fixed at
    1/20 and the tuples always have three coefficients.
    """
    pas = 20
    tupleListe = []
    for i in range(pas, -1, -1):
        # the third coefficient is imposed by the first two, so only the
        # values of j that leave k >= 0 are visited
        for j in range(pas - i, -1, -1):
            k = pas - i - j
            tupleListe.append((1.0*i/pas, 1.0*j/pas, 1.0*k/pas))
    # parenthesised form prints the same text on Python 2 and Python 3
    print(len(tupleListe))
    return tupleListe

896

def _mainTest(self, td, planTravail, synData, coeff=None):
    """Generate the synthetic text, run the aligner selected by the global algo and score it.

    td          -- test data providing the alignment parameters
    planTravail -- work plan handed to the aligner
    synData     -- synthetic data generator; getModifiedText() is called on
                   it, then its t1 and t2 are written to t1.txt and t2.txt
                   in the current directory
    coeff       -- passed to run() of the default aligner only

    Returns a 19-item tuple: moy, moyPond, duration, confusionMatrice, x, y,
    z, sim, xPrime, listeY, listeZ, dicoSommes, duration (again),
    (pri1, pri2, pri3), sep, Rappmoy, RappmoyPond, fmes, fmesPond.
    """
    synData.getModifiedText()
    # with-blocks close both files even when a write fails
    with open(os.path.join(os.getcwd(),'t1.txt'),'w') as f1:
        f1.write(synData.t1)
    with open(os.path.join(os.getcwd(),'t2.txt'),'w') as f2:
        f2.write(synData.t2)

    a = time.time()
    nom_algo = algo.lower()
    if nom_algo in ('hmmdep', 'hmmnodep'):
        # the two HMM variants only differ by the move computation flag
        et = fengAlgoMedite(synData.t1, synData.t2, td.getp2(), td.getpcaseSensitive(), td.getpseparatorSensivitive(), td.getpdiacriticSensitive(), planTravail,calculDep=(nom_algo == 'hmmdep'))
        dgm = Controleurs.DGManager.DGManager()
        et.run()
    else:
        # NOTE(review): this branch aligns the original pair
        # td.gettexte1()/td.gettexte2(), not synData.t1/synData.t2, while the
        # scoring below compares against the synthetic alignment -- confirm
        # that this is intended
        et = MediteAppli.MediteAppli.ecartTextesRecur2(td.gettexte1(), td.gettexte2(),
                        td.getp1(),
                        td.getp2(), td.getp3(),
                        td.getpcarOuMot(), td.getpcaseSensitive(),
                        td.getpseparatorSensivitive(), td.getpdiacriticSensitive(),
                        planTravail,
                        algo)
        dgm = Controleurs.DGManager.DGManager()
        et.run(textesApparies=False, dossierRapport=dgm.getDG('Traduction', 'Traduction'), coeff=coeff)
    b = time.time()
    duree = b - a
    # floor division keeps a whole number of minutes on Python 2 and 3 alike
    m = (int(b) - int(a)) // 60
    s = duree - m*60
    logging.debug('durée = '+str(duree)+' / '+str(m)+' mn '+str(s)+' s')
    bbl = et.bbl
    moy, moyPond, confusionMatrice, Rappmoy, RappmoyPond, fmes, fmesPond = self.compare2(bbl, synData, len(synData.t1))
    x, y, z, sim, dicoSommes, listeY, listeZ, (pri1,pri2,pri3), sep = bbl.evaluation()
    # share of invariant and moved characters over the length of both texts
    xPrime = float(dicoSommes['inv'][0]+dicoSommes['dep'][0])/float(dicoSommes['lTexte1']+dicoSommes['lTexte2'])
    logging.debug('FIN')
    return moy, moyPond, duree, confusionMatrice, x, y, z, sim, xPrime, listeY, listeZ, dicoSommes, duree, (pri1,pri2,pri3), sep, Rappmoy, RappmoyPond, fmes, fmesPond

942 943

def compare2(self, bbl, synData, len_t1):
    """Score the aligner output against the reference alignment.

    Operation codes used throughout:
      0 = INV (unchanged), 1 = INS, 2 = SUP (deletion), 3 = REMP
      (replacement), 4 = MOV seen in text 1, 5 = MOV seen in text 2.

    Parameters:
      bbl     -- aligner result; ``bbl.liste`` is iterated as pairs
                 ``(B1, B2)`` where each non-None block exposes its type at
                 index 0 ('S', 'R', 'BC', 'D' for B1; 'I', 'R', 'BC', 'D'
                 for B2) and its start/end offsets at indexes 1 and 2.
                 B2 offsets are shifted by ``len_t1``.
      synData -- provides the reference codes ``t1ToT2`` / ``t2ToT1``, the
                 flags ``moves`` / ``moveOnly`` and the rates ``TxInv``,
                 ``TxSup``, ``TxIns``, ``TxRemp``, ``TxMov`` used as
                 weights.
      len_t1  -- number of positions compared on each side.

    Returns:
      ``(Precmoy, PrecmoyPond, m, Rappmoy, RappmoyPond, fmes, fmesPond)``:
      mean and rate-weighted precision, the 5x5 confusion matrix (rows =
      reference inv/ins/sup/remp/mov, columns = system label, both MOV
      codes folded into column 4), mean and rate-weighted recall, and the
      two corresponding f-measures.

    Classes taken into account depend on the mode:
      * ``synData.moveOnly``: mean over 'mov' only, weighted sum over
        'inv' and 'mov';
      * ``synData.moves``: inv, sup, ins, remp, mov;
      * otherwise: inv, sup, ins, remp.

    Previously only the ``moves`` mode defined every returned value; the
    two other modes raised UnboundLocalError.  A ratio whose denominator is
    zero (class absent from the reference or never produced by the
    aligner) now counts as 0.0 instead of raising ZeroDivisionError.
    """
    t1ToT2 = synData.t1ToT2
    t2ToT1 = synData.t2ToT1
    res1 = numpy.zeros(len_t1, numpy.int8)
    res2 = numpy.zeros(len_t1, numpy.int8)

    def _ratio(num, den):
        """Return num/den, or 0.0 when the denominator is zero."""
        if den == 0:
            return 0.0
        return num / den

    # Block type -> operation code.  When moves are not part of the model,
    # a moved block ('D') is scored as a deletion in text 1 and as an
    # insertion in text 2.
    codes1 = {'S': 2, 'R': 3, 'BC': 0, 'D': 2}
    codes2 = {'I': 1, 'R': 3, 'BC': 0, 'D': 1}
    if synData.moves:
        codes1['D'] = 4
        codes2['D'] = 5

    # Project the aligner blocks onto one code per position of each text.
    for (B1, B2) in bbl.liste:
        if B1 is not None:
            code = codes1.get(B1[0])
            if code is not None:
                for i in range(B1[1], B1[2]):
                    res1[i] = code
        if B2 is not None:
            code = codes2.get(B2[0])
            if code is not None:
                for i in range(B2[1] - len_t1, B2[2] - len_t1):
                    res2[i] = code

    # Compare the aligner codes with the reference codes.
    classes = ('inv', 'ins', 'sup', 'remp', 'mov')
    nbOK = dict.fromkeys(classes, 0.0)    # positions correctly labelled
    nbREF = dict.fromkeys(classes, 0.0)   # positions per reference class
    confusion = dict((nom, {}) for nom in classes)  # ref class -> {sys code: count}
    dAppli = dict.fromkeys(range(6), 0.0)  # positions per system code
    # Reference codes looked at on each side; any other code is ignored.
    ref1 = {0: 'inv', 2: 'sup', 3: 'remp', 4: 'mov'}
    ref2 = {0: 'inv', 1: 'ins', 3: 'remp', 5: 'mov'}
    flux = ((ref1, t1ToT2, res1), (ref2, t2ToT1, res2))
    for i in range(len_t1):
        for noms, ref, res in flux:
            code_ref = ref[i]
            nom = noms.get(code_ref)
            if nom is None:
                continue
            if nom == 'mov':
                assert synData.moves
            code_sys = int(res[i])
            # A position is correct when the system code equals the
            # reference code.
            if code_sys == code_ref:
                nbOK[nom] += 1
            nbREF[nom] += 1
            d = confusion[nom]
            d[code_sys] = d.get(code_sys, 0.0) + 1
            dAppli[code_sys] += 1

    nbSYS = {'inv': dAppli[0], 'ins': dAppli[1], 'sup': dAppli[2],
             'remp': dAppli[3], 'mov': dAppli[4] + dAppli[5]}

    # Global and per-class recall, logged only for non-empty classes.
    totOK = 0.0 + nbOK['ins'] + nbOK['sup'] + nbOK['remp'] + nbOK['inv']
    totREF = 0.0 + nbREF['ins'] + nbREF['sup'] + nbREF['remp'] + nbREF['inv']
    if synData.moves:
        totOK += nbOK['mov']
        totREF += nbREF['mov']
    for nom, message in (('sup', 'deleOK/dele = %f'),
                         ('ins', 'insOK/ins = %f'),
                         ('remp', 'rempOK/remp = %f'),
                         ('mov', 'movOK/mov = %f'),
                         ('inv', 'invOK/inv = %f')):
        if nbREF[nom] > 0:
            logging.debug(message, nbOK[nom] / nbREF[nom])
    if totREF > 0:
        logging.debug('totOK/tot = %f', totOK / totREF)

    # Classes entering the plain mean and the weighted sum for this mode.
    if synData.moveOnly:
        classes_moy = ('mov',)
        classes_pond = ('inv', 'mov')
        label_moy = 'mov'
        label_pond = 'mov,inv'
    elif synData.moves:
        classes_moy = classes_pond = ('inv', 'sup', 'ins', 'remp', 'mov')
        label_moy = label_pond = 'ins,remp,sup,mov,inv'
    else:
        classes_moy = classes_pond = ('inv', 'sup', 'ins', 'remp')
        label_moy = label_pond = 'ins,remp,sup,inv'
    attr_poids = {'inv': 'TxInv', 'sup': 'TxSup', 'ins': 'TxIns',
                  'remp': 'TxRemp', 'mov': 'TxMov'}

    def _moyenne(denoms):
        """Unweighted mean of OK/denominator over the selected classes."""
        total = sum(_ratio(nbOK[c], denoms[c]) for c in classes_moy)
        return total / len(classes_moy)

    def _ponderee(denoms):
        """Sum of rate * OK / denominator over the selected classes."""
        return sum(_ratio(getattr(synData, attr_poids[c]) * nbOK[c], denoms[c])
                   for c in classes_pond)

    # Precision divides by what the aligner produced, recall by what the
    # reference contains.
    Precmoy = _moyenne(nbSYS)
    PrecmoyPond = _ponderee(nbSYS)
    Rappmoy = _moyenne(nbREF)
    RappmoyPond = _ponderee(nbREF)
    logging.debug('Precmoy(' + label_moy + ') = %f', 100.0*Precmoy)
    logging.debug('PrecmoyPond(' + label_pond + ') = %f', 100.0*PrecmoyPond)
    logging.debug('Rappmoy(' + label_moy + ') = %f', 100.0*Rappmoy)
    logging.debug('RappmoyPond(' + label_pond + ') = %f', 100.0*RappmoyPond)

    # Confusion matrix.
    # NOTE(review): the decimal precision set below is not used by anything
    # in this function; it is kept only because it changes the current
    # decimal context, which other code might rely on -- confirm and drop.
    import decimal
    decimal.getcontext().prec = 3
    m = numpy.zeros((5, 5), float)
    noms_codes = {0: 'inv', 1: 'ins', 2: 'sup', 3: 'remp'}
    for ligne, nom in enumerate(classes):
        d = confusion[nom]
        s = sum(d.values())
        ch = ''
        for cle, nb in d.items():
            if cle in noms_codes:
                colonne = cle
                str2 = noms_codes[cle]
            else:
                # codes 4 and 5 are both moves
                colonne = 4
                str2 = 'mov'
            ch += str2+'/'+nom+'='+str(nb/s)+' // '
            m[ligne, colonne] = nb/s
        # The move column is the sum of both move codes.
        m[ligne, 4] = 0
        if 4 in d:
            m[ligne, 4] += d[4]/s
        if 5 in d:
            m[ligne, 4] += d[5]/s
        logging.debug(ch)
    logging.debug(m)

    fmes = _ratio(2.0*Precmoy*Rappmoy, 0.0+Precmoy+Rappmoy)
    fmesPond = _ratio(2.0*PrecmoyPond*RappmoyPond, 0.0+PrecmoyPond+RappmoyPond)
    return Precmoy, PrecmoyPond, m, Rappmoy, RappmoyPond, fmes, fmesPond

1116

def compare(self, bbl, synData, len_t1):
    """Block-driven comparison with the reference alignment (legacy).

    The original author flagged this function as buggy ("buggé"); the
    scoring logic is kept as it was.  Unlike ``compare2`` it walks the
    aligner blocks directly and, for each covered position, looks at the
    reference codes ``synData.t1ToT2`` / ``synData.t2ToT1`` (0 = INV,
    1 = INS, 2 = SUP, 3 = REMP).

    Parameters:
      bbl     -- aligner result; ``bbl.liste`` is iterated as pairs
                 ``(B1, B2)`` whose non-None blocks expose type, start and
                 end at indexes 0, 1 and 2 (B2 offsets shifted by
                 ``len_t1``).
      synData -- provides ``t1ToT2``, ``t2ToT1`` and the rates ``TxInv``,
                 ``TxSup``, ``TxIns``, ``TxRemp`` used as weights.
      len_t1  -- offset subtracted from the B2 positions.

    Returns None; the per-class ratios, their mean and their rate-weighted
    sum are printed (as percentages) and logged.  A class with no position
    contributes 0.0 to the two means instead of raising
    ZeroDivisionError.
    """
    t1ToT2 = synData.t1ToT2
    t2ToT1 = synData.t2ToT1
    ins = insOK = dele = deleOK = remp = rempOK = inv = invOK = 0.0

    def _ratio(num, den):
        """Return num/den, or 0.0 when the denominator is zero."""
        if den == 0:
            return 0.0
        return num / den

    for (B1, B2) in bbl.liste:
        if B1 is not None:
            B1_type = B1[0]
            B1_deb = B1[1]
            B1_fin = B1[2]
            if B1_type == 'S' or B1_type == 'D':
                # SUP = 2; a moved block is scored like a deletion
                for i in range(B1_deb, B1_fin):
                    if t1ToT2[i] == 2:
                        deleOK += 1
                    dele += 1
            elif B1_type == 'R':  # REMP = 3
                for i in range(B1_deb, B1_fin):
                    if t1ToT2[i] == 3:
                        rempOK += 1
                        remp += 1
                    elif t1ToT2[i] == 2:
                        deleOK += 1
                        dele += 1
                    elif t1ToT2[i] == 0 and t2ToT1[i] == 1:
                        continue
                    else:
                        inv += 1
            elif B1_type == 'BC':  # INV = 0
                for i in range(B1_deb, B1_fin):
                    if t1ToT2[i] == 0:
                        invOK += 1
                    inv += 1
        if B2 is not None:
            B2_type = B2[0]
            B2_deb = B2[1] - len_t1
            B2_fin = B2[2] - len_t1
            if B2_type == 'I' or B2_type == 'D':
                # INS = 1; a moved block is scored like an insertion
                for i in range(B2_deb, B2_fin):
                    if t2ToT1[i] == 1:
                        insOK += 1
                    ins += 1
            elif B2_type == 'R':  # REMP = 3
                for i in range(B2_deb, B2_fin):
                    if t2ToT1[i] == 3:
                        rempOK += 1
                        remp += 1
                    elif t2ToT1[i] == 1:
                        insOK += 1
                        ins += 1
                    elif t2ToT1[i] == 0 and t1ToT2[i] == 2:
                        continue
                    else:
                        inv += 1
            elif B2_type == 'BC':  # INV = 0
                for i in range(B2_deb, B2_fin):
                    if t2ToT1[i] == 0:
                        invOK += 1
                    inv += 1

    totOK = 0.0 + insOK + deleOK + rempOK + invOK
    tot = 0.0 + ins + dele + remp + inv

    # Per-class reports, skipped for empty classes.  The single-argument
    # print(...) form emits the same text as the former print statement
    # (label, two spaces, value).
    for nom, ok, total in (('deleOK/dele', deleOK, dele),
                           ('insOK/ins', insOK, ins),
                           ('rempOK/remp', rempOK, remp),
                           ('invOK/inv', invOK, inv),
                           ('totOK/tot', totOK, tot)):
        if total > 0:
            logging.debug(nom + ' = %f', ok/total)
            print('%s =  %s' % (nom, 100.0*ok/total))

    moy = 100.0*(_ratio(invOK, inv) + _ratio(deleOK, dele)
                 + _ratio(insOK, ins) + _ratio(rempOK, remp))/4
    print('moy(ins,remp,sup,inv) =  %s' % moy)
    logging.debug('moy(ins,remp,sup,inv) = %f', moy)

    moyPond = 100.0*(_ratio(synData.TxInv*invOK, inv)
                     + _ratio(synData.TxSup*deleOK, dele)
                     + _ratio(synData.TxIns*insOK, ins)
                     + _ratio(synData.TxRemp*rempOK, remp))
    print('moyPond(ins,remp,sup,inv) =  %s' % moyPond)
    logging.debug('moyPond(ins,remp,sup,inv) = %f', moyPond)

Source Code for Module medite.MediteAppli.test.ijcai