# -*- coding: iso-8859-1 -*- # Copyright 20003 - 2008: Julien Bourdaillet (julien.bourdaillet@lip6.fr), Jean-Gabriel Ganascia (jean-gabriel.ganascia@lip6.fr) # This file is part of MEDITE. # # MEDITE is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # MEDITE is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Foobar; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA import logging, os, os.path, string, sys import numpy.oldnumeric as Numeric import numpy.numarray as numarray import numpy.numarray.ma as ma def _readFile(name): """ Lit un fichier dans le dossier courant et renvoie une chaine """ path=os.path.join(os.getcwd(),"MediteAppli","test",name) f = open(path) res = f.read() f.close() return res def test(): comite = _readFile('comite.txt') papers = _readFile('papers.txt') lcom = comite.splitlines() logging.debug('# comite='+str(len(lcom))) sepTable = string.maketrans("""\r\n\t"""," ") i = 0 for c in lcom: liste_nom = c.split() nom = liste_nom[-1] pos = papers.find(nom) if pos != -1: p = papers[pos-30:pos+70] p = p.translate(sepTable) logging.debug(c+' / '+ p +'\n') i += 1 logging.debug('# comite member with paper='+str(i)) def test2(): nb =1.0/(11) i=0 while i < 10000 and nb > 0: nb1 = 60.0*nb ar = int(nb1) nb = nb1-ar assert 0 <= ar <60 logging.debug('nb1=%f / ar=%f / nb=%f',nb1,ar,nb) i+=1 logging.debug('i='+str(i)) def test3(): import numarray a = numarray.zeros(5,numarray.Int8) for i in range(len(a)): a[i] = i b = numarray.ones(5,numarray.Int8) c = numarray.zeros(5,numarray.Int8) print a print b print a-b print b-a print numarray.minimum(a,b) taille = len(a) numarray.logical_and(b-a,c, a) print a #for pos in xrange(taille): a = numarray.array([1,2,3,4]) print numarray.sum(a) def test4(): import numarray pos = numarray.array([1,5,9,1]) x = numarray.array([0,0,0,0,0,0,0,0,0,0]) numarray.put(x, pos, [1,1,1,1]) print x def d(test,i): return where(test==i,1,0) def d2(test): d(test) #a=numarray.fromfunction(d2,(10,)) #pos2 = numarray.resize(pos,(4,10)) b = numarray.zeros(10) a = numarray.zeros((4,10)) print a for i in xrange(4-1,-1,-1): pos2 = pos[i] c = numarray.where(numarray.arange(10)==pos2,1,0) numarray.add(b,c,b) a[i] = b print a def test5(): ma = Numeric.array([[1,2,3],[4,5,6]]) for x in ma: print x def _readFile(name): """ Lit un fichier dans le dossier courant et renvoie une chaine """ path=os.path.join(os.getcwd(),"MediteAppli","test", "textes",name) f = open(path) res = f.read() f.close() return res def test6(): a1 = [-1,1,2,3,10,5] b1 = [-1,10,3,2,1,6] # ; b1.extend([-1]*2) #b1.extend([-1] * 3) a = numarray.array(a1, type = 'Float32') b = numarray.array(b1)#,6])#, type = 'Float32') #c = numarray.where(((a / b) > 0.5) or ((b / a) < 0.4),a,0) c = numarray.where((a / b > 0.5) , a, 0) c2 = numarray.where((a / b < 2) , b, 0) c3 = numarray.logical_and(c,c2) #c>0 c2 >0,c,0) print c,c2, c3 #c4 = numarray.ma.array(c,mask=c3) c4 = numarray.where(c3,a,0) c5 = numarray.where(c3,b,0) c6 = numarray.logical_and(c4,c5) print c4, c5, c6 #d = numarray.zeros(5)#, type = 'Float32') #for i in xrange(len(a)): ## print i,a[i] / b[i], b[i] / a[i] # if a[i] / b[i] > 0.5: d[i] = a[i] #print d import alignement #from test.test_data import TestDataFactory #f = TestDataFactory() #td = f.getTestData(test_data.ClaudeBernard,"2") align = alignement.AlignAstarRecur2(_readFile('CB_experience.txt')+_readFile("CB_Texte_de_1857.txt")) c7,c8 = align.extractRemplacements(a1,b1) #print numarray.nonzero(c7),numarray.nonzero(c7)[0][1] c9 = numarray.array([0,0,0]) #print numarray.nonzero(c9),numarray.nonzero(c9)[0][1] print c7,c8 print numarray.argmax(a1) print numarray.array() print len([]) def test7(): a = ['aa','bb'] b = ['aa','bb'] print a == b def test8(): a = [ 100,80,75,74,70,60,50,30,22,20,15,10,2] b = [2,5,10,12,13,19,20,30,50] import bisect print bisect.bisect_left(a,11) print bisect.bisect_right(b,2) def test9(): import math print math.log(0L) def fact(x): if x == 0: return 1 else: return x * fact(x-1) def fact2(n): return reduce(lambda x,y:x*y,range(1,n+1)) F ={} def fact3(n): n1 = n try: return F[n] except KeyError: pass f = 1 while (n > 0): f = f * n n = n - 1 F[n1] = f return f def test10(): import math #f = lambda(n: n* f(n-1)) n = 7 ; m = 7 t1 = math.pow(2,n) * math.pow(2,m) t2 = 0 for i in xrange(1,m+1): #print 'i',i for j in xrange(1,i+1): #print 'j',j new_t2 = (fact3(i)/fact3(i-j)) * math.pow(3,j) t2 += new_t2 print 'i',i,new_t2 print t1,t2,t1+t2 def test11(): import math n = 7 ; m = 7 f_n = fact3(n) ; f_m = fact3(m) t = 0 for i in xrange(1,m): t2 = 0 for j in xrange(1,n): c2 = f_n / (fact3(j) * fact3(n-j)) #c2 = fact3(i) / (fact3(j) * fact3(i-j)) new_t2 = 1#(fact3(i)/fact3(i-j)) #* math.pow(3,j) t2 += c2 * new_t2 print 'i:',i,' / c2:',c2,' / new_t2:',new_t2,' / c2*new_t2:',c2*new_t2,' / t2:',t2 c1 = f_m / (fact3(i) * fact3(m-i)) t += c1 * t2 print 'i:',i,' / c1:',c1,' / t2:',t2,' / c1*t2:',c1*t2 print 't:',t def comb(n,k): assert n >= k if k < 0: k = 0 return fact3(n) / (fact3(n-k) * fact3(k)) def arang(n,k): assert n >= k return fact3(n) / fact3(n-k) def test12(): import math n = 7 ; m = 7 f_n = fact3(n-1) ; f_m = fact3(m-1) t = 0 for i in xrange(1,m): t2 = 0 for j in xrange(1,n): c2 = comb(m-1,j-1) #f_n / (fact3(j-1) * fact3(n-1-(j-1))) #c2 = fact3(i) / (fact3(j) * fact3(i-j)) #if i >= j: new_t2 = (fact3(i)/fact3(i-j)) #* math.pow(3,j) #else: new_t2 = (fact3(j)/fact3(j-i)) c1 = comb(n-1,i-1) #f_m / (fact3(i-1) * fact3(m-1-(i-1))) t2 += c2 * new_t2 *c1 print 'i:',i,' / c2:',c2,' / new_t2:',new_t2,' / c2*new_t2:',c2*new_t2,' / t2:',t2 t += t2 print 'i:',i,' / c1:',c1,' / t2:',t2,' / c1*t2:',c1*t2 print 't:',t def test13(): import math n = 3 ; m = 3 f_n = fact3(n-1) ; f_m = fact3(m-1) t = 0 for i in xrange(1,m): #t2 = 0 for j in xrange(1,n): c2 = comb(m-1,j-1) #f_n / (fact3(j-1) * fact3(n-1-(j-1))) #c2 = fact3(i) / (fact3(j) * fact3(i-j)) #if i >= j: #new_t2 = (fact3(i)/fact3(i-j)) #* math.pow(3,j) #else: new_t2 = (fact3(j)/fact3(j-i)) c1 = comb(n-1,i-1) #f_m / (fact3(i-1) * fact3(m-1-(i-1))) new_t2 = 0 ; x = min(i,j) for k in xrange(x+1): new_t2 += math.pow(comb(x,k),2) #* math.pow(3,k) t += c1 * c2 * new_t2 print 'i:',i,' / c2:',c2,' / new_t2:',new_t2#,' / c2*new_t2:',c2*new_t2,' / t2:',t2 #t += t2 print 'i:',i,' / c1:',c1#,' / t2:',t2,' / c1*t2:',c1*t2 print 't:',t l = [2, [2, 5, [3, 3, 1], [0, 0], 0], [5, 5, [28, 28,1], [0, 0], 1]] def aplatir(s,x): #print 's=',s,' / x=',x try: reduce(aplatir,x,s) except: #print 'exception' s.append(x) #print s return s import math #print aplatir([], l) import cPickle fichier_cp = os.path.join(os.getcwd(),'cp.pkl') if False and os.path.isfile(fichier_cp): pkl_file = open(fichier_cp, 'rb') CP = cPickle.load(pkl_file) pkl_file.close() else: CP = {} def choose_pos(nb_choix,taille_seq): assert nb_choix <= taille_seq try: return CP[(nb_choix,taille_seq)] except KeyError: pass liste_nuplet = [] for i in xrange(taille_seq,0,-1): if nb_choix == 1: liste_nuplet.append([i]) else: l2 = choose_pos(nb_choix-1,taille_seq-1) #print l2 #if len(l2) == 0: continue l3 = map(lambda elt: aplatir([],[i,elt]), l2) #print l3 #j = 1 ; maxi = sum(l3[0]) ; l4 = [l3[0]] #while j < len(l3): # if sum(l3[j]) < maxi and len(set(l3[j]))==len(l3[j]): # l4.append(l3[j]) # maxi = sum(l3[j]) # j += 1 for x in l3: assert len(x) == nb_choix, (x,nb_choix) liste_nuplet.extend(l3) l4 = filtre2(liste_nuplet, taille_seq, nb_choix) if not CP.has_key((nb_choix,taille_seq)): logging.debug( 'BU CP[' +str((nb_choix,taille_seq))+ ']') CP[(nb_choix,taille_seq)] = l4 output = open(fichier_cp, 'wb') cPickle.dump(CP, output) output.close() return l4 def filtre2(liste_nuplet, taille_seq, nb_choix): l4=[] for x in liste_nuplet: previous = taille_seq+1 ; OK = True ; totalEcart = maxEcart = 0 for pos in x: #if x == [6, 5, 3, 1]: print previous,pos,abs(previous-pos)> 3,previous <= pos ecart = abs(previous-pos) totalEcart += ecart-1 ; maxEcart = max(maxEcart,ecart-1) if (ecart > 2) or (previous <= pos): OK = False #break previous = pos #if x == [6, 5, 3, 1]: print previous,0,abs(previous-0)> 3,previous <= 0 ecart = abs(previous-0) totalEcart += ecart-1 ; maxEcart = max(maxEcart,ecart-1) if (ecart > 2) or (previous <= 0): OK = False if OK: l4.append(x) assert totalEcart <= 2*nb_choix+1,(totalEcart, 2*nb_choix+1, taille_seq, nb_choix,x) #else: #assert totalEcart > 2*nb_choix+1 or maxEcart > 1,(totalEcart, 2*nb_choix+1, maxEcart, taille_seq, nb_choix,x) #sys.stdout.flush() return l4 def filtre1(liste_nuplet, taille_seq, nb_choix): l4 = [] for x in liste_nuplet: #print x i = 0 ; totalEcarts = 0 sorted = True ; following = True ; ecartOK = True ; ecartExtremes = True for i in xrange(0,len(x)-1): if x[i] <= x[i+1]: sorted = False break ecart = abs(x[i]-x[i+1]) totalEcarts += ecart if ecart >= 3: following = False break if sorted and following: totalEcarts += abs(taille_seq-x[0]) + abs(1-x[-1]) if totalEcarts > nb_choix+1: ecartOK = False #print x,taille_seq,x[0],abs(taille_seq-x[0]),'/',1,x[-1],abs(1-x[-1]) if (abs(taille_seq-x[0])>=2) or (abs(1-x[-1])>=2): ecartExtremes = False # print 'ecartExtremes = False' #else: print 'ecartExtremes = True' if ecartOK and ecartExtremes: l4.append(x) #else: print 'not S F' #print 'l4',l4 return l4 def test14(m = 2, n = 3): t = 0 for k in xrange(1,m): t += comb(n,k) * math.pow(2,k) print 't140:',math.pow(2,n-1)*math.pow(2,m-1)*t print 't141:',math.pow(2,n-1)*math.pow(2,m-1)*comb(n-1,m-1) * math.pow(2,min(n-1,m-1)) print 't142:',math.pow(2,n-1)*math.pow(2,m-1)*comb(n-1,m-1) * math.pow(2,min(n-1,m-1)) print 't143:',math.pow(2,n)*math.pow(2,m)*comb(n,m) * math.pow(2,min(n,m)) t2 = t3 = 0 for i in xrange(1,m+1): #print 'i:',i #for j in xrange(1,min(n+1,2*i+1)): for j in xrange(1,n+1): #if j > 2*i+1: #print 'i=',i,'/ j=',j # continue c1 = comb(m-1,i-1) c2 = comb(n-1,j-1) new_t2 = 0 x = min(i,j) ; y = max(i,j) new_t2 = 0 ; new_t3 = 0 for k in xrange(1,x+1): new_t2 += comb(y,k) * arang(x,k) * math.pow(2,k) new_t3 += arang(y,k) * comb(x,k) * math.pow(3,k) #print x,y,k,new_t2 new_tt2 = c1 * c2 * new_t2 new_tt3 = c1 * c2 * new_t3 t2 += new_tt2 t3 += new_tt3 #print ' j:',j,' / c1:',c1,' / c2:',c2,' / new_t2:',new_t2,' / new_t',new_t #print 'i:',i,' / c1:',c1#,' / t2:',t2,' / c1*t2:',c1*t2 logging.debug('t14 t2:'+str(t2)) logging.debug('t14 t3:'+str(t3)) print 't14 t2:',t2 print 't14 t3:',t3 sys.stdout.flush() def test15(m = 2, n = 3): t2 = t3 = t4 = 0 for i in xrange(1,m+1): z = min(n+1,2*i+2) ; z2 = z-1 #z = n+1 ; z2 = z-1 for j in xrange(1,z): assert j <= 2*i+1 #if j > 2*i+1: #print 'i=',i,'/ j=',j # continue c1 = comb(m-1,i-1) c2 = comb(z2-1,j-1) new_t2 = 0 x = min(i,j) ; y = max(i,j) new_t2 = 0 ; new_t3 = 0 ; new_t4 = 0 b_k = int(math.floor(y / 2.0)) for k in xrange(b_k,x+1): new_t2 += comb(y,k) * arang(x,k) * math.pow(2,k) new_t3 += arang(y,k) * comb(x,k) * math.pow(2,k) #new_t2 = len(cp) #* math.pow(2,x) #new_t2 = comb(y,x) new_tt2 = c1 * c2 * new_t2 new_tt3 = c1 * c2 * new_t3 new_tt4 = c1 * c2 * comb(y,x) * arang(x,x) * math.pow(2,k) t2 += new_tt2 t3 += new_tt3 t4 += new_tt4 #print ' j:',j,' / c1:',c1,' / c2:',c2,' / new_t2:',new_t2,' / new_t',new_t #print 'i:',i,' / c1:',c1#,' / t2:',t2,' / c1*t2:',c1*t2 logging.debug('t15 t2:'+str(t2)) logging.debug('t15 t3:'+str(t3)) print 't15 t2:',t2 print 't15 t3:',t3 print 't15 t4:',t4 import bisect def test16(): #A = "ce.matin.le.chat.observa.de.petits.oiseaux.dans.les.arbres." #B = "le.chat.etait.en.train.d.observer.des.oiseaux.dans.les.petits.arbres.ce.matin.il.observa.les.oiseaux.pendant.deux.heures." A = "Ce matin le chat observa de petits oiseaux dans les arbres." B = "Le chat était en train d'observer des oiseaux dans les petits arbres ce matin. Il observa les oiseaux pendant deux heures." #print A #print B dicoRepet = {} #; dicoRepet2 = {} dicoRepet = _extractRepet(A, B, dicoRepet) dicoRepet = _extractRepet(B, A, dicoRepet) dicoRepet = _filterDico(dicoRepet, A, B)#, dicoRepet2) #print dicoRepet i = 1 ; cumul = 0 while i <= len(dicoRepet): cumul += _afficheDic(dicoRepet,i) print '---------------------------------------' i += 1 print 'total = ',cumul def _filterDico(dicoRepet, A, B):#1, dicoRepet2): nouveauDicoRet = {} for longueur, dico_chaine in dicoRepet.iteritems(): nouveauDicoRet[longueur] = {} for chaine,(liste_pos1,liste_pos2) in dico_chaine.iteritems(): l1 = [] ; l2 = [] for i in liste_pos1: if chaine == A[i:i+longueur]: l1.append(i) for j in liste_pos2: if chaine == B[j:j+longueur]: l2.append(j) assert len(l1) > 0 and len(l2) > 0 nouveauDicoRet[longueur][chaine] = l1, l2 return nouveauDicoRet def _afficheDic(dicoRepet, longueur): cumul = 0 liste_alpha = [] for cle,item in dicoRepet[longueur].iteritems(): #print cle ; sys.stdout.flush() bisect.insort_right(liste_alpha, cle) a = liste_alpha liste_alpha.sort() assert a == liste_alpha for chaine in liste_alpha: liste_pos1, liste_pos2 = dicoRepet[longueur][chaine] print longueur,' / ',chaine,' / ',liste_pos1,len(liste_pos1),' / ',liste_pos2,len(liste_pos2) cumul += len(liste_pos1) + len(liste_pos2) print longueur,' / ',cumul return cumul def _extractRepet(sequence1, sequence2, dicoRepet): for longueur in xrange(1,len(sequence1)): i = 0 while i < len(sequence1) - longueur + 1: chaine1 = sequence1[i:i+longueur] #print chaine1 if longueur <= len(sequence2): j = 0 while j < len(sequence2) - longueur +1: chaine2 = sequence2[j:j+longueur] if chaine1 == chaine2: if not dicoRepet.has_key(longueur): dicoRepet[longueur] = {} if not dicoRepet[longueur].has_key(chaine1): dicoRepet[longueur][chaine1] = [],[] liste_pos1, liste_pos2 = dicoRepet[longueur][chaine1] if i not in liste_pos1: bisect.insort_right(liste_pos1, i) if j not in liste_pos2: bisect.insort_right(liste_pos2, j) dicoRepet[longueur][chaine1] = liste_pos1, liste_pos2 j += 1 i += 1 return dicoRepet if __name__ == '__main__': logging.basicConfig(level=logging.DEBUG,#INFO, format='%(asctime)s %(levelname)s %(message)s', #datefmt='%H:%M:%S', filename=os.path.join(os.getcwd(),'log_temp.txt'), filemode='w') console = logging.StreamHandler() console.setLevel(logging.INFO) #test4() #print ord('a'),ord('A'),ord('z'),ord('Z') #print [4]*3 #test10() #test11() #test12() #test13() m = 10; n = 100 #test14(m,n) #test15(m,n) #print (math.floor(5 / 2.0)) test16()