medite.MediteAppli.temp

1 # -*- coding: iso-8859-1 -*- 2 # Copyright 2003 - 2008: Julien Bourdaillet (julien.bourdaillet@lip6.fr), Jean-Gabriel Ganascia (jean-gabriel.ganascia@lip6.fr) 3 # This file is part of MEDITE. 4 # 5 # MEDITE is free software; you can redistribute it and/or modify 6 # it under the terms of the GNU General Public License as published by 7 # the Free Software Foundation; either version 2 of the License, or 8 # (at your option) any later version. 9 # 10 # MEDITE is distributed in the hope that it will be useful, 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 # GNU General Public License for more details. 14 # 15 # You should have received a copy of the GNU General Public License 16 # along with MEDITE; if not, write to the Free Software 17 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 19 import logging, os, os.path, string 20 import numpy.oldnumeric as Numeric 21 import numpy.numarray as numarray 22 import numpy.numarray.ma as ma 23 24

def _readFile(name):
    """Read a file from ``<cwd>/MediteAppli/test`` and return its content.

    NOTE: a later definition of ``_readFile`` in this module (reading from
    the ``textes`` sub-folder) rebinds this name once the module is loaded.

    @param name: name of the file inside the ``MediteAppli/test`` folder
    @return: the whole content of the file, as a string
    @raise IOError: if the file cannot be opened or read
    """
    path = os.path.join(os.getcwd(), "MediteAppli", "test", name)
    f = open(path)
    try:
        return f.read()
    finally:
        # Release the handle even when read() fails.
        f.close()

32 33

def test():
    """Log, for each committee member, an excerpt of the papers file.

    Reads ``comite.txt`` and ``papers.txt`` through ``_readFile``. For every
    non-blank line of the committee file, the last whitespace-separated token
    is searched in the papers text; on a hit, up to 30 characters before and
    70 characters from the match are logged with CR, LF and TAB replaced by
    spaces. The number of members with at least one hit is logged at the end.
    """
    comite = _readFile('comite.txt')
    papers = _readFile('papers.txt')
    lcom = comite.splitlines()
    logging.debug('# comite=%d', len(lcom))
    # maketrans() needs both arguments to have the same length: one space
    # for each of the three replaced characters.
    sepTable = string.maketrans("\r\n\t", " " * 3)
    i = 0
    for c in lcom:
        liste_nom = c.split()
        if not liste_nom:
            # Blank line: there is no name to look up.
            continue
        nom = liste_nom[-1]
        pos = papers.find(nom)
        if pos != -1:
            # Clamp the start: a negative index would wrap around to the end
            # of the text and give an empty or unrelated excerpt.
            p = papers[max(0, pos - 30):pos + 70]
            p = p.translate(sepTable)
            logging.debug('%s / %s\n', c, p)
            # NOTE(review): indentation was lost in the listing; the counter
            # is assumed to count hits only, as the final message suggests.
            i += 1
    logging.debug('# comite member with paper=%d', i)

51

def test2():
    """Log the successive base-60 digits of the float 1/11.

    At each step the fractional part is multiplied by 60, the integer part
    is taken as the next digit and the remainder is carried over. The loop
    stops once the remainder is no longer positive or after 10000 steps;
    the number of steps performed is logged last.
    """
    fraction = 1.0 / (11)
    steps = 0
    while True:
        if steps >= 10000 or not fraction > 0:
            break
        scaled = 60.0 * fraction
        digit = int(scaled)
        fraction = scaled - digit
        assert 0 <= digit < 60
        logging.debug('nb1=%f / ar=%f / nb=%f', scaled, digit, fraction)
        steps += 1
    logging.debug('i=' + str(steps))

63

def test3():
    """Scratch experiment printing a few element-wise numarray operations.

    Builds the Int8 arrays [0..4], all-ones and all-zeros, then prints the
    first two, their differences in both orders and their element-wise
    minimum. Finally prints the first array again after a three-argument
    logical_and call, and the sum of [1, 2, 3, 4].
    """
    import numarray
    ramp = numarray.zeros(5, numarray.Int8)
    for idx in range(len(ramp)):
        ramp[idx] = idx
    ones = numarray.ones(5, numarray.Int8)
    zeros = numarray.zeros(5, numarray.Int8)
    print(ramp)
    print(ones)
    print(ramp - ones)
    print(ones - ramp)
    print(numarray.minimum(ramp, ones))
    size = len(ramp)  # not used afterwards
    # NOTE(review): the third argument is presumably the output array, so
    # ramp would be overwritten in place -- confirm against the numarray API.
    numarray.logical_and(ones - ramp, zeros, ramp)
    print(ramp)

    ramp = numarray.array([1, 2, 3, 4])
    print(numarray.sum(ramp))

83

def test4():
    """Scratch experiment: scatter ones with put(), then accumulate indicator rows.

    Prints a length-10 vector after put() has written 1 at the positions
    [1, 5, 9, 1], then an all-zero 4x10 matrix, then the same matrix once
    each row has been filled, going from the last position to the first,
    with the running sum of the indicator vectors.

    The nested helpers ``d`` and ``d2`` of the previous version were removed:
    they were never called, and would have failed if they had been (``d``
    used an undefined name ``where``; ``d2`` called ``d`` with one argument
    instead of two).
    """
    import numarray
    pos = numarray.array([1, 5, 9, 1])
    x = numarray.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    numarray.put(x, pos, [1, 1, 1, 1])
    print(x)
    b = numarray.zeros(10)
    a = numarray.zeros((4, 10))
    print(a)
    for i in range(4 - 1, -1, -1):
        pos2 = pos[i]
        # Indicator vector: 1 at index pos2, 0 elsewhere.
        c = numarray.where(numarray.arange(10) == pos2, 1, 0)
        # NOTE(review): the third argument is presumably the output array,
        # making b a running total -- confirm against the numarray API.
        numarray.add(b, c, b)
        a[i] = b
    # NOTE(review): indentation was lost in the listing; this final print is
    # assumed to come after the loop.
    print(a)

106 -def test5():

107 ma = Numeric.array([[1,2,3],[4,5,6]]) 108 for x in ma: print x

109

def _readFile(name):
    """Read a file from ``<cwd>/MediteAppli/test/textes`` and return its content.

    NOTE: this definition replaces the earlier ``_readFile`` of this module,
    which read from the parent ``MediteAppli/test`` folder.

    @param name: name of the file inside the ``MediteAppli/test/textes`` folder
    @return: the whole content of the file, as a string
    @raise IOError: if the file cannot be opened or read
    """
    path = os.path.join(os.getcwd(), "MediteAppli", "test", "textes", name)
    f = open(path)
    try:
        return f.read()
    finally:
        # Release the handle even when read() fails.
        f.close()

117

def test6():
    """Scratch experiment on ratio-based masking of two small sequences.

    Prints the masks obtained with where()/logical_and() on two six-element
    arrays, then the pair returned by
    ``alignement.AlignAstarRecur2(...).extractRemplacements`` for the same
    two lists, and finally a few one-off values (argmax of the first list,
    an argument-less ``array()`` call, the length of an empty list).
    """
    left_values = [-1, 1, 2, 3, 10, 5]
    right_values = [-1, 10, 3, 2, 1, 6]
    left = numarray.array(left_values, type='Float32')
    right = numarray.array(right_values)

    # Keep the left value where left/right > 0.5 and the right value where
    # left/right < 2; the conjunction marks positions passing both tests
    # with non-zero kept values.
    ratio = left / right
    kept_left = numarray.where((ratio > 0.5), left, 0)
    kept_right = numarray.where((ratio < 2), right, 0)
    both = numarray.logical_and(kept_left, kept_right)
    print("%s %s %s" % (kept_left, kept_right, both))

    masked_left = numarray.where(both, left, 0)
    masked_right = numarray.where(both, right, 0)
    masked_both = numarray.logical_and(masked_left, masked_right)
    print("%s %s %s" % (masked_left, masked_right, masked_both))

    import alignement
    first_text = _readFile('CB_experience.txt')
    second_text = _readFile("CB_Texte_de_1857.txt")
    aligner = alignement.AlignAstarRecur2(first_text + second_text)
    repl_left, repl_right = aligner.extractRemplacements(left_values, right_values)
    zeros3 = numarray.array([0, 0, 0])  # not used afterwards
    print("%s %s" % (repl_left, repl_right))
    print(numarray.argmax(left_values))
    print(numarray.array())
    print(len([]))

153

def test7():
    """Print whether two separately built lists with equal items compare equal."""
    first = ['aa', 'bb']
    second = ['aa', 'bb']
    same = first == second
    print(same)

158

def test8():
    """Print two bisect look-ups on hard-coded integer lists.

    The first list is in descending order, which bisect does not support,
    so the first printed index is not a meaningful insertion point; the
    second list is ascending.
    """
    from bisect import bisect_left, bisect_right
    descending = [100, 80, 75, 74, 70, 60, 50, 30, 22, 20, 15, 10, 2]
    ascending = [2, 5, 10, 12, 13, 19, 20, 30, 50]
    left_index = bisect_left(descending, 11)
    print(left_index)
    right_index = bisect_right(ascending, 2)
    print(right_index)

165

def __splitNGrammes( texte, tailleNgram, lenT1):
    """Collect the n-grams of a text, including word-shifted variants.

    For tailleNgram == 1 this simply delegates to __splitNGrammes2. Otherwise
    the result of __splitNGrammes2 on the whole text is extended with its
    result on the text starting after the first word and, when tailleNgram
    is not 2, also on the text starting after the second word. No further
    shift is computed, whatever the value of tailleNgram.

    The intermediate word lists and offsets are printed as debugging output.

    NOTE(review): indentation was lost in the listing this block was recovered
    from; the nesting below is a reconstruction. ``t[0]`` raises IndexError as
    soon as the word list is exhausted (e.g. a one-word text with
    tailleNgram >= 2).

    @param texte: the text to split
    @param tailleNgram: number of words wanted in each block
    @param lenT1: offset added to every reported position
    @return: list of (ngram, [start, end]) tuples, as built by __splitNGrammes2
    """
    # The separator is hard-wired to a space; the '.' branch is unreachable.
    if True:
        sep=" "
    else: sep="."

    if tailleNgram == 1:
        return __splitNGrammes2( texte, tailleNgram, lenT1)
    else:
        r1 = __splitNGrammes2( texte, tailleNgram, lenT1)
        # i is the character offset in texte of what remains in t.
        t = texte.split(sep) ; i = 0
        print t
        # Empty strings produced by split() stand for separators that are not
        # preceded by a word: skip them, one character each.
        while len(t[0]) == 0:
            t = t[1:] ; i += 1
        assert len(t[0]) > 0
        # Step over the first word.
        i += len(t[0]) ; t = t[1:]
        print t
        while len(t[0]) == 0:
            t = t[1:] ; i += 1
        # while/else without break: the else clause always runs and accounts
        # for the separator that followed the skipped word.
        else: i += 1
        print t,i
        r2 = __splitNGrammes2( texte[i:], tailleNgram, lenT1+i)
        r1.extend(r2)
        if tailleNgram == 2:
            return r1
        else:
            assert len(t[0]) > 0
            # Step over the second word the same way.
            i += len(t[0]) ; t = t[1:]
            print t
            while len(t[0]) == 0:
                t = t[1:] ; i += 1
            else: i += 1
            print t,i
            r3 = __splitNGrammes2( texte[i:], tailleNgram, lenT1+i)
            r1.extend(r3)
            return r1

201

def __splitNGrammes2(texte, tailleNgram, lenT1):
    """Cut a text into consecutive blocks of tailleNgram words.

    A word ends at each space, and the space belongs to the block. Trailing
    characters that do not complete a block are not returned; they are only
    printed.

    @param texte: the text to cut
    @param tailleNgram: number of words wanted in a block (1 => unigram,
        2 => bigram, etc.)
    @param lenT1: offset added to the positions (0 for text 1, len(text1)
        for text 2)
    @return: list of (ngram, [start, end]) tuples, with start and end
        shifted by lenT1
    """
    separator = " "
    blocks = []
    block_start = 0   # index in texte where the current block begins
    words_seen = 0    # number of separators met in the current block
    pending = []      # characters of the current block
    for index, char in enumerate(texte):
        pending.append(char)
        if char != separator:
            continue
        # A separator closes one more word of the current block.
        words_seen += 1
        if words_seen != tailleNgram:
            continue
        # The block has the requested number of words: record it.
        ngram = ''.join(pending)
        debut = block_start + lenT1
        fin = debut + len(ngram)
        assert ngram == texte[debut-lenT1:fin-lenT1], (ngram, texte[debut-lenT1:fin-lenT1])
        blocks.append((ngram, [debut, fin]))
        pending = []
        words_seen = 0
        block_start = index + 1
    # Debugging output: the unfinished tail that was left out of the result.
    print(''.join(pending))
    return blocks

233

def test9():
    """Print the words of a sample string and check a length identity.

    The length of the string must equal the total length of its
    space-separated pieces plus one separator between consecutive pieces.
    """
    phrase = "ar er rt"
    words = phrase.split(' ')
    print(words)
    letters = 0
    for word in words:
        letters += len(word)
    separators = len(words) - 1
    assert len(phrase) == letters + separators

239

def test10():
    """Print 30 integers drawn from a Gaussian (mean 15, sigma 10).

    The generator is seeded with a fixed value, so the printed list is
    reproducible. One draw is made and discarded before the list is built.
    """
    import random
    random.seed(4654)
    # Discarded draw: it still advances the generator state.
    random.gauss(100, 5)
    draws = [int(random.gauss(15, 10)) for _ in range(30)]
    print(draws)

if __name__ == '__main__':
    # Write every record from DEBUG upwards to log.txt in the current
    # directory; the file is truncated on each run.
    logging.basicConfig(
        filename=os.path.join(os.getcwd(), 'log.txt'),
        filemode='w',
        format='%(asctime)s %(levelname)s %(message)s',
        level=logging.DEBUG)
    # NOTE(review): this handler is created and given a level, but it is
    # never attached to a logger within this block.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    # Experiment run when the module is executed as a script.
    test10()

Source Code for Module medite.MediteAppli.temp