# -*- coding: iso-8859-1 -*-
# Copyright 20003 - 2008: Julien Bourdaillet (julien.bourdaillet@lip6.fr), Jean-Gabriel Ganascia (jean-gabriel.ganascia@lip6.fr)
# This file is part of MEDITE.
#
# MEDITE is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# MEDITE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

import logging
import os
import os.path
import string


def _readFile(name):
    """Read a fixture file and return its whole content as a string.

    The file is looked up in ``<current working directory>/MediteAppli/test``.

    name -- file name relative to that directory.

    Raises IOError (OSError on Python 3) when the file cannot be opened.
    """
    path = os.path.join(os.getcwd(), "MediteAppli", "test", name)
    # The context manager closes the file even when read() raises.
    with open(path) as f:
        return f.read()


def test():
    """Log which committee members are mentioned in the papers file.

    Reads ``comite.txt`` (one member per line) and ``papers.txt`` through
    _readFile().  For every non-blank committee line, the last
    whitespace-separated word is searched for in the papers text; on a hit,
    the line is logged at DEBUG level together with an excerpt of up to
    30 characters before and 70 characters from the start of the first
    match, with CR, LF and TAB replaced by spaces.  The number of committee
    lines and the number of matched members are logged as well.

    Returns None; the only output goes through the logging module.
    """
    comite = _readFile('comite.txt')
    papers = _readFile('papers.txt')
    members = comite.splitlines()
    logging.debug('# comite=' + str(len(members)))

    # Both maketrans arguments must have the same length, hence one blank
    # per separator.  str.maketrans only exists on Python 3; Python 2
    # provides string.maketrans instead.
    separators = "\r\n\t"
    make_table = getattr(str, 'maketrans', None) or string.maketrans
    sep_table = make_table(separators, " " * len(separators))

    matched = 0
    for member in members:
        words = member.split()
        if not words:
            # Blank line: there is no name to look for.
            continue
        surname = words[-1]
        pos = papers.find(surname)
        if pos != -1:
            # Clamp the start: a negative index would wrap around to the
            # end of the text and yield a wrong (usually empty) excerpt.
            excerpt = papers[max(0, pos - 30):pos + 70].translate(sep_table)
            logging.debug(member + ' / ' + excerpt + '\n')
            matched += 1
    logging.debug('# comite member with paper=' + str(matched))


# This is an ad-hoc script helper that needs fixture files on disk, not a
# unit test: keep test runners (pytest, nose) from collecting it by name.
test.__test__ = False
Los Angeles Steakhouses", "Arnie Morton's of Chicago", "435 S. La Cienega Blvd"), ("Arnie Morton's of Chicago 435 S. La Cienega Blvd. Los Angeles 310/246-1501 American", "Arnie Morton's of Chicago", "435 S. La Cienega Blvd")), ######################## (("Art's Deli 12224 Ventura Blvd. Studio City 818-762-1221 Delis", "Art's Deli 12224 Ventura Blvd. Studio City Delis", "Art's Deli", "12224 Ventura Blvd."), ("Art's Delicatessen 12224 Ventura Blvd. Studio City 818/762-1221 American", "Art's Delicatessen", "12224 Ventura Blvd. Studio City")), ######################## (("Bel-Air Hotel 701 Stone Canyon Rd. Bel Air 310-472-1211 Californian", "Bel-Air Hotel 701 Stone Canyon Rd. Bel Air Californian", "Bel-Air Hotel", "701 Stone Canyon Rd. Bel Air"), ("Hotel Bel-Air 701 Stone Canyon Rd. Bel Air 310/472-1211 Californian", "Hotel Bel-Air", "701 Stone Canyon Rd")), ######################## (("Cafe Bizou 14016 Ventura Blvd. Sherman Oaks 818-788-3536 French Bistro", "Cafe Bizou 14016 Ventura Blvd. Sherman Oaks French Bistro", "Cafe Bizou", "14016 Ventura Blvd."), ("Cafe Bizou 14016 Ventura Blvd. Sherman Oaks 818/788-3536 French", "Cafe Bizou", "14016 Ventura Blvd. Sherman Oaks")), ######################## (("Campanile 624 S. La Brea Ave. Los Angeles 213-938-1447 Californian", "Campanile 624 S. La Brea Ave. Los Angeles Californian", "Campanile", "624 S. La Brea Ave."), ("Campanile 624 S. La Brea Ave. Los Angeles 213/938-1447 American", "Campanile", "624 S. La Brea Ave.")), ######################## (("Chinois on Main 2709 Main St. Santa Monica 310-392-9025 Pacific New Wave", "Chinois on Main 2709 Main St. Santa Monica Pacific New Wave", "Chinois on Main", "2709 Main St."), ("Chinois on Main 2709 Main St. Santa Monica 310/392-9025 French", "Chinois on Main", "2709 Main St. Santa Monica")), ######################## (("Citrus 6703 Melrose Ave. Los Angeles 213-857-0034 Californian", "Citrus 6703 Melrose Ave. 
Los Angeles Californian", "Citrus", "6703 Melrose Ave."), ("Citrus 6703 Melrose Ave. Los Angeles 213/857-0034 Californian", "Citrus", "6703 Melrose Ave.")), ######################## (("Fenix at the Argyle 8358 Sunset Blvd. W. Hollywood 213-848-6677 French (New)", "Fenix at the Argyle 8358 Sunset Blvd. W. Hollywood French (New)", "Fenix at the Argyle", "8358 Sunset Blvd."), ("Fenix 8358 Sunset Blvd. West Hollywood 213/848-6677 American", "Fenix", "8358 Sunset Blvd. West Hollywood")), ######################## (("23725 W. Malibu Rd. Malibu 310-456-0488 Californian", "23725 W. Malibu Rd. Malibu Californian", "Granita", "23725 W. Malibu Rd."), ("23725 W. Malibu Rd. Malibu 310/456-0488 Californian", "Granita", "23725 W. Malibu Rd. Malibu")), ######################## (("Grill The 9560 Dayton Way Beverly Hills 310-276-0615 American (Traditional)", "Grill The 9560 Dayton Way Beverly Hills American (Traditional)", "Grill", "The 9560 Dayton Way"), ("Grill on the Alley 9560 Dayton Way Los Angeles 310/276-0615 American", "Grill on the Alley", "9560 Dayton Way Los Angeles")), ######################## (("Katsu 1972 Hillhurst Ave. Los Feliz 213-665-1891 Japanese", "Katsu 1972 Hillhurst Ave. Los Feliz Japanese", "Katsu", "1972 Hillhurst Ave."), ("Restaurant Katsu 1972 N. Hillhurst Ave. Los Angeles 213/665-1891 Asian", "Restaurant Katsu", "1972 N. Hillhurst Ave.")), ######################## (("L'Orangerie 903 N. La Cienega Blvd. W. Hollywood 310-652-9770 French (Classic)", "L'Orangerie 903 N. La Cienega Blvd. W. Hollywood French (Classic)", "L'Orangerie", "903 N. La Cienega Blvd."), ("L'Orangerie 903 N. La Cienega Blvd. Los Angeles 310/652-9770 French", "L'Orangerie", "903 N. La Cienega Blvd. W. Hollywood")), ######################## (("Le Chardonnay (Los Angeles) 8284 Melrose Ave. Los Angeles 213-655-8880 French Bistro", "Le Chardonnay (Los Angeles) 8284 Melrose Ave. Los Angeles French Bistro", "Le Chardonnay", "8284 Melrose Ave."), ("Le Chardonnay 8284 Melrose Ave. 
Los Angeles 213/655-8880 French", "Le Chardonnay", "8284 Melrose Ave.")), ######################## (("Locanda Veneta 8638 W. Third St. Los Angeles 310-274-1893 Italian", "Locanda Veneta 8638 W. Third St. Los Angeles Italian", "Locanda Veneta", "8638 W. Third St."), ("Locanda Veneta 3rd St. Los Angeles 310/274-1893 Italian", "Locanda Veneta", "3rd St.")), ######################## (("Matsuhisa 129 N. La Cienega Blvd. Beverly Hills 310-659-9639 Seafood", "Matsuhisa 129 N. La Cienega Blvd. Beverly Hills Seafood", "Matsuhisa", "129 N. La Cienega Blvd."), ("Matsuhisa 129 N. La Cienega Blvd. Beverly Hills 310/659-9639 Asian", "Matsuhisa", "129 N. La Cienega Blvd. Beverly Hills")), ######################## (("Palm The (Los Angeles) 9001 Santa Monica Blvd. W. Hollywood 310-550-8811 Steakhouses", "Palm The (Los Angeles) 9001 Santa Monica Blvd. W. Hollywood Steakhouses", "Palm The ", "9001 Santa Monica Blvd."), ("The Palm 9001 Santa Monica Blvd. Los Angeles 310/550-8811 American", "The Palm", "9001 Santa Monica Blvd.")), ######################## (("Patina 5955 Melrose Ave. Los Angeles 213-467-1108 Californian", "Patina 5955 Melrose Ave. Los Angeles Californian", "Patina", "5955 Melrose Ave."), ("Patina 5955 Melrose Ave. Los Angeles 213/467-1108 Californian", "Patina", "5955 Melrose Ave.")), ######################## (("Philippe The Original 1001 N. Alameda St. Chinatown 213-628-3781 Cafeterias", "Philippe The Original 1001 N. Alameda St. Chinatown Cafeterias", "Philippe The Original", "1001 N. Alameda St."), ("Philippe's The Original 1001 N. Alameda St. Los Angeles 213/628-3781 American", "Philippe's The Original", "1001 N. Alameda St.")), ######################## (("Pinot Bistro 12969 Ventura Blvd. Studio City 818-990-0500 French Bistro", "Pinot Bistro 12969 Ventura Blvd. Studio City French Bistro", "Pinot Bistro", "12969 Ventura Blvd."), ("Pinot Bistro 12969 Ventura Blvd. 
Los Angeles 818/990-0500 French", "Pinot Bistro", "12969 Ventura Blvd.")), ######################## (("Rex Il Ristorante 617 S. Olive St. Los Angeles 213-627-2300 Nuova Cucina Italian", "Rex Il Ristorante 617 S. Olive St. Los Angeles Nuova Cucina Italian", "Rex Il Ristorante", "617 S. Olive St."), ("Rex Il Ristorante 617 S. Olive St. Los Angeles 213/627-2300 Italian", "Rex Il Ristorante", "617 S. Olive St.")), ######################## (("Spago (Los Angeles) 8795 Sunset Blvd. W. Hollywood 310-652-4025 Californian", "Spago (Los Angeles) 8795 Sunset Blvd. W. Hollywood Californian", "Spago", "8795 Sunset Blvd."), ("Spago 1114 Horn Ave. Los Angeles 310/652-4025 Californian", "Spago", "1114 Horn Ave.")), ######################## (("Valentino 3115 Pico Blvd. Santa Monica 310-829-4313 Italian", "Valentino 3115 Pico Blvd. Santa Monica Italian", "Valentino", "3115 Pico Blvd."), ("Valentino 3115 Pico Blvd. Santa Monica 310/829-4313 Italian", "Valentino", "3115 Pico Blvd. Santa Monica")), ######################## (("Yujean Kang's 67 N. Raymond Ave. Pasadena 818-585-0855 Chinese", "Yujean Kang's 67 N. Raymond Ave. Pasadena Chinese", "Yujean Kang's", "67 N. Raymond Ave. Pasadena"), ("Yujean Kang's Gourmet Chinese Cuisine 67 N. Raymond Ave. Los Angeles 818/585-0855 Asian", "Yujean Kang's Gourmet Chinese Cuisine", "67 N. Raymond Ave.")), ######################## (("21 Club 21 W. 52nd St. New York City 212-582-7200 American (New)", "21 Club 21 W. 52nd St. New York City American (New)", "21 Club", "21 W. 52nd St."), ("21 Club 21 W. 52nd St. New York 212/582-7200 American", "21 Club", "21 W. 52nd St.")), ######################## (("Aquavit 13 W. 54th St. New York City 212-307-7311 Scandinavian", "Aquavit 13 W. 54th St. New York City Scandinavian", "Aquavit", "13 W. 54th St."), ("Aquavit 13 W. 54th St. New York 212/307-7311 Continental", "Aquavit", "13 W. 54th St. New York")), ######################## (("Aureole 34 E. 61st St. New York City 212-319-1660 American (New)", "Aureole 34 E. 
61st St. New York City American (New)", "Aureole", "34 E. 61st St."), ("Aureole 34 E. 61st St. New York 212/ 319-1660 American", "Aureole", "34 E. 61st St. New York")), ######################## (("BCafe Lalo 201 W. 83rd St. New York City 212-496-6031 Coffeehouses", "BCafe Lalo 201 W. 83rd St. New York City Coffeehouses", "BCafe Lalo", "201 W. 83rd St."), ("Café Lalo 201 W. 83rd St. New York 212/496-6031 Coffee Bar", "Café Lalo", "201 W. 83rd St. New York")), ######################## (("Cafe des Artistes 1 W. 67th St. New York City 212-877-3500 French (Classic)", "Cafe des Artistes 1 W. 67th St. New York City French (Classic)", "Cafe des Artistes", "1 W. 67th St."), ("Café des Artistes 1 W. 67th St. New York 212/877-3500 Continental", "Café des Artistes", "1 W. 67th St. New York")), ######################## (("Carmine's 2450 Broadway New York City 212-362-2200 Italian", "Carmine's 2450 Broadway New York City Italian", "Carmine's", "2450 Broadway"), ("Carmine's 2450 Broadway between 90th and 91st Sts. New York 212/362-2200 Italian", "Carmine's", "2450 Broadway between 90th and 91st Sts. New York")), ######################## (("Carnegie Deli 854 Seventh Ave. New York City 212-757-2245 Delis", "Carnegie Deli 854 Seventh Ave. New York City Delis", "Carnegie Deli", "854 Seventh Ave."), ("Carnegie Deli 854 7th Ave. between 54th and 55th Sts. New York 212/757-2245 Delicatessen", "Carnegie Deli", "854 7th Ave. between 54th and 55th Sts. New York")), ######################## (("Chanterelle 2 Harrison St. New York City 212-966-6960 French (New)", "Chanterelle 2 Harrison St. New York City French (New)", "Chanterelle", "2 Harrison St."), ("Chanterelle 2 Harrison St. near Hudson St. New York 212/966-6960 American", "Chanterelle", "2 Harrison St. near Hudson St.")), ######################## (("CDaniel 20 E. 76th St. New York City 212-288-0033 French (New)", "CDaniel 20 E. 76th St. New York City French (New)", "CDaniel", "20 E. 76th St."), ("Daniel 20 E. 76th St. 
New York 212/288-0033 French", "Daniel", "20 E. 76th St.")), ######################## (("Dawat 210 E. 58th St. New York City 212-355-7555 Indian", "Dawat 210 E. 58th St. New York City Indian", "Dawat", "210 E. 58th St."), ("Dawat 210 E. 58th St. New York 212/355-7555 Asian", "Dawat", "210 E. 58th St.")), ######################## (("Felidia 243 E. 58th St. New York City 212-758-1479 Italian", "Felidia 243 E. 58th St. New York City Italian", "Felidia", "243 E. 58th St."), ("Felidia 243 E. 58th St. New York 212/758-1479 Italian", "Felidia", "243 E. 58th St.")), ######################## (("Four Seasons 99 E. 52nd St. New York City 212-754-9494 American (New)", "Four Seasons 99 E. 52nd St. New York City American (New)", "Four Seasons", "99 E. 52nd St."), ("Four Seasons Grill Room 99 E. 52nd St. New York 212/754-9494 American", "Four Seasons Grill Room", "99 E. 52nd St.")), ######################## (("Gotham Bar & Grill 12 E. 12th St. New York City 212-620-4020 American (New)", "Gotham Bar & Grill 12 E. 12th St. New York City American (New)", "Gotham Bar & Grill", "12 E. 12th St."), ("Gotham Bar & Grill 12 E. 12th St. New York 212/620-4020 American", "Gotham Bar & Grill", "12 E. 12th St.")), ######################## (("Gramercy Tavern 42 E. 20th St. New York City 212-477-0777 American (New)", "Gramercy Tavern 42 E. 20th St. New York City American (New)", "Gramercy Tavern", "42 E. 20th St."), ("Gramercy Tavern 42 E. 20th St. between Park Ave. S and Broadway New York 212/477-0777 American", "Gramercy Tavern", "42 E. 20th St.")), ######################## (("Island Spice 402 W. 44th St. New York City 212-765-1737 Caribbean", "Island Spice 402 W. 44th St. New York City Caribbean", "Island Spice", "402 W. 44th St."), ("Island Spice 402 W. 44th St. New York 212/765-1737 tel Caribbean", "Island Spice", "402 W. 44th St.")), ######################## (("Jo Jo 160 E. 64th St. New York City 212-223-5656 French Bistro", "Jo Jo 160 E. 64th St. 
New York City French Bistro", "Jo Jo", "160 E. 64th St."), ("Jo Jo 160 E. 64th St. New York 212/223-5656 American", "Jo Jo", "160 E. 64th St.")), ######################## (("La Caravelle 33 W. 55th St. New York City 212-586-4252 French (Classic)", "La Caravelle 33 W. 55th St. New York City French (Classic)", "La Caravelle", "33 W. 55th St."), ("La Caravelle 33 W. 55th St. New York 212/586-4252 French", "La Caravelle", "33 W. 55th St.")), ######################## (("La Cote Basque 60 W. 55th St. New York City 212-688-6525 French (Classic)", "La Cote Basque 60 W. 55th St. New York City French (Classic)", "La Cote Basque", "60 W. 55th St."), ("La Cote Basque 60 W. 55th St. between 5th and 6th Ave. New York 212/688-6525 French", "La Cote Basque", "60 W. 55th St. between 5th and 6th Ave.")), ######################## (("Le Bernardin 155 W. 51st St. New York City 212-489-1515 Seafood", "Le Bernardin 155 W. 51st St. New York City Seafood", "Le Bernardin", "155 W. 51st St."), ("Le Bernardin 155 W. 51st St. New York 212/489-1515 French", "Le Bernardin", "155 W. 51st St.")), ######################## (("Les Celebrites 155 W. 58th St. New York City 212-484-5113 French (Classic)", "Les Celebrites 155 W. 58th St. New York City French (Classic)", "Les Celebrites", "155 W. 58th St."), ("Les Célébrités 160 Central Park S New York 212/484-5113 French", "Les Célébrités", "160 Central Park S")), ######################## (("Lespinasse (New York City) 2 E. 55th St. New York City 212-339-6719 Asian", "Lespinasse (New York City) 2 E. 55th St. New York City Asian", "Lespinasse", "2 E. 55th St."), ("Lespinasse 2 E. 55th St. New York 212/339-6719 American", "Lespinasse", "2 E. 55th St.")), ######################## (("Lutece 249 E. 50th St. New York City 212-752-2225 French (Classic)", "Lutece 249 E. 50th St. New York City French (Classic)", "Lutece", "249 E. 50th St."), ("Lutèce 249 E. 50th St. New York 212/752-2225 French", "Lutèce", "249 E. 
50th St.")), ######################## (("Manhattan Ocean Club 57 W. 58th St. New York City 212-371-7777 Seafood", "Manhattan Ocean Club 57 W. 58th St. New York City Seafood", "Manhattan Ocean Club", "57 W. 58th St."), ("Manhattan Ocean Club 57 W. 58th St. New York 212/ 371-7777 Seafood", "Manhattan Ocean Club", "57 W. 58th St.")), ######################## (("March 405 E. 58th St. New York City 212-754-6272 American (New)", "March 405 E. 58th St. New York City American (New)", "March", "405 E. 58th St."), ("March 405 E. 58th St. New York 212/754-6272 American", "March", "405 E. 58th St.")), ######################## (("Mesa Grill 102 Fifth Ave. New York City 212-807-7400 Southwestern", "Mesa Grill 102 Fifth Ave. New York City Southwestern", "Mesa Grill", "102 Fifth Ave."), ("Mesa Grill 102 5th Ave. between 15th and 16th Sts. New York 212/807-7400 American", "Mesa Grill", "102 5th Ave. between 15th and 16th Sts.")), ######################## (("Mi Cocina 57 Jane St. New York City 212-627-8273 Mexican", "Mi Cocina 57 Jane St. New York City Mexican", "Mi Cocina", "57 Jane St."), ("Mi Cocina 57 Jane St. off Hudson St. New York 212/627-8273 Mexican", "Mi Cocina", "57 Jane St. off Hudson St.")), ######################## (("Montrachet 239 W. Broadway New York City 212-219-2777 French Bistro", "Montrachet 239 W. Broadway New York City French Bistro", "Montrachet", "239 W. Broadway"), ("Montrachet 239 W. Broadway between Walker and White Sts. New York 212/ 219-2777 French", "Montrachet", "239 W. Broadway between Walker and White Sts.")), ######################## (("Oceana 55 E. 54th St. New York City 212-759-5941 Seafood", "Oceana 55 E. 54th St. New York City Seafood", "Oceana", "55 E. 54th St."), ("Oceana 55 E. 54th St. New York 212/759-5941 Seafood", "Oceana", "55 E. 54th St.")), ######################## (("Park Avenue Cafe (New York City) 100 E. 63rd St. New York City 212-644-1900 American (New)", "Park Avenue Cafe (New York City) 100 E. 63rd St. 
New York City American (New)", "Park Avenue Cafe", "100 E. 63rd St."), ("Park Avenue Cafe 100 E. 63rd St. New York 212/644-1900 American", "Park Avenue Cafe", "100 E. 63rd St.")), ######################## (("Petrossian 182 W. 58th St. New York City 212-245-2214 Russian", "Petrossian 182 W. 58th St. New York City Russian", "Petrossian", "182 W. 58th St."), ("Petrossian 182 W. 58th St. New York 212/245-2214 French", "Petrossian", "182 W. 58th St.")), ######################## (("Picholine 35 W. 64th St. New York City 212-724-8585 Mediterranean", "Picholine 35 W. 64th St. New York City Mediterranean", "Picholine", "35 W. 64th St."), ("Picholine 35 W. 64th St. New York 212/724-8585 Mediterranean", "Picholine", "35 W. 64th St.")), ######################## (("Pisces 95 Ave. A New York City 212-260-6660 Seafood", "Pisces 95 Ave. A New York City Seafood", "Pisces", "95 Ave."), ("Pisces 95 Ave. A at 6th St. New York 212/260-6660 Seafood", "Pisces", "95 Ave. A at 6th St.")), ######################## (("Rainbow Room 30 Rockefeller Plaza New York City 212-632-5000 American (New)", "Rainbow Room 30 Rockefeller Plaza New York City American (New)", "Rainbow Room", "30 Rockefeller Plaza"), ("Rainbow Room 30 Rockefeller Plaza New York 212/632-5000 or 212/632-5100 American", "Rainbow Room", "30 Rockefeller Plaza")), ######################## (("River Cafe 1 Water St. Brooklyn 718-522-5200 American (New)", "River Cafe 1 Water St. Brooklyn American (New)", "River Cafe", "1 Water St. Brooklyn"), ("River Café 1 Water St. at the East River Brooklyn 718/522-5200 American", "River Café", "1 Water St. at the East River Brooklyn")), ######################## (("San Domenico 240 Central Park S. New York City 212-265-5959 Italian", "San Domenico 240 Central Park S. New York City Italian", "San Domenico", "240 Central Park S."), ("San Domenico 240 Central Park S New York 212/265-5959 Italian", "San Domenico", "240 Central Park S")), ######################## (("Second Avenue Deli 156 Second Ave. 
New York City 212-677-0606 Delis", "Second Avenue Deli 156 Second Ave. New York City Delis", "Second Avenue Deli", "156 Second Ave."), ("Second Avenue Deli 156 2nd Ave. at 10th St. New York 212/677-0606 Delicatessen", "Second Avenue Deli", "156 2nd Ave. at 10th St.")), ######################## (("Seryna 11 E. 53rd St. New York City 212-980-9393 Japanese", "Seryna 11 E. 53rd St. New York City Japanese", "Seryna", "11 E. 53rd St."), ("Seryna 11 E. 53rd St. New York 212/980-9393 Asian", "Seryna", "11 E. 53rd St.")), ######################## (("Shun Lee Palace 155 E. 55th St. New York City 212-371-8844 Chinese", "Shun Lee Palace 155 E. 55th St. New York City Chinese", "Shun Lee Palace", "155 E. 55th St."), ("Shun Lee West 43 W. 65th St. New York 212/371-8844 Asian", "Shun Lee West", "43 W. 65th St.")), ######################## (("Sign of the Dove 1110 Third Ave. New York City 212-861-8080 American (New)", "Sign of the Dove 1110 Third Ave. New York City American (New)", "Sign of the Dove", "1110 Third Ave."), ("Sign of the Dove 1110 3rd Ave. at 65th St. New York 212/861-8080 American", "Sign of the Dove", "1110 3rd Ave. at 65th St.")), ######################## (("Smith & Wollensky 797 Third Ave. New York City 212-753-1530 Steakhouses", "Smith & Wollensky 797 Third Ave. New York City Steakhouses", "Smith & Wollensky", "797 Third Ave."), ("Smith & Wollensky 201 E. 49th St. New York 212/753-1530 American", "Smith & Wollensky", "201 E. 49th St.")), ######################## (("Tavern on the Green Central Park West New York City 212-873-3200 American (New)", "Tavern on the Green Central Park West New York City American (New)", "Tavern on the Green", "Central Park West"), ("Tavern on the Green In Central Park at 67th St. New York 212/873-3200 American", "Tavern on the Green", "In Central Park at 67th St.")), ######################## (("Uncle Nick's 747 Ninth Ave. New York City 212-245-7992 Greek", "Uncle Nick's 747 Ninth Ave. 
New York City Greek", "Uncle Nick's", "747 Ninth Ave."), ("Uncle Nick's 747 9th Ave. between 50th and 51st Sts. New York 212/315-1726 Mediterranean", "Uncle Nick's", "747 9th Ave. between 50th and 51st Sts.")), ######################## (("Union Square Cafe 21 E. 16th St. New York City 212-243-4020 American (New)", "Union Square Cafe 21 E. 16th St. New York City American (New)", "Union Square Cafe", "21 E. 16th St."), ("Union Square Cafe 21 E. 16th St. New York 212/243-4020 American", "Union Square Cafe", "21 E. 16th St.")), ######################## (("Virgil's Real BBQ 152 W. 44th St. New York City 212-921-9494 BBQ", "Virgil's Real BBQ 152 W. 44th St. New York City BBQ", "Virgil's Real BBQ", "152 W. 44th St."), ("Virgil's 152 W. 44th St. New York 212/ 921-9494 American", "Virgil's", "152 W. 44th St.")), ######################## (("Chin's 3200 Las Vegas Blvd. S. Las Vegas 702-733-8899 Chinese", "Chin's 3200 Las Vegas Blvd. S. Las Vegas Chinese", "Chin's", "3200 Las Vegas Blvd. S."), ("Chin's 3200 Las Vegas Blvd. S Las Vegas 702/733-8899 Asian", "Chin's", "3200 Las Vegas Blvd. S")), ######################## (("Coyote Cafe (Las Vegas) 3799 Las Vegas Blvd. S. Las Vegas 702-891-7349 Southwestern", "Coyote Cafe (Las Vegas) 3799 Las Vegas Blvd. S. Las Vegas Southwestern", "Coyote Cafe", "3799 Las Vegas Blvd. S."), ("Coyote Café 3799 Las Vegas Blvd. S Las Vegas 702/891-7349 Southwestern", "Coyote Café", "3799 Las Vegas Blvd. S")), ######################## (("Le Montrachet Bistro 3000 Paradise Rd. Las Vegas 702-732-5651 French Bistro", "Le Montrachet Bistro 3000 Paradise Rd. Las Vegas French Bistro", "Le Montrachet Bistro", "3000 Paradise Rd."), ("Le Montrachet 3000 W. Paradise Rd. Las Vegas 702/732-5111 Continental", "Le Montrachet", "3000 W. Paradise Rd.")), ######################## (("Palace Court 3570 Las Vegas Blvd. S. Las Vegas 702-731-7110 French (New)", "Palace Court 3570 Las Vegas Blvd. S. Las Vegas French (New)", "Palace Court", "3570 Las Vegas Blvd. 
S."), ("Palace Court 3570 Las Vegas Blvd. S Las Vegas 702/731-7547 Continental", "Palace Court", "3570 Las Vegas Blvd. S.")), ######################## (("Second Street Grill 200 E. Fremont St. Las Vegas 702-385-6277 Pacific Rim", "Second Street Grill 200 E. Fremont St. Las Vegas Pacific Rim", "Second Street Grill", "200 E. Fremont St."), ("Second Street Grille 200 E. Fremont St. Las Vegas 702/385-3232 Seafood", "Second Street Grille", "200 E. Fremont St.")), ######################## (("Steak House The 2880 Las Vegas Blvd. S. Las Vegas 702-734-0410 Steakhouses", "Steak House The 2880 Las Vegas Blvd. S. Las Vegas Steakhouses", "Steak House The", "2880 Las Vegas Blvd. S."), ("Steak House 2880 Las Vegas Blvd. S Las Vegas 702/734-0410 Steak Houses", "Steak House", "2880 Las Vegas Blvd. S")), ######################## (("Tillerman The 2245 E. Flamingo Rd. Las Vegas 702-731-4036 Steakhouses", "Tillerman The 2245 E. Flamingo Rd. Las Vegas Steakhouses", "Tillerman The", "2245 E. Flamingo Rd."), ("Tillerman 2245 E. Flamingo Rd. Las Vegas 702/731-4036 Seafood", "Tillerman", "2245 E. Flamingo Rd.")), ######################## (("Abruzzi 2355 Peachtree Rd. NE Atlanta 404-261-8186 Italian", "Abruzzi 2355 Peachtree Rd. NE Atlanta Italian", "Abruzzi", "2355 Peachtree Rd. NE"), ("Abruzzi 2355 Peachtree Rd. Peachtree Battle Shopping Center Atlanta 404/261-8186 Italian", "Abruzzi", "2355 Peachtree Rd. Peachtree Battle Shopping Center")), ######################## (("Bacchanalia 3125 Piedmont Rd. Atlanta 404-365-0410 Californian", "Bacchanalia 3125 Piedmont Rd. Atlanta Californian", "Bacchanalia", "3125 Piedmont Rd."), ("Bacchanalia 3125 Piedmont Rd. near Peachtree Rd. Atlanta 404/365-0410 International", "Bacchanalia", "3125 Piedmont Rd. near Peachtree Rd.")), ######################## (("Bone's Restaurant 3130 Piedmont Rd. NE Atlanta 404-237-2663 Steakhouses", "Bone's Restaurant 3130 Piedmont Rd. NE Atlanta Steakhouses", "Bone's Restaurant", "3130 Piedmont Rd. 
NE"), ("Bone's 3130 Piedmont Road Atlanta 404/237-2663 American", "Bone's", "3130 Piedmont Road")), ######################## (("Brasserie Le Coze 3393 Peachtree Rd. Atlanta 404-266-1440 French Bistro", "Brasserie Le Coze 3393 Peachtree Rd. Atlanta French Bistro", "Brasserie Le Coze", "3393 Peachtree Rd."), ("Brasserie Le Coze 3393 Peachtree Rd. Lenox Square Mall near Neiman Marcus Atlanta 404/266-1440 French", "Brasserie Le Coze", "3393 Peachtree Rd. Lenox Square Mall near Neiman Marcus")), ######################## (("Buckhead Diner 3073 Piedmont Rd. Atlanta 404-262-3336 American (New)", "Buckhead Diner 3073 Piedmont Rd. Atlanta American (New)", "Buckhead Diner", "3073 Piedmont Rd."), ("Buckhead Diner 3073 Piedmont Road Atlanta 404/262-3336 American", "Buckhead Diner", "3073 Piedmont Road")), ######################## (("Ciboulette Restaurant 1529 Piedmont Ave. Atlanta 404-874-7600 French (New)", "Ciboulette Restaurant 1529 Piedmont Ave. Atlanta French (New)", "Ciboulette Restaurant", "1529 Piedmont Ave."), ("Ciboulette 1529 Piedmont Ave. Atlanta 404/874-7600 French", "Ciboulette", "1529 Piedmont Ave.")), ######################## (("Delectables 1 Margaret Mitchell Sq. Atlanta 404-681-2909 Cafeterias", "Delectables 1 Margaret Mitchell Sq. Atlanta Cafeterias", "Delectables", "1 Margaret Mitchell Sq."), ("Delectables 1 Margaret Mitchell Sq. Atlanta 404/681-2909 American", "Delectables", "1 Margaret Mitchell Sq.")), ######################## (("Georgia Grille 2290 Peachtree Rd. Atlanta 404-352-3517 Southwestern", "Georgia Grille 2290 Peachtree Rd. Atlanta Southwestern", "Georgia Grille", "2290 Peachtree Rd."), ("Georgia Grille 2290 Peachtree Rd. Peachtree Square Shopping Center Atlanta 404/352-3517 American", "Georgia Grille", "2290 Peachtree Rd. Peachtree Square Shopping Center")), ######################## (("GHedgerose Heights Inn The 490 E. Paces Ferry Rd. NE Atlanta 404-233-7673 Continental", "GHedgerose Heights Inn The 490 E. Paces Ferry Rd. 
NE Atlanta Continental", "GHedgerose Heights Inn The", "490 E. Paces Ferry Rd. NE"), ("Hedgerose Heights Inn 490 E. Paces Ferry Rd. Atlanta 404/233-7673 International", "Hedgerose Heights Inn", "490 E. Paces Ferry Rd.")), ######################## (("Heera of India 595 Piedmont Ave. Atlanta 404-876-4408 Indian", "Heera of India 595 Piedmont Ave. Atlanta Indian", "Heera of India", "595 Piedmont Ave."), ("Heera of India 595 Piedmont Ave. Rio Shopping Mall Atlanta 404/876-4408 Asian", "Heera of India", "595 Piedmont Ave. Rio Shopping Mall")), ######################## (("Indigo Coastal Grill 1397 N. Highland Ave. Atlanta 404-876-0676 Eclectic", "Indigo Coastal Grill 1397 N. Highland Ave. Atlanta Eclectic", "Indigo Coastal Grill", "1397 N. Highland Ave."), ("Indigo Coastal Grill 1397 N. Highland Ave. Atlanta 404/876-0676 Caribbean", "Indigo Coastal Grill", "1397 N. Highland Ave.")), ######################## (("La Grotta 2637 Peachtree Rd. NE Atlanta 404-231-1368 Italian", "La Grotta 2637 Peachtree Rd. NE Atlanta Italian", "La Grotta", "2637 Peachtree Rd."), ("La Grotta 2637 Peachtree Rd. Peachtree House Condominium Atlanta 404/231-1368 Italian", "La Grotta", "2637 Peachtree Rd. Peachtree House Condominium")), ######################## (("Mary Mac's Tea Room 224 Ponce de Leon Ave. Atlanta 404-876-1800 Southern/Soul", "Mary Mac's Tea Room 224 Ponce de Leon Ave. Atlanta Southern/Soul", "Mary Mac's Tea Room", "224 Ponce de Leon Ave."), ("Mary Mac's Tea Room 224 Ponce de Leon Ave. Atlanta 404/876-1800 Southern", "Mary Mac's Tea Room", "224 Ponce de Leon Ave.")), ######################## (("Nikolai's Roof 255 Courtland St. Atlanta 404-221-6362 Continental", "Nikolai's Roof 255 Courtland St. Atlanta Continental", "Nikolai's Roof", "255 Courtland St."), ("Nikolai's Roof 255 Courtland St. at Harris St. Atlanta 404/221-6362 Continental", "Nikolai's Roof", "255 Courtland St. at Harris St.")), ######################## (("Pano's & Paul's 1232 W. Paces Ferry Rd. 
Atlanta 404-261-3662 American (New)", "Pano's & Paul's 1232 W. Paces Ferry Rd. Atlanta American (New)", "Pano's & Paul's", "1232 W. Paces Ferry Rd."), ("Pano's and Paul's 1232 W. Paces Ferry Rd. Atlanta 404/261-3662 International", "Pano's & Paul's", "1232 W. Paces Ferry Rd.")), ######################## (("Ritz-Carlton Cafe (Buckhead) 3434 Peachtree Rd. NE Atlanta 404-237-2700 American (New)", "Ritz-Carlton Cafe (Buckhead) 3434 Peachtree Rd. NE Atlanta American (New)", "Ritz-Carlton Cafe", "3434 Peachtree Rd. NE"), ("Café Ritz-Carlton Buckhead,3434 Peachtree Rd. Atlanta Georgia 404/237-2700 ext 6108 International", "Café Ritz-Carlton Buckhead", "3434 Peachtree Rd.")), ######################## (("Ritz-Carlton Dining Room (Buckhead) 3434 Peachtree Rd. NE Atlanta 404-237-2700 American (New)", "Ritz-Carlton Dining Room (Buckhead) 3434 Peachtree Rd. NE Atlanta American (New)", "Ritz-Carlton Dining Room", "3434 Peachtree Rd."), ("Dining Room Ritz-Carlton Buckhead 3434 Peachtree Rd. Atlanta 404/237-2700 International", "Dining Room Ritz-Carlton Buckhead", "3434 Peachtree Rd.")), ######################## (("Ritz-Carlton Restaurant 181 Peachtree St. Atlanta 404-659-0400 French (Classic)", "Ritz-Carlton Restaurant 181 Peachtree St. Atlanta French (Classic)", "Ritz-Carlton Restaurant", "181 Peachtree St."), ("Restaurant Ritz-Carlton Atlanta 181 Peachtree St. Atlanta 404/659-0400 Continental", "Restaurant Ritz-Carlton", "181 Peachtree St.")), ######################## (("Toulouse 293-B Peachtree Rd. Atlanta 404-351-9533 French (New)", "Toulouse 293-B Peachtree Rd. Atlanta French (New)", "Toulouse", "293-B Peachtree Rd."), ("Toulouse B Peachtree Rd. Atlanta 404/351-9533 French", "Toulouse", "B Peachtree Rd.")), ######################## (("Veni Vidi Vici 41 14th St. Atlanta 404-875-8424 Italian", "Veni Vidi Vici 41 14th St. Atlanta Italian", "Veni Vidi Vici", "41 14th St."), ("Veni Vidi Vici 41 14th St. 
Atlanta 404/875-8424 Italian", "Veni Vidi Vici", "41 14th St.")), ######################## (("Alain Rondelli 126 Clement St. San Francisco 415-387-0408 French (New)", "Alain Rondelli 126 Clement St. San Francisco French (New)", "Alain Rondelli", "126 Clement St."), ("Alain Rondelli 126 Clement St. San Francisco 415/387-0408 French", "Alain Rondelli", "126 Clement St.")), ######################## (("Aqua 252 California St. San Francisco 415-956-9662 American (New)", "Aqua 252 California St. San Francisco American (New)", "Aqua", "252 California St."), ("Aqua 252 California St. San Francisco 415/956-9662 Seafood", "Aqua", "252 California St.")), ######################## (("Boulevard 1 Mission St. San Francisco 415-543-6084 American (New)", "Boulevard 1 Mission St. San Francisco American (New)", "Boulevard", "1 Mission St."), ("Boulevard 1 Mission St. San Francisco 415/543-6084 American", "Boulevard", "1 Mission St.")), ######################## (("Cafe Claude 7 Claude Ln. San Francisco 415-392-3505 French Bistro", "Cafe Claude 7 Claude Ln. San Francisco French Bistro", "Cafe Claude", "7 Claude Ln."), ("Café Claude 7 Claude La. San Francisco 415/392-3505 French", "Café Claude", "7 Claude La.")), ######################## (("Campton Place 340 Stockton St. San Francisco 415-955-5555 American (New)", "Campton Place 340 Stockton St. San Francisco American (New)", "Campton Place", "340 Stockton St."), ("Campton Place 340 Stockton St. San Francisco 415/955-5555 American", "Campton Place", "340 Stockton St.")), ######################## (("Chez Michel 804 North Point St. San Francisco 415-775-7036 Californian", "Chez Michel 804 North Point St. San Francisco Californian", "Chez Michel", "804 North Point St."), ("Chez Michel 804 Northpoint San Francisco 415/775-7036 French", "Chez Michel", "804 Northpoint")), ######################## (("Fleur de Lys 777 Sutter St. San Francisco 415-673-7779 French (New)", "Fleur de Lys 777 Sutter St. 
San Francisco French (New)", "Fleur de Lys", "777 Sutter St."), ("Fleur de Lys 777 Sutter St. San Francisco 415/673-7779 French", "Fleur de Lys", "777 Sutter St.")), ######################## (("Fringale 570 Fourth St. San Francisco 415-543-0573 French Bistro", "Fringale 570 Fourth St. San Francisco French Bistro", "Fringale", "570 Fourth St."), ("Fringale 570 4th St. San Francisco 415/543-0573 French", "Fringale", "570 4th St.")), ######################## (("Hawthorne Lane 22 Hawthorne St. San Francisco 415-777-9779 Californian", "Hawthorne Lane 22 Hawthorne St. San Francisco Californian", "Hawthorne Lane", "22 Hawthorne St."), ("Hawthorne Lane 22 Hawthorne St. San Francisco 415/777-9779 American", "Hawthorne Lane", "22 Hawthorne St.")), ######################## (("Khan Toke Thai House 5937 Geary Blvd. San Francisco 415-668-6654 Thai", "Khan Toke Thai House 5937 Geary Blvd. San Francisco Thai", "Khan Toke Thai House", "5937 Geary Blvd."), ("Khan Toke Thai House 5937 Geary Blvd. San Francisco 415/668-6654 Asian", "Khan Toke Thai House", "5937 Geary Blvd.")), ######################## (("La Folie 2316 Polk St. San Francisco 415-776-5577 French (New)", "La Folie 2316 Polk St. San Francisco French (New)", "La Folie", "2316 Polk St."), ("La Folie 2316 Polk St. San Francisco 415/776-5577 French", "La Folie", "2316 Polk St.")), ######################## (("LuLu Restaurant-Bis-Cafe 816 Folsom St. San Francisco 415-495-5775 Mediterranean", "LuLu Restaurant-Bis-Cafe 816 Folsom St. San Francisco Mediterranean", "LuLu Restaurant-Bis-Cafe","816 Folsom St."), ("LuLu 816 Folsom St. San Francisco 415/495-5775 Mediterranean", "LuLu", "816 Folsom St.")), ######################## (("Masa's 648 Bush St. San Francisco 415-989-7154 French (New)", "Masa's 648 Bush St. San Francisco French (New)", "Masa's", "648 Bush St."), ("Masa's 648 Bush St. San Francisco 415/989-7154 French", "Masa's", "648 Bush St.")), ######################## (("Mifune 1737 Post St. 
San Francisco 415-922-0337 Japanese", "Mifune 1737 Post St. San Francisco Japanese", "Mifune", "1737 Post St."), ("Mifune Japan Center Kintetsu Building 1737 Post St. San Francisco 415/922-0337 Asian", "Mifune", "Japan Center Kintetsu Building 1737 Post St.")), ######################## (("PlumpJack Cafe 3127 Fillmore St. San Francisco 415-563-4755 American (New)", "PlumpJack Cafe 3127 Fillmore St. San Francisco American (New)", "PlumpJack Cafe", "3127 Fillmore St."), ("PlumpJack Café 3201 Fillmore St. San Francisco 415/563-4755 Mediterranean", "PlumpJack Café", "3201 Fillmore St.")), ######################## (("Postrio 545 Post St. San Francisco 415-776-7825 Californian", "Postrio 545 Post St. San Francisco Californian", "Postrio", "545 Post St."), ("Postrio 545 Post St. San Francisco 415/776-7825 American", "Postrio", "545 Post St.")), ######################## (("Ritz-Carlton Dining Room (San Francisco) 600 Stockton St. San Francisco 415-296-7465 French (New)", "Ritz-Carlton Dining Room (San Francisco) 600 Stockton St. San Francisco French (New)", "Ritz-Carlton Dining Room", "600 Stockton St."), ("Ritz-Carlton Restaurant and Dining Room 600 Stockton St. San Francisco 415/296-7465 American", "Ritz-Carlton Restaurant and Dining Room", "600 Stockton St.")), ######################## (("Rose Pistola 532 Columbus Ave. San Francisco 415-399-0499 Italian", "Rose Pistola 532 Columbus Ave. San Francisco Italian", "Rose Pistola", "532 Columbus Ave."), ("Rose Pistola 532 Columbus Ave. 
San Francisco 415/399-0499 Italian", "Rose Pistola", "532 Columbus Ave."))] # 112 duplicats assert len(self.liste) == 112 self.nb_dup = 112 BC = 1 DEP = 2 class Recall(object): def __init__(self, nb_dup): self.bc_match_full = self.bc_match_name = self.bc_match_address = 0 self.dep_match_full = self.dep_match_name = self.dep_match_address = 0 self.bc_total_full = self.bc_total_name = self.bc_total_address = 0 self.dep_total_full = self.dep_total_name = self.dep_total_address = 0 self.bc_total_full_filtered = self.bc_total_name_filtered = self.bc_total_address_filtered = 0 self.dep_total_full_filtered = self.dep_total_name_filtered = self.dep_total_address_filtered = 0 self.NB_DUP = nb_dup def __str__(self): recall_name = 100 * (0.0 + self.bc_match_name + self.dep_match_name) / self.NB_DUP recall_address = 100 * (0.0 + self.bc_match_address + self.dep_match_address) / self.NB_DUP prec_name = 100 * (0.0 + self.bc_match_name + self.dep_match_name) / (self.bc_total_name + self.dep_total_name) prec_address = 100 * (0.0 + self.bc_match_address + self.dep_match_address) / (self.bc_total_address + self.dep_total_address) prec_name_filtered = 100 * (0.0 + self.bc_match_name + self.dep_match_name) / (self.bc_total_name_filtered + self.dep_total_name_filtered) prec_address_filtered = 100 * (0.0 + self.bc_match_address + self.dep_match_address) / (self.bc_total_address_filtered + self.dep_total_address_filtered) f_mesure_name = (2 * prec_name * recall_name) / (prec_name + recall_name) f_mesure_address = (2 * prec_address * recall_address) / (prec_address + recall_address) f_mesure_name_filtered = (2 * prec_name_filtered * recall_name) / (prec_name_filtered + recall_name) f_mesure_address_filtered = (2 * prec_address_filtered * recall_address) / (prec_address_filtered + recall_address) res = ("bc_match_full = " +str(self.bc_match_full) + ' / bc_match_name = ' +str(self.bc_match_name) + ' / bc_match_address = ' +str(self.bc_match_address)+ '\ndep_match_full = ' 
+str(self.dep_match_full) + ' / dep_match_name = ' +str(self.dep_match_name) + ' / dep_match_address = ' +str(self.dep_match_address) + '\nRecall Name = ' +str(recall_name) + '\nRecall Address = ' +str(recall_address) + '\nbc_total_full = ' +str(self.bc_total_full) + ' / bc_total_name = ' +str(self.bc_total_name) + ' / bc_total_address = ' +str(self.bc_total_address)+ '\ndep_total_full = ' +str(self.dep_total_full) + ' / dep_total_name = ' +str(self.dep_total_name) + ' / dep_total_address = ' +str(self.dep_total_address) + '\nPrecision Name = ' +str(prec_name) + '\nPrecision Address = ' +str(prec_address)+ '\nF-mesure Name = ' +str(f_mesure_name) + '\nF-mesure Address = ' +str(f_mesure_address) + '\n----------------------------------' + '\nbc_total_full_filtered = ' +str(self.bc_total_full_filtered) + ' / bc_total_name_filtered = ' +str(self.bc_total_name_filtered) + ' / bc_total_address_filtered = ' +str(self.bc_total_address_filtered)+ '\ndep_total_full_filtered = ' +str(self.dep_total_full_filtered) + ' / dep_total_name_filtered = ' +str(self.dep_total_name_filtered) + ' / dep_total_address_filtered = ' +str(self.dep_total_address_filtered) + '\nPrecision Name Filtered = ' +str(prec_name_filtered) + '\nPrecision Address Filtered = ' +str(prec_address_filtered)+ '\nF-mesure Name Filtered = ' +str(f_mesure_name_filtered) + '\nF-mesure Address Filtered = ' +str(f_mesure_address_filtered)) return res def inc(self, bcdep, part): if bcdep == BC: if part == 'full': self.bc_match_full += 1 elif part == 'name': self.bc_match_name += 1 elif part == 'address': self.bc_match_address += 1 else: if part == 'full': self.dep_match_full += 1 elif part == 'name': self.dep_match_name += 1 elif part == 'address': self.dep_match_address += 1 def inc_total(self, bcdep, part): if bcdep == BC: if part == 'full': self.bc_total_full += 1 elif part == 'name': self.bc_total_name += 1 elif part == 'address': self.bc_total_address += 1 else: if part == 'full': self.dep_total_full += 1 elif 
part == 'name': self.dep_total_name += 1 elif part == 'address': self.dep_total_address += 1 def inc_total_filtered(self, bcdep, part): if bcdep == BC: if part == 'full': self.bc_total_full_filtered += 1 elif part == 'name': self.bc_total_name_filtered += 1 elif part == 'address': self.bc_total_address_filtered += 1 else: if part == 'full': self.dep_total_full_filtered += 1 elif part == 'name': self.dep_total_name_filtered += 1 elif part == 'address': self.dep_total_address_filtered += 1 class DuplicateChecker(object): """Classe prenant en entrée un alignement entre Zagat et Fodor et calculant les duplicats identifiés """ def __init__(self, bbl): self.bbl = bbl self.mp = MatchedPairs() self.matched_name = [0] * self.mp.nb_dup self.matched_address = [0] * self.mp.nb_dup def run(self): self.calcPrecision() self.calcRecall() def match_part_left(self, full_text, part_text, minimum=1): """ La partie gauche de part_text est une sous-chaine de full_text""" splitted_part_text = part_text.split() nb_words = len(splitted_part_text) match = False i = nb_words while i >= minimum and not match: #if nb_words - i < minimum-1: break joined_part_text = ' '.join(splitted_part_text[:i]) if (joined_part_text in full_text and joined_part_text not in ['The','Le', 'the','of',"'s",'Restaurant']): match = True #logging.debug(part_text + ' LEFT PART MATCH '+ full_text) assert minimum <= i < nb_words break assert match == False i -= 1 if i > minimum: assert match == True,(i,minimum) return match,i def match_part_right(self, full_text, part_text, minimum=1): """ La partie droite de part_text est une sous-chaine de full_text""" splitted_part_text = part_text.split() nb_words = len(splitted_part_text) match = False i = 0 while i < nb_words and not match: if nb_words - i < minimum: break joined_part_text = ' '.join(splitted_part_text[i:]) if (joined_part_text in full_text and joined_part_text not in ['The','Le', 'the','of',"'s",'Restaurant']): match = True #logging.debug(part_text + ' RIGHT PART 
MATCH '+ full_text) break i += 1 return match def match_bloc(self, texte_bloc, bcdep, precision): texte_bloc = texte_bloc.lstrip(' \r\n\t') texte_bloc = texte_bloc.rstrip(' \r\n\t') if len(texte_bloc) < 1: return logging.debug("BC: "+texte_bloc) precision.inc_total(bcdep, 'name') precision.inc_total(bcdep, 'address') for stop_word in ['The', 'Ave.', 'Blvd.','New','(New)','Hotel','St.', 'Hollywood','Beverly','New York', 'French', 'American', 'Californian','Chinese','Italian','French (','American (', 'Seafood','Caribbean', 'Mexican', 'Mediterranean','Grill']: if texte_bloc.lower() == stop_word.lower(): return precision.inc_total_filtered(bcdep, 'name') precision.inc_total_filtered(bcdep, 'address') i = 0 for (Z, F) in self.mp.liste: Zfull, ZfullNoPhone, Zname, Zaddress = Z if (texte_bloc in ZfullNoPhone or self.match_part_left(ZfullNoPhone, texte_bloc) ):#or #self.match_part_right(ZfullNoPhone, texte_bloc) ): #logging.debug("MATCH FULL: "+ Zfull) #bc_match_full += 1 match_name = match_address = False match_name_left, pos_mnl = self.match_part_left(Zname, texte_bloc) if (self.matched_name[i] == 0 and (#texte_bloc in Zname or self.match_part_right(Zname, texte_bloc) or match_name_left)): match_name = True precision.inc(bcdep, 'name') self.matched_name[i] = 1 logging.debug(str(bcdep)+" MATCH NAME: "+ Zname) if match_name and match_name_left: splitted_part_text = texte_bloc.split() text_bloc_minus_name = ' '.join(splitted_part_text[pos_mnl:]) #logging.debug(str(bcdep)+"text_bloc_minus_name: X"+ text_bloc_minus_name+"X / "+Zaddress) match_address_left, pos_mal = self.match_part_left(Zaddress, text_bloc_minus_name, 3) if (self.matched_address[i] == 0 and (#(text_bloc_minus_name in Zaddress) or match_address_left)): match_address = True precision.inc(bcdep, 'address') self.matched_address[i] = 1 logging.debug(str(bcdep)+ " MATCH ADDRESS: "+ Zaddress) match_address_left, pos_mal = self.match_part_left(Zaddress, texte_bloc, 3) if (self.matched_address[i] == 0 and (#texte_bloc 
in Zaddress or # or self.match_part_right(Zaddress, texte_bloc, 2) or match_address_left)): match_address = True precision.inc(bcdep, 'address') self.matched_address[i] = 1 logging.debug(str(bcdep)+ " MATCH ADDRESS: "+ Zaddress) if match_name or match_address: break i += 1 def calcRecall(self): texte = self.bbl.texte recall = Recall(self.mp.nb_dup) for (B1, B2) in self.bbl.liste: # si bloc invariant -> duplicat if B1 is not None and B2 is not None and B1[0] == 'BC': logging.debug("########################################################") texte_bloc = texte[B1[1]:B1[2]] self.match_bloc(texte_bloc, BC, recall) for debut_dep, fin_dep in B1[3]: texte_bloc = texte[debut_dep:fin_dep] self.match_bloc(texte_bloc, DEP, recall) elif B1 is not None and B1[0] == 'D': logging.debug("%%%%%%%%%%%%%%%%%%%%%%%%%") texte_bloc = texte[B1[1]:B1[2]] self.match_bloc(texte_bloc, DEP, recall) elif B1 is not None: logging.debug("%%%%%%%%%%%%%%%%%%%%%%%%%") for debut_dep, fin_dep in B1[3]: texte_bloc = texte[debut_dep:fin_dep] self.match_bloc(texte_bloc, DEP, recall) print recall def calcPrecision(self): pass if __name__ == '__main__': logging.basicConfig(level=logging.DEBUG,#INFO, format='%(asctime)s %(levelname)s %(message)s', #datefmt='%H:%M:%S', filename=os.path.join(os.getcwd(),'log.txt'), filemode='w') console = logging.StreamHandler() console.setLevel(logging.INFO) test()