l10n_ar_bank/wizard/geosearch.py from ingadhoc/odoo-argentina

l10n_ar_bank/wizard/geosearch.py
Summary

Maintainability

3 hrs
Test Coverage

Issues
# -*- coding: utf-8 -*-
#!/usr/bin/python

import re, sys
from strcmp import mostequivalent
from cache import urlopen, geocode
import unicodedata
import logging
_logger = logging.getLogger(__name__)

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    _logger.debug('Can not `from BeautifulSoup import BeautifulSoup`.')

# Province name -> one-letter province code, grouped by country name.
# Keys are lower-case and written without accents.
# NOTE(review): the letters look like the ISO 3166-2:AR subdivision codes
# -- confirm before relying on that.
codprov_dict = {
    'argentina': {
        'buenos aires': 'B',
        'buenos aires province': 'B',
        'catamarca': 'K',
        'chaco': 'H',
        'chubut': 'U',
        'cordoba': 'X',
        'corrientes': 'W',
        'entre rios': 'E',
        'formosa': 'P',
        'jujuy': 'Y',
        'la pampa': 'L',
        'la rioja': 'F',
        'mendoza': 'M',
        'misiones': 'N',
        'neuquen': 'Q',
        'rio negro': 'R',
        'salta': 'A',
        'san juan': 'J',
        'san luis': 'D',
        'santa cruz': 'Z',
        'santa fe': 'S',
        'santiago del estero': 'G',
        'tierra del fuego': 'V',
        'tucuman': 'T',
    },
}

# Per-country regular expressions used to pull secondary address details
# out of a lower-cased address string (currently only "piso <token>").
# Each pattern's groups are what gets stored as "street2".
street2_searcher = {
    'argentina': [
        # Raw string: \s and \w are regex classes, not string escapes.
        re.compile(r'(piso\s+\w+)'),
    ],
}

def strip_accents(s):
    """
    Return `s` without its combining marks (accents, tildes, ...).

    The text is decomposed with NFD normalization and every character whose
    Unicode category is 'Mn' is dropped.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

def _st(i, s=' ', m='+'):
    i = i.lower()
    i = m.join(i.split(s))
    i = i.encode('ascii', 'xmlcharrefreplace')
    return i

#def search_zip(street, number, city, state, country, unique=True):
    #"""
    #Return zipcode. Only works with argentina.

    #>>> search_zip("rivadavia", "9800", "buenos aires", "capital federal", "argentina")
    #u'C1407DZT'
    #>>> search_zip("jose clemente paz", "1200", "jose clemente paz", "buenos aires", "argentina")
    #u'B1665BBB'
    #>>> search_zip("general paz", "5445", "general san martin", "buenos aires", "argentina")
    #u'1650'
    #"""
    #street = strip_accents(street.lower())
    #city = strip_accents(city.lower())
    #state = strip_accents(state.lower())
    #country = strip_accents(country.lower())

    #codpos = None

    #if country in ['argentina', 'ar']:

        ## Strip street and avenue prefixes (e.g. "av") from the street name
        #street = re.sub('\s*av\s+', '', street)

        #re_cpa = re.compile('>(\w{8})<')

        #if state in ['capital federal']:
            #codloca=['5001',]
            #codpos=['',]
        #else:
            #url="http://www3.correoargentino.com.ar/scriptsN/cpa/cpa_loca.idc?codprov=%s&pnl=%s"
            #inpage = urlopen(url % (codprov_dict[country][state], _st(city)))
            #soup = BeautifulSoup(inpage)
            #options = soup.findAll('option')
            #if len(options) == 0:
                #raise RuntimeError('No locations for "%s"' % ','.join([street, number, city, state,
                                                         #country]))

            #loca = map(lambda opt: re.search('(.*)\s*\(\d+\)',
                                             #opt.string.lower()).groups()[0].strip(), options)
            #codloca = map(lambda opt: opt['value'], options)
            #codpos = map(lambda opt: re.search('\((\d+)\)', opt.string).groups()[0], options)

        #for i in xrange(len(codloca)):
            #url="http://www3.correoargentino.com.ar/scriptsN/cpa/cpa_calle.idc?codloca=%s&pnc=%s&alt=%s"
            #inpage = urlopen(url % (codloca[i], _st(street), number))
            #soup = BeautifulSoup(inpage)
            #output = soup.body.div.table.tr.td.renderContents()
            #match = re_cpa.search(output)
            #if match: codpos[i] = match.group(1)

        #if len(codloca) > 1 and unique:
            #i = mostequivalent(loca, city)
            #return codpos[i]
        #else:
            #return unicode(codpos[0])
    #else:
        #raise NotImplementedError


def unify_geo_data(input_string):
    """
    Return unified geographic data for a free-form address string.

    The input is lower-cased, fragments matched by ``street2_searcher`` are
    moved into ``street2``, and the remainder is resolved through
    ``geocode``. The place name returned by the geocoder is split on commas
    into address, city, state and country, and the address is then split
    into street and number.

    :param input_string: comma separated address, for example
        "Av. rivadavia 9858, buenos aires, argentina".
    :return: dict with the keys ``street``, ``street2``, ``number``,
        ``city``, ``state``, ``country``, ``zip``, ``latitud`` and
        ``longitud``. ``zip`` is always '' because the zip lookup is
        disabled.
    :raises RuntimeError: if the geocoded place has more than 4 comma
        separated tokens.

    NOTE: the examples below need a live geocoder. Some of them expect a
    non-empty 'zip', although this function currently always returns ''
    for that key.

    >>> data = unify_geo_data("Av. rivadavia 9858, buenos aires, argentina")
    >>> data == {'latitud': -34.637979199999997, 'city': u'Buenos Aires',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Capital Federal',\
                 'street': u'Av Rivadavia', 'street2': '',\
                 'longitud': -58.503058099999997, 'zip': 'C1407DZU'}
    True
    >>> data = unify_geo_data("gral paz 9858, general san martin, buenos aires, argentina")
    >>> data == {'latitud': -34.581238599999999, 'city': '', 'zip': '',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Buenos Aires', 'street': u'Gral. Paz',\
                 'street2': '', 'longitud': -58.513873400000001}
    True
    >>> data = unify_geo_data("VICTORIA OCAMPO 360 PISO PB,CAPITAL FEDERAL,CAPITAL FEDERAL,Argentina")
    >>> data == {'latitud': -34.601967000000002, 'city': u'Buenos Aires',\
                 'zip': '', 'country': u'Argentina', 'street2': 'piso pb',\
                 'number': u'360', 'state': u'Capital Federal',\
                 'street': u'Victoria Ocampo', 'longitud': -58.364093699999998}
    True
    >>> data = unify_geo_data("AV. DEL LIBERTADOR 767 PISO 1,VICENTE LOPEZ,BUENOS AIRES,Argentina")
    >>> data == {'latitud': -34.527026800000002, 'city': u'Vicente L\xf3pez',\
                 'zip': u'1638', 'country': u'Argentina', 'street2': 'piso 1',\
                 'number': u'767', 'state': u'Buenos Aires Province',\
                 'street': u'Av Del Libertador Gral. San Martin',\
                 'longitud': -58.471314999999997}
    True
    >>> data = unify_geo_data("PELLEGRINI 255,,,SANTA ROSA,LA PAMPA,Argentina")
    >>> data == {'latitud': -36.619441799999997, 'city': u'Santa Rosa',\
                 'zip': u'L6300DRE', 'country': u'Argentina', 'street2': '',\
                 'number': u'255', 'state': u'La Pampa', 'street': u'Pellegrini',\
                 'longitud': -64.292496499999999}
    True
    """
    input_string = input_string.lower()

    # Remove spurious data from the search string and keep it as street2.
    street2 = []
    for country in street2_searcher:
        if country in input_string:
            for rexp in street2_searcher[country]:
                match = rexp.search(input_string)
                if match:
                    # Only the first match is recorded; sub() then removes
                    # every occurrence from the search string.
                    street2.append(','.join(match.groups()))
                    input_string = rexp.sub('', input_string)
    street2 = ','.join(street2)
    input_string = input_string.encode('ascii', 'ignore')

    # Search data in the geographic database.
    try:
        place, (lat, lng) = geocode(_st(input_string, " ", " "))
    except ValueError:
        # NOTE(review): presumably raised when the geocoder does not return
        # exactly one match -- confirm against the cache.geocode wrapper.
        # Ask for every candidate and keep the one closest to the input.
        places = list(geocode(input_string, exactly_one=False))
        names = [name for name, _coords in places]
        i = mostequivalent(names, input_string)
        place, (lat, lng) = places[i]

    # Ordering data: the place reads "address, city, state, country";
    # missing leading tokens are padded with empty strings.
    tokens = [token.strip() for token in place.split(',')]
    if len(tokens) > 4:
        raise RuntimeError('Exists more than 4 tokens in the place.')
    tokens = [u''] * (4 - len(tokens)) + tokens

    data = {}
    address, data['city'], data['state'], data['country'] = tokens
    data['latitud'] = lat
    data['longitud'] = lng

    # Split address data into street and number.
    if data['country'] in ['Argentina']:
        # Argentine addresses are written "<street> <number>".
        s = re.search(r'^\s*(.*)\s+(\d+)\s*$', address)
        if s is not None:
            street, number = s.groups()
        else:
            # The geocoded address has no trailing number: take street and
            # number from the first token of the input and treat the
            # geocoded address token as the city.
            street = input_string.split(',')[0]
            s = re.search(r'(.*)\s+(\d+)', street)
            if s is not None:
                street, number = s.groups()
            else:
                number = ''
            data['city'] = address
    else:
        # Other countries are parsed as "<number> <street>".
        s = re.search(r'(\d*)\s+(.*)', address)
        if s is not None:
            number, street = s.groups()
        else:
            # No whitespace to split on: keep the whole token as the street
            # instead of failing on a missing match.
            number, street = '', address

    data['street'] = street.strip()
    data['street2'] = street2.strip()
    data['number'] = number.strip()
    # The zip lookup is disabled, so no zip code is resolved here.
    data['zip'] = ''
    return data

def test_suite():
    """Build a unittest suite out of the doctests found in this module."""
    from doctest import DocTestSuite

    # Without a module argument DocTestSuite inspects the calling module.
    suite = DocTestSuite()
    return suite

if __name__ == "__main__":
    # Executed as a script: run the module doctests with the text runner.
    from unittest import TextTestRunner
    TextTestRunner().run(test_suite())