#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import getopt
import sys
import time
from geopy import geocoders  
import csv
import traceback
from StringIO import *
import unicodedata
import re
# Punctuation stripped from normalised names ("a virer" is French for
# "to remove").
_avirer = re.compile(r'[,.]')

# Leading run of digits; used to detect a zipcode at the start of a name.
re_zipcode = re.compile('^[0-9]+')


def unaccent(str):
    """Return an upper-case, ASCII-only, punctuation-free form of *str*.

    Accented characters are reduced to their base letter via NFKD
    decomposition, characters with no ASCII equivalent are dropped, hyphens
    and apostrophes become spaces, and commas and full stops are removed.

    *str* may be text, a byte string (decoded as UTF-8, falling back to
    Latin-1 when the bytes are not valid UTF-8), or any other object, which
    is converted to text first.  The result is always the interpreter's
    native string type.
    """
    # The parameter name shadows the builtin but is kept for callers that
    # pass it by keyword; work on a local alias instead.
    text = str
    text_type = type(u'')
    if isinstance(text, bytes):
        # Byte strings (what Python 2's csv module yields) must be decoded
        # explicitly: the implicit ASCII decode fails on accented names.
        try:
            text = text.decode('utf-8')
        except UnicodeDecodeError:
            text = text.decode('latin-1')
    elif not isinstance(text, text_type):
        text = text_type(text)

    folded = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore')
    if not isinstance(folded, type('')):
        # On Python 3 encode() yields bytes; go back to the native string.
        folded = folded.decode('ascii')

    folded = folded.upper().replace('-', ' ').replace("'", ' ')
    return _avirer.sub('', folded)



def _usage(msg=None):
    """Write *msg* (if any) and the option summary to stderr, then exit 2."""
    if msg:
        sys.stderr.write("%s\n" % msg)
    sys.stderr.write(
        "usage: %s [-s seconds] [-f input] [-o output] [-e rejects] [-d]\n"
        % os.path.basename(sys.argv[0]))
    sys.exit(2)


def _split_place(place):
    """Split a geocoder place label into ``(city, zipcode, country)``.

    The label is split on ", "; the first component is the city and the last
    one the country.  When the city component starts with digits, those
    digits are returned as the zipcode and removed from the city together
    with the single character that follows them; otherwise the zipcode is
    ``None``.
    """
    parts = place.split(", ")
    city = parts[0]
    zipcode = None
    found = re_zipcode.match(city)
    if found:
        zipcode = found.group()
        city = city[len(zipcode) + 1:]
    return city, zipcode, parts[-1]


def main():
    """
    geolocalyse.py

    Geocode the rows of a '|'-delimited file of ``key|city|zipcode|country``
    records and check that the geocoder's answer matches the input.

    Options:
      -s seconds  pause before each lookup (default 1)
      -f file     read rows from file instead of stdin
      -o file     write accepted rows to file instead of stdout
      -e file     write rows whose city or zipcode does not match to file
                  (without -e these rows are only reported on stderr)
      -d          trace each lookup on stderr

    Accepted rows are written as ``key|city|zipcode|country|lat|lng``.
    Rejected rows carry two extra fields: the matched city and zipcode.
    """
    debug = 0
    sleep = 1.0
    data = sys.stdin
    output = sys.stdout
    # Stays None unless -e is given, so a mismatch never hits an unbound name.
    output_error = None
    # Only the files opened here are closed on exit, never stdin/stdout.
    opened = []

    try:
        opts, _args = getopt.getopt(sys.argv[1:], 's:f:o:e:d')
    except getopt.error as msg:
        _usage(msg)

    try:
        for o, a in opts:
            if o == '-s':
                # Validate once up front instead of on every row.
                try:
                    sleep = float(a)
                except ValueError:
                    _usage("invalid sleep delay: %s" % a)
                if sleep < 0:
                    _usage("invalid sleep delay: %s" % a)
            elif o == '-f':
                data = open(a)
                opened.append(data)
            elif o == '-o':
                output = open(a, 'w')
                opened.append(output)
            elif o == '-e':
                output_error = open(a, 'w')
                opened.append(output_error)
            elif o == '-d':
                debug = 1

        g = geocoders.Google(resource='maps')

        cities = csv.reader(data, delimiter='|')
        for l, row in enumerate(cities, 1):
            if len(row) != 4:
                # A blank or truncated line must not abort the whole run.
                sys.stderr.write(
                    "skipping malformed line %s: expected 4 fields, got %s\n"
                    % (l, len(row)))
                continue
            key, city, zipcode, country = row

            # Throttle lookups.
            time.sleep(sleep)
            if debug:
                sys.stderr.write("fetching (%s,%s,%s)..\n" % (city, zipcode, country))
            try:
                place, (lat, lng) = g.geocode("%s, %s, %s" % (city, zipcode, country))
            except ValueError:
                # Report the failed lookup (innermost frame only) and go on.
                traceback.print_exc(limit=1, file=sys.stderr)
                continue
            if debug:
                sys.stderr.write("get %s, (%s, %s))\n" % (place, lat, lng))

            matched_city, matched_zipcode, matched_country = _split_place(place)
            if unaccent(country) != unaccent(matched_country):
                sys.stderr.write("country doesn't match for line %s, key %s : %s!=%s\n" % (l, key, matched_country, country))
                continue

            # Build the output record from the raw input values before they
            # are normalised for comparison.
            geocoded = '|'.join((key, city, zipcode, country, str(lat), str(lng)))
            city = unaccent(city)
            matched_city = unaccent(matched_city)

            # mdm specific
            if matched_city != 'SAINTES':
                matched_city = matched_city.replace('SAINTES', 'STES').replace('SAINTE', 'STE').replace('SAINT', 'ST')

            mismatch = None
            if matched_city != city:
                mismatch = ("city", matched_city, city)
            elif matched_zipcode and matched_zipcode != zipcode:
                mismatch = ("zipcode", matched_zipcode, zipcode)

            if mismatch is not None:
                sys.stderr.write("%s doesn't match for line %s, key %s : %s!=%s\n" % (mismatch[0], l, key, mismatch[1], mismatch[2]))
                if output_error is not None:
                    output_error.write(geocoded + '|%s|%s\n' % (matched_city, matched_zipcode))
                continue

            output.write(geocoded + '\n')
    finally:
        for fp in opened:
            fp.close()
           
        
        
# Script entry point: run the geocoding pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    main()

