﻿from pprint import pprint

UNICODEDATA="UnicodeData.txt"
EASTASIANWIDTHS="EastAsianWidth.txt"

def readdatafile(filename):
    """Yield the fields of every data line in a ';'-separated data file.

    Everything from the first '#' on a line onwards is discarded as a
    comment, and lines that are empty after that are skipped.  Each
    remaining line is split on ';'.

    Whitespace around each individual field is removed as well, so padded
    lines such as ``0000..001F     ; N`` produce the same fields as the
    compact form ``0000..001F;N``.

    Args:
        filename: Path of the file to read.

    Yields:
        A list of field strings for each data line, in file order.
    """
    with open(filename) as f:
        for line in f:
            line = line.split('#')[0].strip()
            if line:
                yield [field.strip() for field in line.split(';')]

def builddb(raw_data):
    """Build the character database from parsed data records.

    A record whose second field starts with '<' and ends with 'First>' is
    taken to open a range: it is merged with the record that immediately
    follows it into a single entry keyed 'FIRST..LAST' (the first fields of
    both records).  The merged entry's name is the two names joined with
    '..', followed by fields 2-9 of the opening record.  Every other record
    is stored under its first field with fields 1-9 as its value.

    Args:
        raw_data: Indexable sequence of records, each a list of string
            fields.

    Returns:
        A dict mapping a code point (or 'FIRST..LAST' range) string to a
        list of field strings.

    Raises:
        IndexError: If a range-opening record is the last record, so there
            is no following record to pair it with, or if a record has
            fewer than two fields.
    """
    data = {}
    i = 0
    count = len(raw_data)
    while i < count:
        info = raw_data[i]
        name = info[1]
        # startswith() rather than name[0]: an empty name field is then
        # handled as an ordinary record instead of raising IndexError.
        if name.startswith('<') and name.endswith('First>'):
            # The range's closing record is the very next one; consume it
            # here so the loop does not visit it again.
            i += 1
            last = raw_data[i]

            key = '{0}..{1}'.format(info[0], last[0])
            data[key] = ['{0}..{1}'.format(name, last[1])] + info[2:10]
        else:
            data[info[0]] = info[1:10]

        i += 1

    return data

def add_eawidths(data):
    """Append width values from the EASTASIANWIDTHS file to *data*.

    Each record read from EASTASIANWIDTHS is matched by looking up its
    first field verbatim as a key of *data*.  On a match the record's
    second field is appended to that entry; records with no matching key
    are ignored.

    Args:
        data: Dict mapping key strings to lists of fields.  It is modified
            in place.

    Returns:
        The same *data* dict that was passed in.
    """
    for record in readdatafile(EASTASIANWIDTHS):
        key = record[0]
        if key not in data:
            continue
        data[key] += [record[1]]

    return data

# Read every record up front: builddb() indexes into the sequence and asks
# for its length, so a lazy generator would not do.
raw_data = list(readdatafile(UNICODEDATA))

# Build the database, then extend its entries with East Asian Width values.
data = add_eawidths(builddb(raw_data))

import cStringIO, gzip

# Render the whole database into an in-memory buffer, then gzip it to disk.
# cStringIO objects do not implement the context-manager protocol on
# CPython 2, so the buffer is closed explicitly via try/finally instead of
# being used in a ``with`` statement.
buffer = cStringIO.StringIO()
try:
    print >>buffer, '# This file is generated by {0} from {1} and {2}. Do not modify it directly.'.format(__file__, UNICODEDATA, EASTASIANWIDTHS)

    # Keys are hex strings, optionally of the form 'FIRST..LAST'; order the
    # entries numerically by the part before the first '.'.
    for char, info in sorted(data.iteritems(), key=lambda x: int(x[0].split('.')[0], 16)):
        print >>buffer, ';'.join([char] + info)

    with gzip.open('IPyUnicodeData.txt.gz', 'wb') as out:
        out.write(buffer.getvalue())
finally:
    buffer.close()
