All computer source code presented on this page, unless it includes attribution to another author, is provided by Ed Halley under the Artistic License. Use such code freely and without any expectation of support. I would like to know if you make anything cool with the code, or need questions answered.
# ucsv - a drop-in extension of CSV module to read and write Unicode files


A drop-in extension of CSV module to read and write Unicode files


    The standard CSV module can read/parse and write standard ASCII
    files with comma-separated values (CSV).  This module is an
    extension/wrapper to read and write comma-separated value files in
    UTF-8 or other encodings.

    Much of the code is stolen from an example in the 2.5.2 Python
    Library Reference, with additional wrappers for Unicode-ready
    DictReader, DictWriter.  In addition, the reader tries to turn
    things that look like numbers into actual numerical values, and
    interpolates non-strings into strings suitable for the writer.

    This is a "drop in" extension for the normal CSV module.


    >>> import ucsv as csv


    Ed Halley ( 18 August 2009



import csv ; from csv import *

__version__ = '1.0 Unicode'
__dropins__ = [ 'reader', 'writer', 'DictReader', 'DictWriter' ]

import codecs
import cStringIO

class UTF8Recoder:
    '''Iterator that reads a stream encoded in any given encoding.
    The output is reencoded to UTF-8 for internal consistency.
    def __init__(self, f, encoding):
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):

class reader:
    '''A CSV reader which will iterate over lines in the CSV file "f",
    from content in the optional encoding.

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        f = UTF8Recoder(f, encoding)
        self.reader = csv.reader(f, dialect=dialect, **kwds)

    def value(self, s):
        try: return int(s)
        except: pass
        try: return float(s)
        except: pass
        return unicode(s, "utf-8")

    def next(self):
        row =
        return [ self.value(s) for s in row ]

    def __iter__(self):
        return self

class writer:
    '''A CSV writer which will write rows to CSV file "f",
    employing the given encoding.

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds) = f
        self.encoder = codecs.lookup(encoding)[-1](f)

    def writerow(self, row):
        self.writer.writerow( [ (u'%s'%s).encode("utf-8") for s in row ] )
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        # empty queue

    def writerows(self, rows):
        for row in rows:

class DictReader:
    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                 dialect="excel", *args, **kwds):
        self.fieldnames = fieldnames    # list of keys for the dict
        self.restkey = restkey          # key to catch long rows
        self.restval = restval          # default value for short rows
        self.reader = reader(f, dialect, *args, **kwds)

    def __iter__(self):
        return self

    def next(self):
        row =
        if self.fieldnames is None:
            self.fieldnames = row
            row =

        # unlike the basic reader, we prefer not to return blanks,
        # because we will typically wind up with a dict full of None
        # values
        while row == []:
            row =
        d = dict(zip(self.fieldnames, row))
        lf = len(self.fieldnames)
        lr = len(row)
        if lf < lr:
            d[self.restkey] = row[lf:]
        elif lf > lr:
            for key in self.fieldnames[lr:]:
                d[key] = self.restval
        return d

class DictWriter:
    def __init__(self, f, fieldnames, restval="", extrasaction="raise",
                 dialect="excel", *args, **kwds):
        self.fieldnames = fieldnames    # list of keys for the dict
        self.restval = restval          # for writing short dicts
        if extrasaction.lower() not in ("raise", "ignore"):
            raise ValueError, \
                  ("extrasaction (%s) must be 'raise' or 'ignore'" %
        self.extrasaction = extrasaction
        self.writer = writer(f, dialect, *args, **kwds)

    def _dict_to_list(self, rowdict):
        if self.extrasaction == "raise":
            for k in rowdict.keys():
                if k not in self.fieldnames:
                    raise ValueError, "dict contains fields not in fieldnames"
        return [rowdict.get(key, self.restval) for key in self.fieldnames]

    def writerow(self, rowdict):
        return self.writer.writerow(self._dict_to_list(rowdict))

    def writerows(self, rowdicts):
        rows = []
        for rowdict in rowdicts:
        return self.writer.writerows(rows)

Contact Ed Halley by email at
Text, code, layout and artwork are Copyright © 1996-2013 Ed Halley.
Copying in whole or in part, with author attribution, is expressly allowed.
Any references to trademarks are illustrative and are controlled by their respective owners.
Make donations with PayPal - it's fast, free and secure!