All computer source code presented on this page, unless it includes attribution to another author, is provided by Ed Halley under the Artistic License. Use such code freely and without any expectation of support. I would like to know if you make anything cool with the code, or need questions answered.
python/
    bindings.py
    boards.py
    buzz.py
    caches.py
    cards.py
    constraints.py
    csql.py
    english.py
    getch.py
    getopts.py
    gizmos.py
    goals.py
    improv.py
    interpolations.py
    namespaces.py
    nihongo.py
    nodes.py
    octalplus.py
    patterns.py
    physics.py
    pids.py
    pieces.py
    quizzes.py
    recipes.py
    relays.py
    romaji.py
    ropen.py
    sheets.py
    stores.py
    strokes.py
    subscriptions.py
    svgbuild.py
    testing.py
    things.py
    timing.py
    ucsv.py
    useful.py
    uuid.py
    vectors.py
    weighted.py
java/
    CSVReader.java
    CSVWriter.java
    GlobFilenameFilter.java
    RegexFilenameFilter.java
    StringBufferOutputStream.java
    ThreadSet.java
    Throttle.java
    TracingThread.java
    Utf8ConsoleTest.java
    droid/
        ArrangeViewsTouchListener.java
        DownloadFileTask.java
perl/
    CVQM.pm
    Kana.pm
    Typo.pm
cxx/
    CCache.h
    equalish.cpp
Download Typo.pm
#!/usr/bin/perl

package Typo;

=head1 NAME

Typo - a filter to correct many simple text errors defined in a list file

=head1 SYNOPSIS

    use Typo;

    Typo::load("$ENV{HOME}/.typo");

    while (<>) {
       $_ = Typo::fix($_);
       print;
    }

=head1 DESCRIPTION

C<Typo> is a very simplistic filter which looks for known errors in a string
and fixes them according to a database of fixes.

=cut

#----------------------------------------------------------------------------

use Data::Dumper;
use strict;
use warnings;

my @Typos = ();
our $Stats = undef;

=head1 FUNCTIONS

=head2 load()

    Typo::load("mytypos.txt");

Defines the typo database from the given file.  Each line in the file has
a phrase which is a commonly-found typo, followed by one or more tab
characters, followed by the corrected form of the phrase.  Blank lines or
lines which start with the C<#> symbol are ignored.

    # --spellings--
    supposably          supposedly
    supposively         supposedly
    # --grammar--
    might of been       might have been
    might of had        might have had

The database does not need more than one form to find errors with
different capitalizations, so the left-hand side should appear in
lowercase letters only.  Be sure to use real tab characters between the
two columns, and do not use special punctuation other than apostrophes
and dashes (things safe in a regex).

If no argument is given, then C<"$ENV{HOME}/.typo"> is the default
database file to be loaded.

Returns the number of typo definitions in the database after loading, or
0 for any failure.  There is currently no provision to load more than one
file to merge into the database.

=cut

sub load
{
    my $filename = shift || "$ENV{HOME}/.typo";
    @Typos = ();
    $Stats = {}
        if $Stats;

    # read the raw file of typo definitions
    my $fh;
    open($fh, $filename) and do
    {
        while (<$fh>)
        {
            next if /^\s*[;\#]/;
            next if /^\s*$/;
            chomp;
            my ($wrong, $right) = split /\t+/;
            next if not $right;
            push(@Typos,
                 [ $wrong, $right ]);
            push(@Typos,
                 [ ucfirst($wrong), ucfirst($right) ])
                if ucfirst($wrong) ne $wrong;
        }
    };

    # sort by descending original length
    @Typos =
        map { $_->[1] }
        sort { $a->[0] <=> $b->[0] }
        map { [ length($_->[0]), $_ ] }
        @Typos;

    # special word-edge rules
    # (your .typo file can contain almost any regex except a . rule)
    $_->[0] =~ s/ \s+ / \\s+ /gx
        foreach @Typos;
    $_->[0] =~ s/ \. / \\. /gx
        foreach @Typos;
    $_->[0] = qr/ (?<![\-<>\$\%\@\=a-zA-Z0-9_.!?\/\\])
                  $_->[0]
                  (?![\-<>\$\%\@\=a-zA-Z0-9_\/\\]) /x
        foreach @Typos;

    # diagnostics
    # print Dumper $Typos[0], $Typos[-1];
    return scalar @Typos;
}

=head2 fix()

    $fixed = Typo::fix( $text );
    $fixed = Typo::fix( $text, $prefix, $postfix );

Scans a string of text and tries to replace any and all "wrong"
substrings with corrections.  Each correction is surrounded by a prefix
and postfix string, if given.

=cut

sub fix
{
    local $_ = shift;
    my $pre = shift || '';
    my $post = shift || '';
    foreach my $typo (@Typos)
    {
        my $count = s|$typo->[0]|$pre$typo->[1]$post|g;
        $Stats->{$typo->[0]} += $count
            if $Stats;
    }
    return $_;
}

#----------------------------------------------------------------------------

=head2 pipe()

    %  perl -MTypo -eTypo::pipe  < document.txt  > corrected.txt

A very rudimentary pipe filter which is useful for testing the module,
testing the definitions in your C<~/.typo> file, or making very simple
quick fixes in plain text documents.

=cut

sub pipe
{
    my $pre = shift || '{[{';
    my $post = shift || '}]}';
    load();
    while (<>)
    {
        print fix($_, $pre, $post);
    }
}

1;

=head1 SEE ALSO

I've also made an HTTP proxy script (typoxy), an XChat IRC plugin
(xtypo), and a command-line filter (typo) which each use this module.
Browse the web, chat online, or clean up documents before your eyes bleed
from the pain of reading attrocious grammar and spelling.

=head1 AUTHOR

Ed Halley <C<< ed@halley.cc >>>

=head1 COPYRIGHT AND LICENSE

Copyright 2003-2006 by Ed Halley

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut


Contact Ed Halley by email at ed@halley.cc.
Text, code, layout and artwork are Copyright © 1996-2013 Ed Halley.
Copying in whole or in part, with author attribution, is expressly allowed.
Any references to trademarks are illustrative and are controlled by their respective owners.
Make donations with PayPal - it's fast, free and secure!