All computer source code presented on this page, unless it includes attribution to another author, is provided by Ed Halley under the Artistic License. Use such code freely and without any expectation of support. I would like to know if you make anything cool with the code, or need questions answered.
python/
    bindings.py
    boards.py
    buzz.py
    caches.py
    cards.py
    constraints.py
    csql.py
    english.py
    getch.py
    getopts.py
    gizmos.py
    goals.py
    improv.py
    interpolations.py
    namespaces.py
    nihongo.py
    nodes.py
    octalplus.py
    patterns.py
    physics.py
    pids.py
    pieces.py
    quizzes.py
    recipes.py
    relays.py
    romaji.py
    ropen.py
    sheets.py
    stores.py
    strokes.py
    subscriptions.py
    svgbuild.py
    testing.py
    things.py
    timing.py
    ucsv.py
    useful.py
    uuid.py
    vectors.py
    weighted.py
java/
    CSVReader.java
    CSVWriter.java
    GlobFilenameFilter.java
    RegexFilenameFilter.java
    StringBufferOutputStream.java
    ThreadSet.java
    Throttle.java
    TracingThread.java
    Utf8ConsoleTest.java
    droid/
        ArrangeViewsTouchListener.java
        DownloadFileTask.java
perl/
    CVQM.pm
    Kana.pm
    Typo.pm
cxx/
    CCache.h
    equalish.cpp
Download weighted.py
# -*- python -*-

'''

SYNOPSIS

    >>> import weighted

    >>> choices = { 'heads': 2, 'tails': 1 }
    >>> for i in xrange(100):
    ...    print weighted.choice(choices)

    This example would print a hundred coin flips, but the heads
    will appear twice as often as tails, statistically speaking.

AUTHOR

    Ed Halley (ed@halley.cc) 13 December 2007

'''

import random

def pare(choices):
    '''Given a dict of key:weight pairs, returns the total of all weights.'''
    return sum(choices.values())

def weighted(choices, total=0):
    '''Given a dict of key:weight pairs, chooses a key at random.

    The dict values are non-negative numerical weights.  Keys with higher
    values are chosen more often than keys with lower values.

    If the caller knows the total of all weights, it can be given to
    avoid recalculating it internally on each call.  If the given total
    is not accurate, a key may be chosen with a poorly-shaped
    distribution.
    '''
    if not total:
        total = pare(choices)
    mark = random.random()*total
    keys = choices.keys()
    for i in xrange(len(keys)):
        span = choices[keys[i]]
        if span > mark:
            return keys[i]
        mark -= span
    # should not reach here if total is accurate
    return random.choice(keys)

choice = weighted

if __name__ == '__main__':
    print 'Testing weighted random distribution...'
    choices = { 'ten': 10,
                'eight': 8,
                'seven': 7,
                'six': 6,
                'four': 4,
                'one': 1 }
    tally = { }
    reps = 1000000
    total = sum(choices.values())
    for i in xrange(reps):
        if 0 == i % 50000: print reps-i, '\r',
        x = weighted(choices, total)
        try: tally[x] += 1
        except: tally[x] = 1
    print reps, 'reps', '==', sum(tally.values()), 'picks'
    print "%s\t%9s %9s %s" % ('key:','picks:',
                               'fair:','bias (ideal 100%):')
    for key in choices:
        expected = choices[key]*reps/total
        print "%s\t%9d %9d (%g%%)" % (key, tally[key],
                                      expected, tally[key]*100./expected)
       


Contact Ed Halley by email at ed@halley.cc.
Text, code, layout and artwork are Copyright © 1996-2013 Ed Halley.
Copying in whole or in part, with author attribution, is expressly allowed.
Any references to trademarks are illustrative and are controlled by their respective owners.
Make donations with PayPal - it's fast, free and secure!