Unicafen ruokalistat Pythonilla

empty 15.09.03 15:37

Hakee unicafe.fi:stä tämän päivän ruuat ja näyttää ne. Krääsät (k, vl) jne. poistetaan.

 Tekstiversio  Arvo: 6 (6 ääntä)  Äänestä: +  -
# Tein tämän koodinpätkän niin että ruokalistojen vertailu
# olisi nopeampaa ja voisin saman tien painua sinne syömään
# missä on parasta sapuskaa. Tästä saattaa olla iloa muillekin
# Helsingin yliopistossa opiskeleville tai lähialueilla majaileville.
#
# Aluksi tein kaiken koodin yhteen läjään, mutta jäsentelin
# sen lopulta hyödyllisen oloisiksi funktioiksi.
# Kommentit englanniksi tällä kertaa.
#
# Hannu Kankaanpää -2003

import datetime
import sys
from urllib import urlopen


def multiReplace(text, replaces):
    """Same as str.replace(), but for multiple replace pairs.

    The pairs are applied one after another in the given order, so a
    later pair also sees the text produced by an earlier one.

    text -- the string to process
    replaces -- a sequence of 2-tuples [('a', 'b'), ('X', 'x')]

    Returns the text with every replacement applied.

    >>> multiReplace('aXb', [('a', 'b'), ('X', 'x')])
    'bxb'
    """
    for old, new in replaces:
        text = text.replace(old, new)
    return text


def formatDate(dateobj, format='d.m.y', trimZero=False):
    """Returns date in asked format. Default: 'dd.mm.yyyy'

    dateobj -- object of type datetime.datetime or datetime.date
    format -- text in which 'd' is replaced by date, 'm' by month, 'y' by year
    trimZero -- If True, leading zeros will be trimmed from each number

    Every 'y', 'm' and 'd' character in format is substituted, so format
    should not contain those letters as literal text.

    >>> import datetime
    >>> formatDate(datetime.date(2003, 9, 5))
    '05.09.2003'
    >>> formatDate(datetime.date(2003, 9, 5), 'y-m-d', trimZero=True)
    '2003-9-5'
    """
    # lstrip('') strips nothing, so an empty set keeps the leading zeros.
    if trimZero:
        trimChars = '0'
    else:
        trimChars = ''

    # The ISO form begins with 'YYYY-MM-DD', which is sliced by position.
    isodate = dateobj.isoformat()
    year = isodate[0:4].lstrip(trimChars)
    month = isodate[5:7].lstrip(trimChars)
    day = isodate[8:10].lstrip(trimChars)

    # The substituted values are digits only, so they can never be hit by
    # a later letter replacement.
    return multiReplace(format, [('y', year), ('m', month), ('d', day)])


def removeParenthesis(text):
    """Removes text within parenthesis.
    Can't handle nested or unmatched parenthesis.

    Raises RuntimeError when an opening parenthesis is not followed by a
    closing one before the next opening parenthesis (or the end of text).
    A closing parenthesis without a preceding opening one is left as is.

    >>> removeParenthesis('remove(5ya)Them(42)(erger)Now')
    'removeThemNow'
    """
    pieces = text.split('(')

    # The first piece precedes any '(' and is therefore kept untouched.
    kept = [pieces[0]]
    for piece in pieces[1:]:
        # Every later piece starts right after a '(': drop everything up
        # to and including the first ')'.
        closing = piece.find(')')
        if closing == -1:
            raise RuntimeError("Possibly illegal parenthesis in '%s'" % text)
        kept.append(piece[closing + 1:])
    return ''.join(kept)


def removeMultipleCommas(text):
    """Replaces multiple adjacent commas with a single comma.

    The text is split at commas, every item is stripped of surrounding
    whitespace, empty items are dropped and the rest are joined with ', '.
    Leading and trailing commas disappear as a side effect.

    >>> removeMultipleCommas('1,,,2,3,,4')
    '1, 2, 3, 4'
    """
    items = [item.strip() for item in text.split(',')]
    return ', '.join([item for item in items if item])


def printInTwoColumns(textList, columnWidth=37, wpad=2, hpad=1):
    """Prints given text in two columns.

    The text blocks are laid out pairwise: blocks 0 and 1 side by side,
    then blocks 2 and 3 below them, and so on. A trailing unpaired block
    gets an empty right-hand column.

    textList -- a list of lists that contain text line by line.
    columnWidth -- width of a column; longer lines are cut to this width
    wpad -- space between the two columns (one extra separating space is
            always written between the columns)
    hpad -- number of empty lines between two rows of blocks
    """
    for start in range(0, len(textList), 2):
        # Slicing copies, so padding the pair never modifies textList.
        pair = textList[start:start + 2]
        if len(pair) == 1:
            pair.append([])

        height = max(len(pair[0]), len(pair[1]))
        for row in range(height):
            cells = []
            for block in pair:
                # The shorter block of a pair is padded with blank lines.
                if row < len(block):
                    line = block[row]
                else:
                    line = ''
                cells.append(line[0:columnWidth].ljust(columnWidth + wpad))
            sys.stdout.write(' '.join(cells) + '\n')

        # Vertical padding only between rows of blocks, not after the last.
        if start < len(textList) - 2:
            sys.stdout.write('\n' * hpad)


def getFoodList(data, identifierStr, startMark, endMark):
    """Creates a food list and returns it.

    First, it locates identifierStr from data, then produces the food
    list from text between the following startMark and the following endMark.
    If endMark is missing, everything up to the end of data is used.

    Line breaks in data are ignored; the HTML line break tags split the
    text into lines instead. It removes everything that is inside
    parenthesis, collapses repeated commas and drops trailing empty lines.

    data -- the page contents as a string
    identifierStr -- text that marks where the wanted list begins
    startMark -- text after which the list starts
    endMark -- text before which the list ends

    Returns a list of lines, or ['N/A'] when identifierStr or startMark
    cannot be found or when no text is left.
    """
    identifierPos = data.find(identifierStr)
    if identifierPos == -1:
        return ['N/A']

    startPos = data.find(startMark, identifierPos)
    if startPos == -1:
        # Without this check the -1 would be turned into a bogus offset.
        return ['N/A']
    startPos += len(startMark)

    endPos = data.find(endMark, startPos)
    if endPos == -1:
        # A -1 here would silently cut off the last character of data.
        endPos = len(data)

    # Drop the raw line breaks first so that only the tags decide where
    # a line ends.
    section = multiReplace(data[startPos:endPos], [('\r', ''), ('\n', '')])
    section = multiReplace(section, [('<br>', '\n'), ('<br />', '\n'),
                                     ('<br/>', '\n'), ('&amp;', '&')])

    lines = []
    for line in section.split('\n'):
        try:
            line = removeParenthesis(line)
        except RuntimeError:
            # Deliberate best effort: a line with broken parenthesis is
            # shown as it is rather than failing the whole listing.
            pass
        lines.append(line)

    # remove last lines if they're empty; the emptiness check on the list
    # itself keeps this from failing when every line is empty
    while lines and not lines[-1]:
        lines.pop()
    if not lines:
        return ['N/A']

    return [removeMultipleCommas(line) for line in lines]


def showFood(places, dateobj=None):
    """Prints out food served in Unicafe restaurants on a specific day.

    Downloads the menu page of every given restaurant, picks the list
    that follows the formatted date and prints all the lists in two
    columns. Finally waits for a line of input before returning.

    places -- a list of unicafe restaurants, eg. ['porthania', 'metsatalo']
    dateobj -- object of type datetime.datetime or datetime.date, representing
               the desired date. Defaults to today.
    """
    if dateobj is None:
        dateobj = datetime.date.today()
    # The date in its default 'dd.mm.yyyy' form is the text that is
    # searched for on the page.
    date = formatDate(dateobj)

    allfoods = []
    for place in places:
        page = urlopen('http://www.unicafe.fi/ruokalistat_show/'
                       'index.php?kayttaja=' + place)
        try:
            html = page.read()
        finally:
            # Close the connection even if reading fails.
            page.close()

        # Restaurant name and an underline, followed by the day's food.
        foods = [place.capitalize(), '=' * 35]
        foods.extend(getFoodList(html, date, '<br><br>', '<br><br>'))
        allfoods.append(foods)

    printInTwoColumns(allfoods)
    # Wait for Enter so that the output stays visible.
    raw_input()


if __name__ == '__main__':
    # Restaurants whose menus for today are shown side by side.
    restaurants = ['porthania', 'valtiotiede', 'paarakennus', 'metsatalo']
    showFood(restaurants)
    # Example: tomorrow's food in just Porthania:
    #showFood(['porthania'], datetime.date.today() + datetime.timedelta(1))
 

Tombad 19:02 15.9.03 
Varsin hyvää koodia, hienosti kommentoitu.
ane 20:47 15.9.03 
Pitäähän sitä vitonen tästä antaa, sen verran kätsy :)
empty 10:07 1.10.03 
Piti muokata tuota getFoodList():iä, sillä tänään yhden ruokalistan suluissa oli bugi ja ohjelma heitti siitä exceptionia. Parempi vaan näyttää sulkujen kera sitten eikä kaataa koko ohjelmaa.
lind 18:13 1.10.05 
No löytypähän koodi joka auttaa ymmärtämään tuota python-parsimista.