bernhard: utils/testing print_events_start_end.py,NONE,1.1

cvs at kolab.org cvs at kolab.org
Thu Jan 11 20:20:55 CET 2007


Author: bernhard

Update of /kolabrepository/utils/testing
In directory doto:/tmp/cvs-serv3146

Added Files:
	print_events_start_end.py 
Log Message:
Added script to find events without start or end tags.


--- NEW FILE: print_events_start_end.py ---
#!/usr/bin/env python
"""Decodes Kolab event email files and prints out contained start + end dates.

Will search the current working directory for files like [1-9][0-9]*.
WARNING: This is an experimental script to run manually for analysis purposes.

Usage: python print_events_start_end.py [-h]

Example: python print_events_start_end.py | grep '!!!' | wc

Options:
    -h / --help     Print this message and exit.

Roughly tested with Python 2.3.5 on sarge (needs python-xml).
"""

#initial 20070111 Bernhard <bernhard at intevation.de>
#  derived from remove_kolab_doubles.py rev1.1
#
# This program is free software under the GNU GPL (>=v2)
# Read the file COPYING coming with the software for details.

# CVS expands the $Revision$ keyword on commit.  The slice removes the
# leading "$Revision:" and the closing "$"; strip() removes the blanks that
# remain around the number, so the value is e.g. "1.1" rather than " 1.1 ".
__version__ = "$Revision: 1.1 $"[10:-1].strip()

import sys
import os
import getopt
import errno
import mimetypes
import email

import re

import xml.dom.minidom


# Verbosity level of the script:
#   0 = only summary messages
#   1 = diagnostics per file
#   2 = verbose information on per-file operations
#   3 = debug output for per-file operations
verbose = 1
def usage(code, msg=''):
    """Write the module help text to stderr and terminate the script.

    code is handed to sys.exit() as the exit status.  If msg is non-empty
    it is written to stderr on its own line after the help text.
    """
    stream = sys.stderr
    texts = [__doc__]
    if msg:
        texts.append(msg)
    for text in texts:
        print >> stream, text
    sys.exit(code)

def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'h', ['help'])
    except getopt.error, msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)


    # read all files from standard directory and only 
    matchobject = re.compile("^[1-9][0-9]*\\.$")
    allfiles = os.listdir(".")
    filelist = filter(matchobject.match, allfiles)

    #filelist=["19367.", "19374.",  "19375.",  "19376." ]

    print "Scanning %d files:" % len(filelist)


    for file in filelist:

#        if verbose > 1: print 'Dealing with "%s"' % file
        print file, ":",

        fp = open(file)
        msg = email.message_from_file(fp)
        fp.close()

        if not "X-Kolab-Type" in msg:
            if verbose > 0:
                sys.stderr.write(
                    "File %s not X-Kolab-Type, ignoring.\n" % file)
            continue

        try:
            kolabeventxml=extractkolabeventxml(msg)
        except:
            sys.stderr.write("Something wrong with file %s!\n" % file)
            raise

        if not kolabeventxml: continue

        # We want to print out the values of start-date and end-date tags

        dateelements = return_date_elements(kolabeventxml)

        print dateelements,
        # two elements are normal, otherwise we "!!!"!
        if len(dateelements)!=2:
            print "!!!"
        else:
            print ""


def extractkolabeventxml(msg):
    """Return the decoded payload of the x-vnd.kolab.event part of msg.

    msg is an email message object; all of its MIME parts are searched.
    Returns None when no part has the content type
    application/x-vnd.kolab.event.

    Raises IndexError if more than one such part is found (a note is also
    written to stderr in that case).
    """
    kolabxmlpart = None
    for part in msg.walk():
        if part.get_content_type() != 'application/x-vnd.kolab.event':
            continue
        # Compare against None explicitly: the truth value of a message
        # object is derived from its number of headers, not its presence.
        if kolabxmlpart is not None:
            sys.stderr.write("Arg, found second kolabxml part!\n")
            raise IndexError(
                "more than one application/x-vnd.kolab.event part")
        kolabxmlpart = part

    if kolabxmlpart is None:
        return None

    kolabxml = kolabxmlpart.get_payload(decode=1)

    # Level 3 is the debug level documented for "verbose", so the dump has
    # to trigger at 3 (the former "> 3" could never match a documented level).
    if verbose > 2:
        sys.stdout.write("%s\n" % (kolabxml,))

    return kolabxml


def return_date_elements(kolabxml):
    """Return the contents of the start-date and end-date tags, if present.

    kolabxml is the XML text of a Kolab event.  Only direct children of the
    document's root element are examined.

    The result is a list of (tagname, value) tuples in document order, both
    items encoded as latin-1.  A tag without text content is reported with
    an empty value.
    """
    result = []

    dom = xml.dom.minidom.parseString(kolabxml)
    try:
        # documentElement is the root element even when a comment or a
        # processing instruction precedes it; firstChild would not be.
        for child in dom.documentElement.childNodes:
            if child.nodeType != child.ELEMENT_NODE:
                continue
            if child.tagName not in ('start-date', 'end-date'):
                continue
            # Join the text and CDATA children so that an empty tag yields
            # "" instead of failing on a missing first child.
            text = "".join([node.data for node in child.childNodes
                            if node.nodeType in (node.TEXT_NODE,
                                                 node.CDATA_SECTION_NODE)])
            result.append((child.tagName.encode("latin-1"),
                           text.encode("latin-1")))
    finally:
        # Break the DOM's reference cycles even if something above failed.
        dom.unlink()

    return result

# Start the scan only when run as a script, not when imported as a module.
if __name__ == "__main__":
    main()





More information about the commits mailing list