# Source code for irclog.parser

""":mod:`irclog.parser` --- IRC log parser

This module provides a function that takes the lines of an IRC log and
transforms them into message objects from the :mod:`irclog.messages` module.

.. data:: PATTERN

   The :mod:`re` pattern that matches a single line of an IRC log.

   .. note::

      This regular expression was originally written by
      *Kang Seonghoon* aka *lifthrasiir*. The pattern was posted in
      an article on LangDev.

"""

import re
import datetime
import chardet
import irclog.messages

# Verbose regular expression matching one (stripped) line of an irssi-style
# IRC log.
#
# Every alternative that produces a message is wrapped in a named group whose
# name equals the parser function registered for it (``nickmsg``, ``joinmsg``,
# ...), so a dispatcher can tell which alternative matched by testing that
# group.  The inner groups carry the keyword arguments of that function.
#
# NOTE(review): the text this was restored from had lost every group-opening
# line, several closing parentheses and the ``re.VERBOSE`` flag.  The wrapper
# group names and the ``modeserver``, ``partreason`` and ``noticenick``
# sub-patterns are reconstructed from the parser signatures and the irssi log
# format -- confirm against the upstream source before relying on them.
PATTERN = re.compile(r"""
    ^ (?:
        (?:                                      # lines that are ignored
            ---[ ]Day[ ]changed[ ].* |
            \d\d:\d\d(?::\d\d)?[ ] (?:
                -!-[ ]Irssi:[ ] |
                -!-[ ][<;]/Netsplit[ ]   # XXX
            ) .*
        ) |
        (?:
            ---[ ]Log[ ]opened[ ]...[ ](?:[\d ]\d...|...)[ ]\d\d[ ]
            (?P<logopenwhen>\d\d:\d\d:\d\d)[ ]\d\d\d\d
        ) |
        (?:
            ---[ ]Log[ ]closed[ ]...[ ](?:[\d ]\d...|...)[ ]\d\d[ ]
            (?P<logclosewhen>\d\d:\d\d:\d\d)[ ]\d\d\d\d
        ) |
        (?P<when>\d\d:\d\d(?::\d\d)?)[ ](?:
            -!-[ ](?:
                (?P<nickmsg>
                    (?P<nickfrom>.*?)[ ]is[ ]now[ ]known[ ]as[ ](?P<nickto>.*?)
                ) |
                (?P<selfnickmsg>
                    You're[ ]now[ ]known[ ]as[ ](?P<selfnickto>.*?)
                ) |
                (?P<joinmsg>
                    (?P<joinnick>.*?)[ ]\[(?P<joinident>.*?)\][ ]
                    has[ ]joined[ ](?P<joinchan>.*?)
                ) |
                (?P<modemsg>
                    (?P<modeserver>Server)?[Mm]ode/      # reconstructed
                    (?P<modechan>.*?)[ ]\[(?P<modelist>.*?)\][ ]
                    by[ ](?P<modenick>.*?)
                ) |
                (?P<partmsg>
                    (?P<partnick>.*?)[ ]
                    \[(?P<partident>.*?)\][ ]
                    has[ ]left[ ](?P<partchan>.*?)[ ]
                    \[(?P<partreason>.*?)\]              # reconstructed
                ) |
                (?P<quitmsg>
                    (?P<quitnick>.*?)[ ] \[(?P<quitident>.*?)\][ ]
                    has[ ]quit[ ]\[(?P<quitreason>.*?)\]
                ) |
                (?P<kickmsg>
                    (?P<kicknick>.*?)[ ]
                    was[ ]kicked[ ]from[ ](?P<kickchan>.*?)[ ]
                    by[ ](?P<kickby>.*?)[ ] \[(?P<kickreason>.*?)\]
                ) |
                (?P<topicmsg>
                    (?P<topicnick>.*?)[ ]changed[ ]
                    the[ ]topic[ ]of[ ](?P<topicchan>.*?)
                    [ ]to:[ ](?P<topicline>.*?)
                ) |
                (?P<notopicmsg>
                    Topic[ ]unset[ ]by[ ](?P<notopicnick>.*?)
                    [ ]on[ ](?P<notopicchan>.*?)
                )
            ) |
            (?P<pubmsg>
                <[ +@~]?(?P<pubnick>.*?)>[ ](?P<publine>.*?)
            ) |
            (?P<actmsg>
                [ ]\*[ ](?P<actnick>.*?)[ ](?P<actline>.*?)
            ) |
            (?P<noticemsg>
                -(?P<noticenick>.*?):                    # reconstructed
                (?:[+@~ ])?(?P<noticechan>.*?)
                -[ ](?P<noticeline>.*?)
            )
        )
    ) $
""", re.VERBOSE)

# Registry of parser functions keyed by function name; filled in by parser().
RULES = {}

def _decode_line(line, encoding):
    """Returns ``line`` as text.

    Text input is returned untouched.  Byte strings are decoded with
    ``encoding``; when that fails the encoding is guessed with
    :mod:`chardet` (falling back to UTF-8) and undecodable bytes are
    replaced rather than raising.

    """
    if not isinstance(line, bytes):
        return line
    try:
        return line.decode(encoding)
    except UnicodeDecodeError:
        guessed = chardet.detect(line).get("encoding") or "utf-8"
        return line.decode(guessed, "replace")


def parse(lines, date=None, encoding="utf-8"):
    """Transforms lines of log to message objects in :mod:`irclog.messages`
    module.

    Lines that do not match :data:`PATTERN`, and lines that match without
    triggering any function registered in :data:`RULES`, are skipped.

    :param lines: lines of log
    :type lines: iterable object, file object
    :param date: a date of the log. default is today
    :type date: :class:`datetime.date`
    :param encoding: a text encoding. default is ``"utf-8"``
    :returns: a list of :class:`irclog.messages.BaseMessage` instances

    .. note:: This is exactly a generator function.

    """
    if date is None:
        date = datetime.date.today()
    for line in lines:
        match = PATTERN.match(_decode_line(line, encoding).strip())
        if not match:
            continue
        groups = match.groupdict()
        for group_name, function in RULES.items():
            # A rule fires when the regex group named after it took part in
            # the match; ``get`` keeps unknown rule names from raising.
            if not groups.get(group_name):
                continue
            clock = datetime.time(*map(int, groups["when"].split(":")))
            # Replace the "HH:MM[:SS]" text with a full timestamp.
            groups["when"] = datetime.datetime.combine(date, clock)
            yield function(**groups)
            break


def parser(function):
    """Registers a parser function.

    The function is stored in :data:`RULES` under its own ``__name__`` and
    returned unchanged, so this can be used as a decorator.

    :param function: a parser function to register
    :type function: callable object
    :returns: passed ``function``
    :raises TypeError: when ``function`` is not callable

    """
    if not callable(function):
        raise TypeError("function must be callable")
    RULES[function.__name__] = function
    return function


@parser
def nickmsg(when, nickfrom, nickto, **_):
    """Parses :class:`irclog.messages.NickMessage`.

    :param when: the timestamp of the log line
    :param nickfrom: the nickname before the change
    :param nickto: the nickname after the change
    :returns: a :class:`irclog.messages.NickMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.NickMessage(when, nickfrom, nickto)


@parser
def selfnickmsg(when, selfnickto, **_):
    """Parses :class:`irclog.messages.SelfNickMessage`.

    :param when: the timestamp of the log line
    :param selfnickto: the new nickname of the logging user
    :returns: a :class:`irclog.messages.SelfNickMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.SelfNickMessage(when, selfnickto)


@parser
def joinmsg(when, joinnick, joinident, joinchan, **_):
    """Parses :class:`irclog.messages.JoinMessage`.

    :param when: the timestamp of the log line
    :param joinnick: the nickname that joined
    :param joinident: the bracketed ident text logged after the nickname
    :param joinchan: the channel that was joined
    :returns: a :class:`irclog.messages.JoinMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.JoinMessage(when, joinnick, joinident, joinchan)


@parser
def modemsg(when, modeserver, modechan, modelist, modenick, **_):
    """Parses :class:`irclog.messages.ModeMessage`.

    :param when: the timestamp of the log line
    :param modeserver: passed through to the message unchanged
    :param modechan: the channel whose modes changed
    :param modelist: the bracketed list of mode changes
    :param modenick: who made the change (the text after ``by``)
    :returns: a :class:`irclog.messages.ModeMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.ModeMessage(
        when, modeserver, modechan, modelist, modenick
    )


@parser
def partmsg(when, partnick, partident, partchan, partreason, **_):
    """Parses :class:`irclog.messages.PartMessage`.

    :param when: the timestamp of the log line
    :param partnick: the nickname that left
    :param partident: the bracketed ident text logged after the nickname
    :param partchan: the channel that was left
    :param partreason: the part reason
    :returns: a :class:`irclog.messages.PartMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    # ``when`` goes first, as for every other message type in this module;
    # it used to be dropped here, shifting all the other arguments.
    return irclog.messages.PartMessage(
        when, partnick, partident, partchan, partreason
    )


@parser
def quitmsg(when, quitnick, quitident, quitreason, **_):
    """Parses :class:`irclog.messages.QuitMessage`.

    :param when: the timestamp of the log line
    :param quitnick: the nickname that quit
    :param quitident: the bracketed ident text logged after the nickname
    :param quitreason: the bracketed quit reason
    :returns: a :class:`irclog.messages.QuitMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.QuitMessage(when, quitnick, quitident, quitreason)


@parser
def kickmsg(when, kicknick, kickchan, kickby, kickreason, **_):
    """Parses :class:`irclog.messages.KickMessage`.

    :param when: the timestamp of the log line
    :param kicknick: the nickname that was kicked
    :param kickchan: the channel the nickname was kicked from
    :param kickby: who kicked the nickname
    :param kickreason: the bracketed kick reason
    :returns: a :class:`irclog.messages.KickMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.KickMessage(
        when, kicknick, kickchan, kickby, kickreason
    )


@parser
def topicmsg(when, topicnick, topicchan, topicline, **_):
    """Parses :class:`irclog.messages.TopicMessage`.

    :param when: the timestamp of the log line
    :param topicnick: the nickname that changed the topic
    :param topicchan: the channel whose topic changed
    :param topicline: the new topic text
    :returns: a :class:`irclog.messages.TopicMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.TopicMessage(
        when, topicnick, topicchan, topicline
    )


@parser
def notopicmsg(when, notopicnick, notopicchan, **_):
    """Parses :class:`irclog.messages.NoTopicMessage`.

    :param when: the timestamp of the log line
    :param notopicnick: the nickname that unset the topic
    :param notopicchan: the channel whose topic was unset
    :returns: a :class:`irclog.messages.NoTopicMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    # This used to build a TopicMessage, which takes a fourth (topic line)
    # argument elsewhere in this module and is not what is documented here.
    return irclog.messages.NoTopicMessage(when, notopicnick, notopicchan)


@parser
def pubmsg(when, pubnick, publine, **_):
    """Parses :class:`irclog.messages.PublicMessage`.

    :param when: the timestamp of the log line
    :param pubnick: the nickname of the speaker
    :param publine: the text that was said
    :returns: a :class:`irclog.messages.PublicMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.PublicMessage(when, pubnick, publine)


@parser
def actmsg(when, actnick, actline, **_):
    """Parses :class:`irclog.messages.ActionMessage`.

    :param when: the timestamp of the log line
    :param actnick: the nickname performing the action
    :param actline: the text of the action
    :returns: a :class:`irclog.messages.ActionMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.ActionMessage(when, actnick, actline)


@parser
def noticemsg(when, noticenick, noticechan, noticeline, **_):
    """Parses :class:`irclog.messages.NoticeMessage`.

    :param when: the timestamp of the log line
    :param noticenick: the nickname that sent the notice
    :param noticechan: the channel the notice was sent to
    :param noticeline: the text of the notice
    :returns: a :class:`irclog.messages.NoticeMessage` instance

    Any other keyword arguments are accepted and ignored.

    """
    return irclog.messages.NoticeMessage(
        when, noticenick, noticechan, noticeline
    )