Source code for dbxml2rst.nodes

#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# pylint: disable=C0103,C0321,R0912,R0914,R0915
# disable fixme-warnings:
# pylint: disable=W0511

u"""
    dbxml2rst.nodes
    ~~~~~~~~~~~~~~~

    Library with XML-filters to convert DocBook-XML to reST

    :copyright:  Copyright (C) 2017  Markus Heiser
    :license:    GPL V3.0, see LICENSE for details.
"""

# ==============================================================================
# imports
# ==============================================================================

import html
import re
from html.parser import HTMLParser # pylint: disable=F0401
from lxml import etree

from fspath import FSPath

from .helper import Container
from .helper import LOG

# Internal (named HTML) entities and the literal text they are replaced with.
# "nbsp" is mapped explicitly, all other names are resolved to their unicode
# character.
INT_ENTITES = Container()
INT_ENTITES["nbsp"] = r" "
for x in ["frac12", "frac13", "frac14", "times", "copy", "hellip", "le", "ge"
          , "sub", "sup", "micro", "plusmn", "mdash", "alpha", "num", "ndash" ]:
    # html.unescape() is the documented replacement of the
    # HTMLParser().unescape() method, which was removed in Python 3.9
    INT_ENTITES[x] = html.unescape("&%s;" % x)

# ==============================================================================
def filterXML(
        folder, inFile, outFile
        , xmlFilter
        , parseIncludes = False
        , fragTag = None, ID = None ):
# ==============================================================================

    u"""Replace some xml-Markups with reST literals.

    The file ``inFile`` (relative to ``folder``) is parsed by
    ``xmlFilter.parseFile``, the resulting tree is processed by
    ``xmlFilter.walk`` and finally serialized into ``folder / outFile``.

    :param folder:        folder of the input and output file
    :param inFile:        name of the XML file to read
    :param outFile:       name of the file the filtered XML is written to
    :param xmlFilter:     filter object; its ``parseData``, ``parseFile`` and
                          ``walk`` members are used
    :param parseIncludes: stored as ``parseIncludes`` in the parse data
    :param fragTag:       passed through to ``xmlFilter.parseFile``
    :param ID:            passed through to ``xmlFilter.parseFile``
    """

    srcFolder = FSPath(folder)
    target    = FSPath(outFile)

    xmlFilter.parseData.update(outFile = target, parseIncludes = parseIncludes)

    tree = xmlFilter.parseFile(srcFolder, inFile, fragTag=fragTag, ID=ID)
    xmlFilter.walk(tree)

    with (srcFolder / target).openTextFile("w") as out:
        # pylint: disable=E1101
        serialized = etree.tostring(tree, encoding='unicode')
        out.write(serialized)

# ==============================================================================
def subTemplate(inFile, outFile):
# ==============================================================================

    u"""Substitute kernel-doc place holders in docbook.tmpl files.

    Every line of the form ``!<op><fname> <args>`` (``op`` is one of
    ``E I D F P C``) is replaced by a ``<rstTemplate op=".." fname=".."
    args=".."/>`` tag, all other lines are copied unchanged.  The replaced
    line is written without its trailing newline.

    :param inFile:  source, opened with ``inFile.openTextFile()``
    :param outFile: destination, opened with ``outFile.openTextFile("w")``
    """
    # local import: only needed here (stdlib)
    from xml.sax.saxutils import escape

    # detailed description see class ReSTTemplate
    tmpl_re = re.compile(r"^!([EIDFPC])([^\s]*)\s+(.*?)\s*$")
    tag_format = """<rstTemplate op="%s" fname="%s" args="%s"/>"""
    # the values end up in double quoted XML attributes, so the double quote
    # has to be escaped in addition to the '&', '<' and '>' defaults
    quot = {'"': "&quot;"}

    with inFile.openTextFile() as src, outFile.openTextFile("w") as dst:
        for line in src:
            match = tmpl_re.match(line)
            if match is not None:
                op, fname, args = match.groups()
                line = tag_format % (
                    op, escape(fname, quot), escape(args, quot))
            dst.write(line)

# ==============================================================================
def subEntities(inFile, outFile, ext_entities, int_entities):
# ==============================================================================

    u"""Substitute internal and external entities, line by line.

    :param inFile:  XML source, opened with ``inFile.openTextFile()``
    :param outFile: destination, opened with ``outFile.openTextFile("w")``

    :param ext_entities: container with the external entities
    :type ext_entities: EntityContainer or None

    :param int_entities: container with the internal entities
    :type int_entities: EntityContainer or None

    Internal entities are replaced by their value.  External entities (looked
    up by name and, as fallback, by ``chunk_<name>``) will be replaced by a
    ``<rstInclude fname='%s'/>`` tag (see ``XMLTag.rstInclude_tag``).  Every
    ``*`` is passed on as ``&#x22C6;``.
    """
    entity_re = re.compile(r'''&(?P<name>[a-zA-Z][0-9a-zA-Z_-]+);''')

    def expandInternal(text, name):
        # replace &name; by the value of the internal entity (if any)
        value = int_entities.get(name, None)
        if not value:
            return text
        return text.replace("&%s;" % name, value)

    def expandExternal(text, name):
        # replace &name; by an include tag pointing to the external entity
        target = ext_entities.get(name, None)
        if target is None:
            target = ext_entities.get("chunk_" + name, None)
        if not target:
            return text
        return text.replace(
            "&%s;" % name
            , "<%s fname='%s'/>" % (XMLTag.rstInclude_tag, target))

    with inFile.openTextFile() as src, outFile.openTextFile("w") as dst:
        for raw in src:
            # FIXME: pass "*" as &#x22C6;
            cooked = raw.replace("*", "&#x22C6;")

            # first pass: entity names found in the untouched input line
            for name in [m.group('name') for m in entity_re.finditer(raw)]:
                if int_entities:
                    cooked = expandInternal(cooked, name)
                if ext_entities:
                    cooked = expandExternal(cooked, name)

            # second pass: internal entities within internal entities
            if int_entities:
                for name in [m.group('name') for m in entity_re.finditer(cooked)]:
                    cooked = expandInternal(cooked, name)

            dst.write(cooked)


# ==============================================================================
class XMLTagType(type):
# ==============================================================================

    u"""Metaclass which registers the tag classes by their ``tag`` name.

    A class without an own ``tag`` attribute gets its lower-cased class name as
    tag.  The class named ``XMLTag`` and classes with ``tag = None`` are not
    registered (intermediate classes).
    """

    # registry: tag name --> class
    typeList  = {}
    # marker for "no tag defined"
    undifined = object()

    def __new__(mcs, name, bases, namespace):
        if name == "XMLTag":
            tag = None
        else:
            tag = namespace.get("tag", mcs.undifined)
            if tag is mcs.undifined:
                tag = name.lower()
        namespace["tag"] = tag
        cls = super().__new__(mcs, name, bases, namespace)

        # tag = None -->  marks intermediate classes
        if tag is None:
            return cls

        known = mcs.typeList.get(cls.tag, mcs.undifined)
        if known is not mcs.undifined:
            raise KeyError("tag <%s> allready defined in class %r"
                           % (cls.tag, known))
        mcs.typeList[cls.tag] = cls
        return cls

    @classmethod
    def getClassByTag(mcs, tagName):
        u"""Returns the class registered for ``tagName`` or ``None``."""
        return mcs.typeList.get(tagName, None)

    @classmethod
    def getTagInstance(mcs, node, parseData):
        u"""Returns a XMLTag instance which fits to the tag or ``None``.

        The ``parseData`` is merged into the ``parseData`` of the new
        instance."""
        TagCls = mcs.getClassByTag(node.tag)
        if TagCls is None:
            return None
        xmlTag = TagCls()
        xmlTag.parseData.update(parseData)
        return xmlTag

# ==============================================================================
class XMLTag(metaclass=XMLTagType):
# ==============================================================================

    u"""Base class of the XML tag filters.

    For each node, :py:meth:`walk` looks up the filter class registered for
    the node's tag (see :py:class:`XMLTagType`) and calls its
    :py:meth:`applyFilter`.  The reST generated by a filter is injected into
    the tree as ``<code>`` / ``<programlisting>`` elements whose text starts
    with :py:attr:`rstInjection_sig`; :py:meth:`pandocFilter` strips that
    signature again.
    """

    def __init__(self):
        super().__init__()

        # some metadata about the parsing process
        self.parseData = Container(
            # customers hooks to call first on every node
            hooks           = []
            # folder where the xml-file is located
            , folder        = None
            # relative pathname of the xml-file (relative to self.folder)
            , fname         = None
            # rstInclude requires the outFile from its parent, to guess the
            # file suffix for the output of the include files.
            , parseIncludes = False
            , outFile       = None
            )

    def walk(self, node, rstPrefix=""):
        u"""Walks through the node-tree and applies matching filters on each node."""

        # First, call the hooks. Hooks might build a complete new subtree, they
        # have to return the node to walk on and this node might have a
        # different tag type!

        for func in self.parseData.hooks:
            node = func(node, rstPrefix, self.parseData)
            if node is None:
                raise Exception("hook %r doesn't return a xml node!'" % func)

        xmlTag = XMLTagType.getTagInstance(node, self.parseData)
        if xmlTag is not None:
            xmlTag.applyFilter(node, rstPrefix)
            if xmlTag.breakFlag:
                return
        # NOTE(review): the prefix is extended by the rstBlock of *self*, not
        # by the rstBlock of the matching xmlTag -- confirm this is intended.
        self.walkChilds(node, rstPrefix + self.rstBlock)

    def walkChilds(self, node, rstPrefix=""):
        u"""Calls :py:meth:`walk` for every child of ``node``."""
        for child in node.iterchildren():
            self.walk(child, rstPrefix)

    def parseFile(self, folder, fname, fragTag=None, ID=None):
        u"""Tries to parse the XML file with :py:mod:`lxml.etree`.

        A file starting with ``<?xml`` is parsed as is.  Any other file is
        treated as a fragment: its content is wrapped into a ``<dummy>``
        element and, if ``fragTag`` is given, additionally into a ``fragTag``
        element (with ``id=ID`` if ``ID`` is not None).

        A unknown entity within a xml (fragment) will cause an exception. In
        this case, run :py:func:`subEntities` first!

        :return: the root node of the parsed tree"""

        self.parseData.update(
            # folder where the xml-file is located
            folder   = folder
            # relative pathname of the xml-file (relative to self.folder)
            , fname  = fname
            )

        preTag  = ""
        postTag = ""
        if fragTag:
            preTag  = u"<%s%s>" % (fragTag, ' id="%s"' % ID if ID is not None else "")
            postTag = u"</%s>" % fragTag

        fname    = FSPath(folder / fname)
        xmlFlag  = False
        rootNode = None

        with fname.openTextFile() as f:
            if f.readline().startswith("<?xml"):
                xmlFlag = True
        if xmlFlag:
            rootNode = etree.parse(fname).getroot() # pylint: disable=E1101
        else:
            content = fname.readFile()
            rootNode = etree.fromstring( # pylint: disable=E1101
                u"<dummy>"
                + preTag
                + content
                + postTag
                + u"</dummy>")
        return rootNode


    @classmethod
    def insertAsRawHTML(cls, node):
        u"""Replaces ``node`` by a reST ``raw:: html`` block of its XML source."""
        literal = etree.tostring(node, encoding="unicode") # pylint: disable=E1101
        literal = literal.replace("\t","")
        literal = literal.strip()
        raw = "\n.. raw:: html\n%s\n\n" % cls.blockText("    ", literal)
        new = cls.getInjBlockTag()
        new.text += raw
        cls.replaceNode(node, new)

    @classmethod
    def chunkNode(cls, node, folder, fname):
        u"""Writes ``node`` into ``folder / fname`` and replaces it by an include tag.

        The ``fname`` attribute of the include tag is ``fname.suffix(".xml")``.
        A node which already carries the ``chunkNode`` attribute is left
        untouched."""
        # break recursion, from the caller
        if node.get("chunkNode") is not None:
            return
        node.set("chunkNode", "1")
        folder = FSPath(folder)
        fname  = FSPath(fname)
        inclTag = node.makeelement(cls.rstInclude_tag)
        inclTag.set("fname", fname.suffix(".xml"))
        LOG.info("INFO: create chunk %s" % (etree.tostring(inclTag, encoding="unicode"))) # pylint: disable=E1101
        with (folder / fname).openTextFile("w") as out:
            out.write(
                # pylint: disable=E1101
                etree.tostring(node, encoding='unicode') )
        cls.replaceNode(node, inclTag)

    # ---------------
    # nodes
    # ---------------

    @classmethod
    def copyNode(cls, node, tag=None, moveID=False):
        u"""Returns a new element (``tag`` or the tag of ``node``) built from ``node``.

        Text, tail and attributes are copied and the children of ``node`` are
        assigned to the new element.  With ``moveID`` the ``id`` attribute is
        removed from ``node``."""
        tag = tag or node.tag
        new = node.makeelement(tag)
        new.text = node.text
        new.tail = node.tail
        new[:] = node
        for k,v in node.items():
            new.set(k, v)
        if moveID:
            ID = node.get("id")
            if ID is not None:
                del node.attrib["id"]
                new.set("id", ID)
        return new

    @classmethod
    def dropNode(cls, node):
        u"""Removes ``node`` from its parent.

        :raises Exception: if ``node`` has no parent (root node)"""
        parent = node.getparent()
        if parent is not None:
            parent.remove(node)
        else:
            raise Exception("node %s is the root node / can't droped" % node)

    @classmethod
    def replaceNode(cls, oldNode, newNode):
        u"""Replaces ``oldNode`` by ``newNode`` in the parent of ``oldNode``.

        :raises Exception: if ``oldNode`` has no parent (root node)"""
        parent = oldNode.getparent()
        if parent is not None:
            parent.replace(oldNode, newNode)
        else:
            # the exception was created but never raised, so a failed
            # replacement went unnoticed; now consistent with dropNode()
            raise Exception("node %s is the root node / can't replaced" % oldNode)


    # The <programlisting> content will be passed through *as is* by pandoc
    # DocBook reader and can be handled in the pandoc filter.

    rstInjection_sig = "!ri!"

    @classmethod
    def pandocFilter(cls, key, value, fmt, meta): # pylint: disable=W0613
        u"""Pandoc filter action which unwraps the injected reST.

        A ``CodeBlock`` / ``Code`` whose text starts with
        :py:attr:`rstInjection_sig` is replaced by a ``Plain`` / ``Str``
        element with the text behind the signature.  In any other case
        ``None`` is returned."""
        if key == 'CodeBlock':

            # DocBook injection --> "programlisting"
            # --------------------------------------
            txt = value[1]
            if txt.startswith(cls.rstInjection_sig):
                new = txt[len(cls.rstInjection_sig):]
                return {'t': 'Plain', 'c': [{'t': 'Str', 'c': new}]}

        if key == 'Code':

            # DocBook injection --> "code"
            # --------------------------------------

            txt = value[1]
            if txt.startswith(cls.rstInjection_sig):
                new = txt[len(cls.rstInjection_sig):]
                return {'t': 'Str', 'c': new}

        return None

    @classmethod
    def getInjInlineTag(cls):
        u"""Returns a new ``<code>`` element prepared for an inline injection."""
        # pylint: disable=E1101
        new = etree.Element("code")            # pandoc --> "Code"
        new.text = cls.rstInjection_sig
        new.set("rstInjection", "1")
        return new

    @classmethod
    def getInjBlockTag(cls):
        u"""Returns a new ``<programlisting>`` element prepared for a block injection."""
        # pylint: disable=E1101
        new = etree.Element("programlisting")  # pandoc --> "CodeBlock"
        new.text = cls.rstInjection_sig
        new.set("rstInjection", "1")
        return new

    def applyFilter(self, node, rstPrefix):
        u"""Applies this filter to ``node``.

        With :py:attr:`dropFlag` the node is dropped.  Otherwise the results
        of :py:meth:`preText`, :py:meth:`postText` and :py:meth:`replaceText`
        are injected before / behind / instead of the node."""

        if self.dropFlag:
            self.dropNode(node)
            return

        # injection is done as pandoc "Code" or "CodeBlock"
        def getInjTag():
            if self.injBlock:
                return self.getInjBlockTag()
            return self.getInjInlineTag()

        preText = self.preText(node, rstPrefix)
        if preText:
            new = getInjTag()
            new.text += preText
            node.addprevious(new)

        postText = self.postText(node, rstPrefix)
        if postText:
            new = getInjTag()
            new.text += postText
            node.addnext(new)

        replaceText = self.replaceText(node, rstPrefix)
        if replaceText:
            new = getInjTag()
            new.text += replaceText
            # the text behind the replaced node must not get lost
            new.tail = node.tail
            self.replaceNode(node, new)

    # ---------------
    # reST
    # ---------------

    rstInclude_tag  = "rstInclude"
    rstTemplate_tag = "rstTemplate"

    @classmethod
    def normalizeID(cls, ID):
        u"""Returns ``ID`` unchanged (place holder for a later normalization)."""
        # not needed, may be later
        #if ID is not None:
        #    ID = ID.replace("-","_")
        return ID

    @classmethod
    def getText(cls, *nodelist):
        u"""The text *as is* from ``node.itertext()``

        ``None`` entries in ``nodelist`` are skipped.  Returns ``None`` if
        there is no text at all."""
        text = "".join(
            "".join(node.itertext())
            for node in nodelist if node is not None)
        return text if text != "" else None

    @classmethod
    def getStripedText(cls, *nodelist):
        u"""The text from node.itertext() with reduced whitespaces

        Returns ``None`` if :py:meth:`getText` returns ``None``."""
        text = cls.getText(*nodelist)
        if text is None:
            return None
        return re.sub(r"\s+"," ", text).strip()

    @classmethod
    def getFormatedTitle(cls, node, from_tag="title"):
        u"""title as one line, prefixed with py:attribute:`rstBlock`

        Returns an empty string if ``node`` has no ``from_tag`` child or if
        this child has no text."""
        title_node = node.find(from_tag)
        if title_node is None:
            return ""
        text = cls.getStripedText(title_node)
        if text is None:
            # empty <title/>: concatenating None raised a TypeError
            return ""
        return cls.rstBlock + text

    @classmethod
    def blockText(cls, prefix, text):
        u"""Adds ``prefix`` to lines from ``text``, deletes trailing whitespaces

        Empty lines are not prefixed.  The block starts with one and ends with
        three newlines.  Returns ``None`` if ``text`` is ``None``."""
        if text is None:
            return None
        lines = (line.rstrip() for line in text.strip("\n").split("\n"))
        body = "".join(
            (prefix + line if line else "") + "\n" for line in lines)
        return "\n" + body + "\n\n"

    # ---------------
    # Subclassing API
    # ---------------

    # xml tag-name e.g "section"
    tag = XMLTagType.undifined

    # inject rst as inline or as block
    injBlock = False

    # Tag-Filter breaks XML recursion
    breakFlag = False

    # Tag should be dropped
    dropFlag = False

    # additional prefix for lines within *this* rst-Block
    rstBlock  = ""
    rstAnchor = "\n.. _%(ID)s:\n"
    rstMarkup = None

    # pylint: disable=W0613

    def getContext(self, node):
        u"""Returns the context (with the ``ID`` of the node) used to format the reST."""
        return Container(
            ID = self.normalizeID(node.attrib.get('id')))

    def replaceText(self, node, rstPrefix): # pylint: disable=R0201
        u"""reST which replaces the node (``None``: no replacement)."""
        return None

    def preText(self, node, rstPrefix):     # pylint: disable=R0201
        u"""reST injected before the node (``None``: no injection)."""
        return None

    def postText(self, node, rstPrefix):    # pylint: disable=R0201
        u"""reST injected behind the node (``None``: no injection)."""
        return None


# ==============================================================================
class LinkTag(XMLTag):
    u"""Intermediate base class of the link filters (not registered for a tag).

    The node is replaced by a ``:ref:`` role which points to the ``linkend``
    attribute of the node."""
    tag = None
# ==============================================================================

    rstMarkup = ":ref:`%(text)s <%(linkend)s>`"

    def getContext(self, node):
        u"""Adds ``text`` (stripped node text) and ``linkend`` to the context."""
        context = super().getContext(node)
        context.text    = self.getStripedText(node)
        context.linkend = self.normalizeID(node.attrib.get("linkend"))
        return context

    def replaceText(self, node, rstPrefix):
        u"""Returns the reference; without a link text only the target is used."""
        context  = self.getContext(node)
        template = self.rstMarkup if context.text else ":ref:`%(linkend)s`"
        return template % context

# ------------------------------------------------------------------------------
class Link(LinkTag):
    u"""Filter of the ``<link>`` tag, see :py:class:`LinkTag`."""


class Xref(LinkTag):
    u"""Filter of the ``<xref>`` tag, see :py:class:`LinkTag`."""
# ------------------------------------------------------------------------------

# ==============================================================================
class Ulink(LinkTag):
# ==============================================================================

    u"""Filter of the ``<ulink>`` tag: an anonymous hyperlink to the ``url``
    attribute of the node."""

    rstMarkup = "`%(text)s <%(linkend)s>`__"

    def getContext(self, node):
        u"""Same context as :py:class:`LinkTag`, but ``linkend`` is the ``url``."""
        context = super().getContext(node)
        context.linkend = self.normalizeID(node.attrib.get("url"))
        return context

    def replaceText(self, node, rstPrefix):
        u"""Returns the hyperlink; without a link text only the plain URL."""
        context = self.getContext(node)
        # FIXME: droped ref-text, because it is (mostly) redundant and long refs
        # are killed by tables
        template = self.rstMarkup if context.text else "%(linkend)s"
        return template % context

# ==============================================================================
class Constant(XMLTag):
# ==============================================================================

    u"""Filter of the ``<constant>`` tag (inline literal).

    Subclasses with a ``replaceTag`` are converted into a node of that tag,
    which is then walked instead of the original node."""

    # a inline literal should contain not any leading/trailing whitespace

    replaceTag = None

    def applyFilter(self, node, rstPrefix):
        u"""Replaces the tag (``replaceTag``), drops an empty literal or strips
        the text of the literal and applies the base filter."""
        if self.replaceTag is not None:
            # continue with a copy of the node which has the tag replaceTag
            substitute = self.copyNode(node, self.replaceTag, moveID=True)
            self.breakFlag = True
            self.replaceNode(node, substitute)
            self.walk(substitute, rstPrefix)
            return

        stripped = node.text.strip() if node.text else ""
        if not stripped:
            # drop empty inline literals
            self.dropNode(node)
            self.breakFlag = True
            return

        node.text = stripped
        super().applyFilter(node, rstPrefix=self.rstBlock)

# ------------------------------------------------------------------------------
class Property(Constant):
    u"""``<property>`` is handled as ``<constant>``."""
    replaceTag = "constant"


class Token(Constant):
    u"""``<token>`` is handled as ``<constant>``."""
    replaceTag = "constant"


class Filename(Constant):
    u"""``<filename>`` is handled as ``<constant>``."""
    replaceTag = "constant"


class Varname(Constant):
    u"""``<varname>`` is handled as ``<constant>``."""
    replaceTag = "constant"
# ------------------------------------------------------------------------------

# ==============================================================================
class Subtitle(XMLTag):
# ==============================================================================

    u"""Filter of the ``<subtitle>`` tag: replaced by a bold text block."""

    breakFlag = True
    injBlock  = True
    rstMarkup = "\n**%(title)s**\n\n"

    def getContext(self, node):
        u"""Adds the stripped text of the node as ``title`` to the context."""
        context = super().getContext(node)
        context.title = self.getStripedText(node)
        return context

    def replaceText(self, node, rstPrefix):
        u"""Returns the bold title."""
        context = self.getContext(node)
        return self.rstMarkup % context

# ==============================================================================
class StructureTag(XMLTag):
    u"""Intermediate base class of the structure filters (not registered).

    In front of the node an (optional) anchor and the title of the structure
    are injected; the ``<title>`` child is dropped.  Subclasses with a
    ``replaceTag`` are converted into a node of that tag first."""
    tag = None
# ==============================================================================

    breakFlag      = False
    injBlock       = True
    rstPreMarkup   = "\n.. _%(ID)s:\n"
    rstTitleMarkup = "="
    replaceTag     = None

    def applyFilter(self, node, rstPrefix):
        u"""Replaces the tag (``replaceTag``) or applies the base filter."""

        if self.replaceTag is not None:
            # Structure tags like <refsection> and <section> are similar
            newNode = self.copyNode(node, self.replaceTag, moveID=True)
            self.breakFlag = True
            self.replaceNode(node, newNode)
            self.walk(newNode, rstPrefix)
        else:
            # Structure tag resets the indentation rstPrefix
            super().applyFilter(node, rstPrefix=self.rstBlock)

    def getContext(self, node):
        u"""Adds the formatted ``title`` of the node to the context."""
        ctx = super().getContext(node)
        ctx.title = self.getFormatedTitle(node)
        return ctx

    @classmethod
    def rstTitle(cls, title):
        u"""Returns ``title`` underlined with ``rstTitleMarkup``."""
        return ("\n" + title
                + "\n" + (cls.rstTitleMarkup * len(title))
                + "\n\n")

    def preText(self, node, rstPrefix):
        u"""Returns anchor and title of the structure, drops the ``<title>``."""
        rst = ""
        ctx = self.getContext(node)
        if ctx.ID is not None:
            # Format the anchor *before* the title is appended: a '%' within
            # the title text must not be interpreted as format directive.
            rst += self.rstPreMarkup % ctx
        if ctx.title:
            rst += self.rstTitle(ctx.title)
        # drop no more needed child nodes!
        n = node.find("title")
        if n is not None:
            self.dropNode(n)
        return rst

# ==============================================================================
class Section(StructureTag):
# ==============================================================================

    u"""Filter of the ``<section>`` tag.

    The character used to underline the title depends on the number of
    enclosing ``<section>`` nodes."""

    rstTitleMarkup = "="

    # underline characters, indexed by the nesting level of the section
    sectionMarkers = '=-^"+'

    def applyFilter(self, node, rstPrefix):
        u"""Determines the title markup from the nesting level and applies
        the filter of the base class."""
        sectLevel = 0
        parent = node.getparent()
        while parent is not None:
            if parent.tag in ["section",]:
                sectLevel += 1
            parent = parent.getparent()
        # Sections nested deeper than the available markers share the last
        # marker (formerly this raised an IndexError).
        maxLevel = len(self.sectionMarkers) - 1
        self.rstTitleMarkup = self.sectionMarkers[min(sectLevel, maxLevel)]
        super().applyFilter(node, rstPrefix="")

    def rstTitle(self, title):
        u"""Returns ``title`` underlined with the markup of *this* instance."""
        return ("\n" + title
                + "\n" + (self.rstTitleMarkup * len(title))
                + "\n\n")

# ------------------------------------------------------------------------------
class Appendix(StructureTag):
    u"""``<appendix>`` is handled as ``<chapter>``."""
    replaceTag = "chapter"


class Bibliography(StructureTag):
    u"""``<bibliography>`` is handled as ``<chapter>``."""
    replaceTag = "chapter"


class Legalnotice(StructureTag):
    u"""``<legalnotice>`` is handled as ``<section>``."""
    replaceTag = "section"


class Para(StructureTag):
    u"""Filter of the ``<para>`` tag, see :py:class:`StructureTag`."""


class Sect1(StructureTag):
    u"""``<sect1>`` is handled as ``<section>``."""
    replaceTag = "section"


class Sect2(StructureTag):
    u"""``<sect2>`` is handled as ``<section>``."""
    replaceTag = "section"


class Sect3(StructureTag):
    u"""``<sect3>`` is handled as ``<section>``."""
    replaceTag = "section"
# ------------------------------------------------------------------------------

# ------------------------------------------------------------------------------
class Part(StructureTag):
# ------------------------------------------------------------------------------
    u"""Filter of the ``<part>`` tag: title with over- and underline."""

    rstTitleMarkup = "#"

    def applyFilter(self, node, rstPrefix):
        u"""Moves a ``<partinfo>`` child as chapter "Revision and Copyright"
        to the end of the part and applies the filter of the base class."""
        partinfo = node.find("partinfo")
        if partinfo is not None:
            heading = partinfo.makeelement("title")
            heading.text = "Revision and Copyright"
            # move it to the end of the part
            chapter = self.copyNode(partinfo, "chapter", moveID=True)
            chapter.insert(0, heading)
            node.append(chapter)
            node.remove(partinfo)

        super().applyFilter(node, rstPrefix)

    @classmethod
    def rstTitle(cls, title):
        u"""Returns ``title`` with a line of ``rstTitleMarkup`` above and below."""
        bar = cls.rstTitleMarkup * len(title)
        return "\n%s\n%s\n%s\n\n" % (bar, title, bar)

class Bookinfo(StructureTag):
    u"""``<bookinfo>`` is handled as ``<part>``."""
    replaceTag = "part"


class Setinfo(StructureTag):
    u"""``<setinfo>`` is handled as ``<part>``."""
    replaceTag = "part"

# ------------------------------------------------------------------------------
class Chapter(StructureTag):
# ------------------------------------------------------------------------------
    u"""Filter of the ``<chapter>`` tag: title with over- and underline."""

    rstTitleMarkup = "*"

    @classmethod
    def rstTitle(cls, title):
        u"""Returns ``title`` with a line of ``rstTitleMarkup`` above and below."""
        bar = cls.rstTitleMarkup * len(title)
        return "\n%s\n%s\n%s\n" % (bar, title, bar)
# ------------------------------------------------------------------------------
class Preface(Chapter):
    u"""Filter of the ``<preface>`` tag: a chapter with ``=`` as title markup."""
    rstTitleMarkup = "="
# ------------------------------------------------------------------------------


# ==============================================================================
class Refentry(XMLTag):
# ==============================================================================

    u"""Filter of the ``<refentry>`` tag.

    In front of the node an (optional) anchor, a chapter title and the *man*
    informations are injected.  The child nodes from which these
    informations are taken are dropped."""

    injBlock = True

    def getContext(self, node):
        u"""Adds ``refname``, ``title``, ``manvol`` and ``refmiscinfo`` to the
        context.  An ``id`` of the ``<refname>`` overrides the ``ID``."""
        ctx = super().getContext(node)

        refentrytitle = node.find("refmeta/refentrytitle")
        refname       = node.find("refnamediv/refname")

        # a refentry without <refname> raised an AttributeError
        if refname is not None:
            ID = refname.attrib.get('id')
            if ID:
                ctx.ID  = self.normalizeID(ID)
        ctx.refname = self.getStripedText(refname)
        ctx.title   = ctx.refname
        if refentrytitle is not None:
            ctx.title   = self.getStripedText(refentrytitle)
        ctx.manvol  = self.getStripedText(node.find("refmeta/manvolnum"))
        ctx.refmiscinfo  = self.getStripedText(node.find("refmeta/refmiscinfo"))
        return ctx

    def preText(self, node, rstPrefix):
        u"""Returns anchor, title and man informations of the refentry."""
        ctx = self.getContext(node)
        # Each markup is formatted on its own: a '%' within the title text
        # must not be interpreted as format directive.
        rst = "\n" if not ctx.ID else self.rstAnchor % ctx
        if ctx.title:
            rst += Chapter.rstTitle(ctx.title)
        if ctx.refname:
            rst += "\n*man %(refname)s(%(manvol)s)*\n" % ctx
        if ctx.refmiscinfo:
            rst += "\n*%(refmiscinfo)s*\n" % ctx
        # drop no more needed child nodes!
        for p in ["refmeta/refentrytitle" , "refmeta/manvolnum" ,
                   "refmeta/refmiscinfo", "refnamediv/refname" ]:
            n = node.find(p)
            if n is not None:
                self.dropNode(n)
        return rst


# ==============================================================================
class Refsynopsisdiv(Section):
# ==============================================================================

    u"""Filter of the ``<refsynopsisdiv>`` tag: a section titled "Synopsis"."""

    def getContext(self, node):
        u"""Same context as :py:class:`Section`, but with a fixed ``title``."""
        context = super().getContext(node)
        context.title = "Synopsis"
        return context

# ==============================================================================
class Refentryinfo(XMLTag):
# ==============================================================================

    u"""Filter of the ``<refentryinfo>`` tag: the node is dropped.

    The content seems useless, e.g.::

        <refentryinfo>
        <title>LINUX</title>
        <productname>Kernel Hackers Manual</productname>
        <date>April 2016</date>
        </refentryinfo>
    """

    dropFlag = True

# ------------------------------------------------------------------------------
class Refsection(Section):
    u"""``<refsection>`` is handled as ``<section>``."""
    replaceTag = "section"


class Refsect1(Sect1):
    u"""Filter of the ``<refsect1>`` tag, see :py:class:`Sect1`."""


class Refsect2(Sect2):
    u"""Filter of the ``<refsect2>`` tag, see :py:class:`Sect2`."""


class Refsect3(Sect3):
    u"""Filter of the ``<refsect3>`` tag, see :py:class:`Sect3`."""


class Refpurpose(StructureTag):
    u"""``<refpurpose>`` is handled as ``<para>``."""
    replaceTag = "para"


class Refnamediv(StructureTag):
    u"""``<refnamediv>`` is handled as ``<section>``."""
    replaceTag = "section"
# ------------------------------------------------------------------------------


# ==============================================================================
class Copyright(XMLTag):
# ==============================================================================

    u"""Filter of the ``<copyright>`` tag: replaced by a copyright line built
    from the ``<year>`` and ``<holder>`` child nodes."""

    breakFlag = True
    injBlock  = True

    def getContext(self, node):
        u"""Adds ``year`` (comma separated) and ``holder`` (slash separated)
        to the context.  Child nodes without text are skipped."""
        ctx = super().getContext(node)
        # getStripedText() returns None for a node without text; None within
        # the joined list raised a TypeError
        years   = [self.getStripedText(n) for n in node.findall("year")]
        holders = [self.getStripedText(n) for n in node.findall("holder")]
        ctx.year   = ", ".join(y for y in years if y)
        ctx.holder = "/ ".join(h for h in holders if h)
        return ctx

    def replaceText(self, node, rstPrefix):
        u"""Returns the copyright line."""
        ctx = self.getContext(node)
        rst = "\n**Copyright** %(year)s : %(holder)s\n"
        return rst % ctx

# ==============================================================================
class Trademark(XMLTag):
# ==============================================================================

    u"""Filter of the ``<trademark>`` tag: appends the trademark sign to the
    text of the node."""

    trademark = u"®"

    def applyFilter(self, node, rstPrefix):
        u"""Appends :py:attr:`trademark` to the node text and applies the
        filter of the base class."""
        # node.text is None if the node has no leading text
        node.text = (node.text or "") + self.trademark
        super().applyFilter(node, rstPrefix)

# ==============================================================================
class Code(XMLTag):
# ==============================================================================

    u"""Filter of the ``<code>`` tag.

    ``<code>`` nodes with a ``rstInjection`` attribute are injected reST
    literals; they are not filtered and the recursion stops there."""

    breakFlag  = False

    def applyFilter(self, node, rstPrefix):
        u"""Applies the base filter, except on injected rst-literals."""
        if node.get("rstInjection") is not None:
            # ignore injected rst-literals
            self.breakFlag = True
            return
        super().applyFilter(node, rstPrefix)

# ------------------------------------------------------------------------------
class Computeroutput(Code):
    u"""Filter of the ``<computeroutput>`` tag, see :py:class:`Code`."""
# ------------------------------------------------------------------------------

# ==============================================================================
class LiteralBlock(XMLTag):
    u"""Intermediate base class of the literal block filters (not registered).

    The node is replaced by a reST literal block with the indented text of
    the node."""
    tag = None
# ==============================================================================

    breakFlag  = True
    rstBlock   = "    "
    injBlock   = True
    language   = "guess"
    rstMarkup  = "\n::\n\n%(literal)s\n\n"

    def getContext(self, node):
        u"""Adds ``language`` (attribute of the node or class default) and the
        indented ``literal`` text to the context."""
        ctx = super().getContext(node)
        ctx.language = node.get("language") or self.language
        # getText() returns None for a node without any text; calling
        # replace() on it raised an AttributeError
        # FIXME: pass "*" as &#x22C6;
        text = (self.getText(node) or "").replace(u"⋆", "*")
        text = text.replace("&#x22C6;", "*")  # comes from unnecessary ``<![CDATA[ ...`` usages
        ctx.literal  = self.blockText(self.rstBlock, text).strip("\n")
        return ctx

    def replaceText(self, node, rstPrefix):
        u"""Returns the (optional) anchor followed by the literal block."""
        ctx = self.getContext(node)
        rst = "\n" if not ctx.ID else self.rstAnchor
        rst += self.rstMarkup
        return rst % ctx

# ------------------------------------------------------------------------------
class Literallayout(LiteralBlock):
    u"""Filter of the ``<literallayout>`` tag, see :py:class:`LiteralBlock`."""


class Screen(LiteralBlock):
    u"""Filter of the ``<screen>`` tag, see :py:class:`LiteralBlock`."""
# ------------------------------------------------------------------------------

# ==============================================================================
class Programlisting(LiteralBlock):
# ==============================================================================

    u"""Filter of the ``<programlisting>`` tag: replaced by a ``code-block``.

    ``<programlisting>`` nodes with a ``rstInjection`` attribute are injected
    reST literals and are not filtered."""

    # FIXME: some <programlisting>'s are whole files, i.e. the root element,
    # which can't be replaced that easily.

    language   = "c"
    # pandocs eats some trailing newlines
    rstMarkup = ".. code-block:: %(language)s\n\n%(literal)s\n\n\n"

    def applyFilter(self, node, rstPrefix):
        u"""Applies the base filter, except on injected rst-literals."""
        if node.get("rstInjection") is not None:
            # ignore injected rst-literals
            return
        super().applyFilter(node, rstPrefix)

# ------------------------------------------------------------------------------
class Funcsynopsisinfo(Programlisting):
    u"""Filter of the ``<funcsynopsisinfo>`` tag, see :py:class:`Programlisting`."""


class Synopsis(Programlisting):
    u"""Filter of the ``<synopsis>`` tag, see :py:class:`Programlisting`."""
# ------------------------------------------------------------------------------

# ==============================================================================
class Funcprototype(XMLTag):
# ==============================================================================

    u"""Filter of the ``<funcprototype>`` tag: replaced by a ``c:function``
    directive built from the ``<funcdef>`` and ``<paramdef>`` child nodes."""

    injBlock   = True

    def getContext(self, node):
        u"""Adds ``funcdef`` and the list ``params`` to the context."""
        context = super().getContext(node)
        context.funcdef = self.getStripedText(node.find("funcdef"))
        context.params  = [
            self.getStripedText(paramdef).replace(u"⋆", "*")
            for paramdef in node.findall("paramdef") ]
        return context

    def replaceText(self, node, rstPrefix):
        u"""Returns the ``c:function`` directive."""
        context = self.getContext(node)
        return "\n.. c:function:: %s( %s )" % (
            context.funcdef, ", ".join(context.params))

# ==============================================================================
class Function(XMLTag):
# ==============================================================================

    u"""Filter of the ``<function>`` tag: replaced by a ``:c:func:`` role."""

    def getContext(self, node):
        u"""Adds ``func_call`` to the context: the stripped text of the node,
        extended by ``()`` if it does not end with a closing parenthesis."""
        context = super().getContext(node)
        call = self.getStripedText(node)
        context.func_call = call if call.endswith(")") else call + "()"
        return context

    def replaceText(self, node, rstPrefix):
        u"""Returns the ``:c:func:`` role."""
        return ":c:func:`%(func_call)s`" % self.getContext(node)

# ==============================================================================
class Structname(XMLTag):
# ==============================================================================
    u"""Filter that renders ``<structname>`` as a ``:c:type:`` role."""

    def getContext(self, node):
        u"""Extend the inherited context with ``struct_name``.

        ``struct_name`` is the stripped node text, prefixed with the
        ``struct`` keyword unless the text already starts with that keyword.
        """
        ctx = super().getContext(node)
        name = self.getStripedText(node)
        # Compare the first whitespace separated word, not a bare string
        # prefix: identifiers like ``structure_info`` also start with the
        # letters "struct" but still need the keyword in front.
        if name.split(None, 1)[:1] != ["struct"]:
            name = "struct " + name
        ctx.struct_name = name
        return ctx

    def replaceText(self, node, rstPrefix):
        u"""Return the ``:c:type:`` role for ``node``."""
        ctx = self.getContext(node)
        rst = ":c:type:`%(struct_name)s`"
        return rst % ctx

class Structfield(Constant):
    u"""Filter for ``<structfield>``; a Constant whose ``replaceTag`` is "constant"."""

    replaceTag = "constant"

# ==============================================================================
class Mediaobject(XMLTag):
# ==============================================================================
    u"""Filter for ``<mediaobject>``; collects image and text information."""

    align    = "center"

    def getContext(self, node):
        u"""Extend the inherited context with the media data found below ``node``.

        Added entries:

        - ``img_files``: one FSPath per ``<imagedata>`` (its ``fileref``)
        - ``align``: the class default, or -- if that is empty -- the first
          non-empty ``align`` attribute of an ``<imagedata>``
        - ``glob``: first image file with its suffix replaced by ``.*``,
          ``None`` without images
        - ``text``: stripped text of the ``<textobject>`` descendants
        """
        images = node.findall(".//imagedata")
        img_files = [FSPath(image.attrib.get("fileref")) for image in images]

        align = self.align
        for image in images:
            if align:
                break
            align = image.attrib.get("align")

        context = super().getContext(node)
        context.img_files = img_files
        context.align     = align
        context.glob      = (
            FSPath(img_files[0]).suffix(".*") if img_files else None)
        context.text      = self.getStripedText(*node.findall(".//textobject"))
        return context

# ==============================================================================
class Figure(XMLTag):
# ==============================================================================
    u"""Filter that renders ``<figure>`` as a ``figure`` directive."""

    breakFlag  = True
    injBlock   = True
    rstBlock   = "    "
    rstMarkup = """
.. figure::  %(glob)s
    :alt:    %(alt)s
    :align:  %(align)s

%(title)s
"""

    def getContext(self, node):
        u"""Merge the Mediaobject context of ``node`` with the inherited one.

        On top of the merged context, ``alt`` is set to the ``BASENAME`` of
        all image files joined by " / " and ``title`` to the formatted title.
        """
        ctx = Mediaobject().getContext(node)
        ctx.update(super().getContext(node))
        ctx.alt = " / ".join(f.BASENAME for f in ctx.img_files)
        ctx.title = self.getFormatedTitle(node)
        return ctx

    def replaceText(self, node, rstPrefix):
        u"""Return the figure directive for ``node``.

        Returns ``None`` (and clears ``breakFlag`` on this instance) when
        ``node`` contains no ``<imagedata>``.
        """
        # For the time being this implementation only treats figures with
        # imagedata in.  ``find`` yields None when nothing matches, while
        # ``findall`` always returns a list and so never compares equal to
        # None.
        if node.find(".//imagedata") is None:
            # NOTE(review): the flag is changed on the instance and is never
            # set back; confirm how the framework reuses filter instances.
            self.breakFlag = False
            return None
        ctx = self.getContext(node)
        rst = "\n" if not ctx.ID else self.rstAnchor
        rst += self.rstMarkup
        if ctx.text:
            ctx.text = self.blockText(self.rstBlock, ctx.text)
            rst += "%(text)s"
        rst += "\n\n" # pandoc eats some trailing newlines
        return rst % ctx

# ------------------------------------------------------------------------------
class Informalfigure(Figure):
    u"""Filter for ``<informalfigure>``; inherits everything from Figure."""
# ------------------------------------------------------------------------------

# ==============================================================================
class Table(XMLTag):
# ==============================================================================
    u"""Filter for ``<table>``.

    Writes a ``.. table::`` directive in front of the node and encloses the
    table between a start and an end mark.  Tables with HTML style rows
    (``tbody/tr``) are inserted as raw HTML instead.
    """

    # pandoc doesn't render tables title and anchors well

    injBlock  = True
    rstBlock  = "    "
    breakFlag = False

    tableStartMark = "-- table start markup --"
    tableEndMark   = "-- table end markup --"

    rstPreText  = """
.. table::%(title)s

%(tableStartMark)s
"""
    rstPostText = "\n%(tableEndMark)s\n"

    def applyFilter(self, node, rstPrefix):
        u"""Insert HTML style tables as raw HTML, filter all others normally.

        Before the inherited filter runs, ``pgwide`` is set to "1" and a
        ``<tgroup>`` with column widths is ensured (see ``assert_tgroup``).
        """
        if node.find("tbody/tr") is not None:
            self.insertAsRawHTML(node)
            self.breakFlag = True
            return
        node.set("pgwide", "1")
        self.assert_tgroup(node)
        super().applyFilter(node, rstPrefix)

    def getContext(self, node):
        u"""Extend the inherited context with the table marks and ``title``.

        ``title`` is the stripped ``<title>`` text with one leading space, or
        the empty string when ``node`` has no ``<title>`` child.
        """
        ctx = super().getContext(node)
        ctx.tableStartMark = self.tableStartMark
        ctx.tableEndMark = self.tableEndMark
        title_node = node.find("title")
        ctx.title  = ""
        if title_node is not None:
            ctx.title = " " + self.getStripedText(title_node)
        return ctx

    def preText(self, node, rstPrefix):
        u"""Return the text placed in front of the table; drops ``<title>``."""
        ctx = self.getContext(node)
        rst = "\n" if not ctx.ID else self.rstAnchor
        rst += self.rstPreText
        # drop no more needed child nodes!
        title_node = node.find("title")
        if title_node is not None:
            self.dropNode(title_node)
        return rst % ctx

    def postText(self, node, rstPrefix):
        u"""Return the text placed behind the table (the end mark)."""
        ctx = self.getContext(node)
        return self.rstPostText % ctx

    @classmethod
    def assert_tgroup(cls, node):
        u"""Make sure ``node`` has a ``<tgroup>`` whose colspecs carry a width.

        Without a ``<tgroup>`` one is created as first child: the number of
        columns is taken from the ``<entry>`` children of the first
        ``tbody/row``, one ``<colspec>`` per column is added and ``<tbody>``
        is moved into the new group.  With an existing ``<tgroup>`` only the
        ``<colspec>`` elements lacking a ``colwidth`` get one.

        Width rule (identical in both cases): the relative width starts at 1
        and grows by one at every column of a table with fewer than three
        columns, otherwise only at the last column.
        """
        # To render the rst output well, pandoc requires a tgroup with col
        # definitions.
        tgroup = node.find("tgroup")
        if tgroup is None:
            tgroup = node.makeelement("tgroup")
            node.insert(0, tgroup)
            cols = len(node.find("tbody/row").findall("./entry"))
            tgroup.set("cols", str(cols))
            w = 1
            for c in range(1, cols + 1):
                # ``c`` runs from 1 to ``cols``, so the last column is
                # ``c == cols``; a test against ``cols + 1`` can never match
                # here.
                if cols < 3 or c == cols:
                    w += 1
                colspec = node.makeelement("colspec")
                colspec.set("colname", "c%s" % c)
                colspec.set("colwidth", "%s*" % w)
                tgroup.append(colspec)
            tbody = node.find("tbody")
            tgroup.append(tbody)
        else:
            colspec_s = tgroup.findall("colspec")
            cols = len(colspec_s)
            w = 1
            for c, colspec in enumerate(colspec_s, 1):
                if colspec.get("colwidth") is None:
                    if cols < 3 or c == cols:
                        w += 1
                    colspec.set("colwidth", "%s*" % w)



# ==============================================================================
class Informaltable(Table):
# ==============================================================================
    u"""Filter for ``<informaltable>``.

    One-column tables are dissolved into a ``<section>``; all other tables
    are handled like a ``<table>``.
    """

    injBlock  = True
    breakFlag = False

    # Informaltable within media DocBook are often used to draw a border around
    # a paragraph. This breaks the separation of *presentation from content*.

    def applyFilter(self, node, rstPrefix):
        u"""Drop the table markup of one-column tables, else filter as Table.

        Tables without a ``<tgroup>`` child or without a ``cols`` attribute
        on it are passed to the Table filter.
        """
        tgroup = node.find("tgroup")
        # The Table filter copes with a missing <tgroup> (HTML style rows,
        # generated tgroup), so don't dereference it unchecked here.
        cols = None if tgroup is None else tgroup.get("cols")
        if cols is not None and int(cols) == 1:
            self.dropUselessTable(node, rstPrefix)
        else:
            super().applyFilter(node, rstPrefix)

    def dropUselessTable(self, node, rstPrefix):
        u"""Replace ``node`` by a ``<section>`` holding the former cell content.

        The tags ``tgroup``, ``tbody``, ``row`` and ``entry`` are stripped
        (their content stays), then the new section is walked with the
        prefix extended by ``rstBlock``.
        """
        self.breakFlag = True
        etree.strip_tags(node, "tgroup", "tbody", "row", "entry") # pylint: disable=E1101
        section = self.copyNode(node, "section", moveID=True)
        self.replaceNode(node, section)
        self.walk(section, rstPrefix + self.rstBlock)

# ==============================================================================
class Tgroup(XMLTag):
# ==============================================================================
    u"""Filter for ``<tgroup>``; repairs inconsistent column definitions."""

    injBlock  = True
    breakFlag = False

    # There is an entity within the media_api.xml:
    #
    # <!ENTITY cs-def  "<colspec colname='c1' colwidth='3*' /> \
    #                  <colspec colname='c2' colwidth='1*' />  \
    #                  <colspec colname='c3' colwidth='4*' /> \
    #                  <spanspec spanname='hspan' namest='c1' nameend='c3' />">
    #
    # This cs-def entity defines 3 columns, but it is also used in tables
    # with 2 columns, which is misleading / e.g. frontend_legacy_api.xml:
    #
    # <table pgwide="1" frame="none" id="fe-bandwidth">
    #     <title>enum fe_bandwidth</title>
    #     <tgroup cols="2">   <!-- TWO cols   !!! -->
    #         &cs-def;        <!-- THREE cols !!! -->

    def applyFilter(self, node, rstPrefix):
        u"""Repair the column definitions, then apply the inherited filter."""
        self.repairTableDef(node, rstPrefix)
        super().applyFilter(node, rstPrefix)

    @classmethod
    def repairTableDef(cls, node, rstPrefix):  # pylint: disable=W0613
        u"""Rebuild the ``<colspec>`` children when they contradict ``cols``.

        Nothing happens when the ``cols`` attribute equals the number of
        ``<colspec>`` children.  Otherwise (also when ``cols`` is missing)
        all ``<colspec>`` and ``<spanspec>`` children are removed, ``cols``
        is set to the number of ``<entry>`` children of the first
        ``tbody/row`` and one ``<colspec>`` of width ``1*`` is inserted per
        column, named ``c1``, ``c2``, ...

        ``rstPrefix`` is not used.
        """
        declared = node.get("cols")
        colspecs = node.findall("colspec")
        if declared is not None and int(declared) == len(colspecs):
            return

        # Remove from lists taken up front, so that dropping children can't
        # disturb the iteration.
        for stale in colspecs + node.findall("spanspec"):
            node.remove(stale)

        cols = len(node.find("tbody/row").findall("./entry"))
        node.set("cols", str(cols))
        for index in range(cols):
            colspec = node.makeelement("colspec")
            # Column names count from 1, like the cs-def entity above and
            # the colspecs generated by Table.assert_tgroup.
            colspec.set("colname", "c%s" % (index + 1))
            colspec.set("colwidth", "1*")
            node.insert(index, colspec)


# ==============================================================================
class Entrytbl(XMLTag):
# ==============================================================================
    u"""Filter that flattens ``<entrytbl>`` (a table nested in a table cell)."""

    # flatten these braindead inner-tables

    breakFlag  = True

    def applyFilter(self, node, rstPrefix):
        u"""Replace ``node`` by a single ``<entry>`` and walk that entry.

        Every ``<entry>`` found below ``node`` is copied as a ``<para>`` into
        the replacement entry.
        """
        flattened = node.makeelement("entry")
        for inner in node.findall(".//entry"):
            flattened.append(self.copyNode(inner, "para", moveID=True))
        self.replaceNode(node, flattened)
        self.walk(flattened, rstPrefix + self.rstBlock)




# ==============================================================================
class Authorgroup(XMLTag):
# ==============================================================================
    u"""Filter that renders ``<authorgroup>`` as a list of reST fields."""

    breakFlag = True
    injBlock  = True

    def getContext(self, node):
        u"""Extend the inherited context with ``authorlist``.

        ``authorlist`` holds one Container per ``<author>`` followed by one
        per ``<corpauthor>`` child, with the entries ``firstname``,
        ``surname``, ``othername``, ``affiliation``, ``corpauthor`` and
        ``contrib``.
        """
        # pylint: disable=R0204
        ctx = super().getContext(node)
        ctx.authorlist = []
        for author in node.findall("author") + node.findall("corpauthor"):
            trademark = ", ".join(
                self.getStripedText(tm) + Trademark.trademark
                for tm in author.findall("trademark"))
            corpauthor = trademark
            if author.tag == "corpauthor":
                corpauthor = self.getStripedText(author) + trademark
            ctx.authorlist.append(Container(
                firstname     = self.getStripedText(*author.findall("firstname"))   or ""
                , surname     = self.getStripedText(*author.findall("surname"))     or ""
                , othername   = self.getStripedText(*author.findall("othername"))   or ""
                , affiliation = self.getStripedText(*author.findall("affiliation"))
                , corpauthor  = corpauthor
                , contrib     = self.getStripedText(*author.findall("contrib"))
            ))
        return ctx

    def replaceText(self, node, rstPrefix):
        u"""Return the ``:author:`` / ``:address:`` / ``:contrib:`` fields.

        Returns the empty string when the group holds no author.
        """
        ctx = self.getContext(node)
        parts = []
        for author in ctx.authorlist:
            r = "\n:author:   "
            if author.surname:      r += " %(surname)s"
            if author.firstname:    r += " %(firstname)s"
            if author.othername:    r += " (*%(othername)s*)"
            if author.corpauthor:   r += " %(corpauthor)s"
            if author.affiliation:  r += "\n:address:   %(affiliation)s"
            if author.contrib:      r += "\n:contrib:   %(contrib)s"
            parts.append(r % author)
            parts.append("\n")
        # The text is completely formatted at this point.  It must not be
        # %-formatted a second time: a literal "%" coming from the author
        # data would be taken as a conversion specifier.
        return "".join(parts)



# ==============================================================================
class Revremark(StructureTag):
    u"""Filter for ``<revremark>``; a StructureTag whose ``replaceTag`` is "para"."""

    replaceTag = "para"
class Revision(XMLTag):
# ==============================================================================
    u"""Filter that renders ``<revision>`` as a ``:revision:`` field."""

    injBlock  = True
    rstBlock  = "    "

    def applyFilter(self, node, rstPrefix):
        u"""Apply the inherited filter, then walk the children of ``node``.

        The children are walked with the prefix extended by ``rstBlock``.
        """
        super().applyFilter(node, rstPrefix)
        childPrefix = rstPrefix + self.rstBlock
        self.walkChilds(node, childPrefix)

    def getContext(self, node):
        u"""Extend the inherited context with the revision data.

        Added entries are ``revnumber`` and ``date`` (both fall back to the
        empty string) and ``authorinitials``.
        """
        def childText(tag):
            return self.getStripedText(*node.findall(tag))

        context = super().getContext(node)
        context.revnumber      = childText("revnumber") or ""
        context.date           = childText("date") or ""
        context.authorinitials = childText("authorinitials")
        return context

    def preText(self, node, rstPrefix):
        u"""Return the ``:revision:`` field; drops ``<revnumber>`` and ``<date>``.

        The author initials are appended in parentheses when present.
        """
        context = self.getContext(node)
        pieces = ["\n:revision: %(revnumber)s / %(date)s"]
        if context.authorinitials:
            pieces.append(" (*%(authorinitials)s*)")
        pieces.append("\n\n\n")

        # drop no more needed child nodes!
        for tag in ("revnumber", "date"):
            child = node.find(tag)
            if child is None:
                continue
            self.dropNode(child)
        return "".join(pieces) % context


# ==============================================================================
class Biblioentry(XMLTag):
# ==============================================================================
    u"""Filter that renders ``<biblioentry>`` as a titled block of fields."""

    # DocBook's biblioentries are usually misapplied, e.g.::
    #
    #     <biblioentry id="cea608">
    #       <abbrev>CEA 608-E</abbrev>
    #       <authorgroup>
    #         <corpauthor>Consumer Electronics Association ...</corpauthor>
    #       </authorgroup>
    #       <title>CEA-608-E R-2014 "Line 21 Data Services"</title>
    #     </biblioentry>
    #
    # The abbrev tag contains whitespace, which doesn't fit the common
    # citation reference concept.  Within old DocBook versions they are
    # referred to by ID (``<xref linkend="cea608"/>``) and not by
    # ``<citation>CEA608E</citation>``.
    #
    # To solve this problem in multipart documents, more work is needed. This
    # here is just a POV implementation.

    injBlock  = True
    rstBlock  = "    "

    def getContext(self, node):
        u"""Extend the inherited context with ``abbrev``, ``title``, ``subtitle``.

        Each entry is the stripped text of the child of the same name.
        """
        context = super().getContext(node)
        for tag in ("abbrev", "title", "subtitle"):
            setattr(context, tag, self.getStripedText(node.find(tag)))
        return context

    def preText(self, node, rstPrefix):
        u"""Return anchor, heading and title fields of the entry.

        The heading is built from ``abbrev``.  The children ``<abbrev>``,
        ``<title>`` and ``<subtitle>`` are dropped from ``node``.
        """
        context = self.getContext(node)
        template = self.rstAnchor if context.ID else "\n"
        if context.abbrev:
            template += Section().rstTitle(context.abbrev)
        if context.title:
            template += "\n:title:     %(title)s"
        if context.subtitle:
            template += "\n:subtitle:  %(subtitle)s"

        # drop no more needed child nodes!
        for tag in ("abbrev", "title", "subtitle"):
            child = node.find(tag)
            if child is None:
                continue
            self.dropNode(child)
        return template % context

## ==============================================================================
#class Example(Tag):
## ==============================================================================

class Informalexample(StructureTag):
    u"""Filter for ``<informalexample>``; a StructureTag whose ``replaceTag`` is "para"."""

    replaceTag = "para"

# ==============================================================================
class ReSTTemplate(XMLTag):
# ==============================================================================
    u"""Filter that turns a template operation into a ``kernel-doc`` directive.

    The operation is read from the node attributes ``op``, ``fname`` and
    ``args``.
    """

    # Meaning of the template operations:
    #
    # !E<filename> is replaced by the documentation, in <filename>, for
    # functions that are exported using EXPORT_SYMBOL: the function list is
    # collected from files listed in Documentation/DocBook/Makefile.
    #
    # !I<filename> is replaced by the documentation for functions that are
    # _not_ exported using EXPORT_SYMBOL.
    #
    # !D<filename> is used to name additional files to search for functions
    # exported using EXPORT_SYMBOL.  NOT USED ANYMORE!
    #
    # !F<filename> <function [functions...]> is replaced by the
    # documentation, in <filename>, for the functions listed.
    #
    # !P<filename> <section title> is replaced by the contents of the DOC:
    # section titled <section title> from <filename>.
    # Spaces are allowed in <section title>; do not quote the <section title>.
    #
    # !C<filename> is replaced by nothing, but makes the tools check that
    # all DOC: sections and documented functions, symbols, etc. are used.
    # This is useful when only !F/!P are used and that check is wanted.

    tag = XMLTag.rstTemplate_tag
    injBlock = True
    rstMarkup = """
.. kernel-doc:: %(fname)s
%(options)s

"""

    def getContext(self, node):
        u"""Extend the inherited context with ``op``, ``fname``, ``args``, ``options``.

        ``options`` always starts with the ``:man-sect: 9`` line; the
        operations E, I, F and P add one further option line each.
        """
        context = super().getContext(node)
        context.op      = node.get("op")
        context.fname   = node.get("fname")
        context.args    = node.get("args")

        optionByOp = {
            "E"   : "    :export:\n"
            , "I" : "    :internal:\n"
            , "F" : "    :functions: %s\n" % context.args
            , "P" : "    :doc: %s\n" % context.args
        }
        context.options = "    :man-sect: 9\n" + optionByOp.get(context.op, "")
        return context

    def replaceText(self, node, rstPrefix):
        u"""Return the directive, or a "NOT SUPPORTED" comment for C and D."""
        context = self.getContext(node)
        if context.op in ("C", "D"):
            template = "\n\n.. NOT SUPPORTED: '!%(op)s%(fname)s %(args)s'\n\n"
        else:
            template = self.rstMarkup
        return template % context

# ==============================================================================
class ReSTInclude(XMLTag):
# ==============================================================================
    u"""Filter that turns include nodes into a ``toctree`` directive.

    Depending on ``parseData.parseIncludes`` the included file is converted
    as well.
    """

    tag = XMLTag.rstInclude_tag
    breakFlag  = False
    injBlock   = True
    rstBlock   = "    "
    rstMarkup  = """
.. toctree::
    :maxdepth: 1

%(entries)s


"""

    def applyFilter(self, node, rstPrefix):
        u"""Convert the included file if requested, then apply the inherited filter.

        With ``parseData.parseIncludes`` set, the file named by the ``fname``
        attribute is run through ``filterXML``; input and output suffix are
        taken from the current input and output file.  Otherwise only a log
        message is written.
        """
        parseData = self.parseData
        folder    = FSPath(parseData.folder)

        if parseData.parseIncludes:
            inFile    = FSPath(node.get("fname")).suffix(parseData.fname.SUFFIX)
            outFile   = inFile.suffix(parseData.outFile.SUFFIX)
            xmlFilter = XMLTag()
            xmlFilter.parseData.update(parseData)

            LOG.info("parsing: <rstInclude fname='%s'>" % inFile)
            filterXML(folder, inFile, outFile, xmlFilter = xmlFilter, parseIncludes = True )
        else:
            thisFile = folder / parseData.fname
            inclFile = folder / FSPath(node.get("fname"))
            LOG.info("INFO: <rstInclude fname='%s'> will not be parsed!" % inclFile.relpath(thisFile.DIRNAME))

        # insert rst toctree directive
        super().applyFilter(node, rstPrefix)

    def get_toctree_entry(self, node):
        u"""Return the toctree line for the include ``node``.

        The line is ``rstBlock`` followed by the path of the included file,
        relative to the folder of the current file and without suffix.
        """
        folder   = FSPath(self.parseData.folder)
        baseDir  = (folder / self.parseData.fname).DIRNAME
        inclFile = folder / FSPath(node.get("fname"))
        return self.rstBlock + str(inclFile.relpath(baseDir).SKIPSUFFIX)

    def getContext(self, node):
        u"""Extend the inherited context with ``entries``.

        Starting at ``node``, the toctree lines of all directly following
        include nodes (comments in between are skipped) are collected and
        joined by newlines.  Each collected node gets the attribute
        ``ignoreToctree``; a node already carrying it ends the collection.
        """
        context = super().getContext(node)
        entries = []
        current = node
        while True:
            if current is None or current.tag != self.rstInclude_tag:
                break
            if current.get("ignoreToctree") is not None:
                break
            entries.append(self.get_toctree_entry(current))
            current.set("ignoreToctree", "True")
            current = current.getnext()
            while isinstance(current, etree._Comment): # pylint: disable=E1101, W0212
                current = current.getnext()

        context.entries = "\n".join(entries)  # pylint: disable=R0204
        return context

    def replaceText(self, node, rstPrefix):
        u"""Return the toctree directive, or ``None`` without entries."""
        context = self.getContext(node)
        return self.rstMarkup % context if context.entries else None