#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0-or-later
#
# pylint: disable=missing-docstring, arguments-differ, invalid-name
# pylint: disable=too-many-arguments, too-many-locals, too-many-branches
# pylint: disable=too-many-nested-blocks, too-many-lines
# pylint: disable=too-many-statements, useless-object-inheritance
"""
kernel_doc
~~~~~~~~~~
Implementation of the ``kernel-doc`` parser. The kernel-doc parser extracts
:ref:`kernel-doc markup <kernel-doc-intro>` from source code comments.
This module provides an API which can be used by a sphinx-doc generator
extension, and a command-line interface.
Compared with the Perl kernel-doc script used in the Linux kernel, this
implementation has additional features like *parse options* for a smooth
integration of reStructuredText (reST) markup in the source code comments. In
combination with the (separate) *kernel-doc* reST directive (which uses this
module), the documentation generation becomes more clear and flexible.
The architecture of the parser is simple and consists of three types of
objects (three classes).
* :py:obj:`Parser`: The parser parses the source-file and dumps extracted
kernel-doc data.
* subclasses of class :py:obj:`TranslatorAPI`: to translate the dumped
kernel-doc data into output formats. There exist two implementations:
- :py:obj:`NullTranslator`: translates nothing, just parse
- :py:obj:`ReSTTranslator`: translates dumped kernel-doc data to reST markup
* :py:obj:`ParseOptions`: a container full of options to control *parsing* and
*translation*.
With the :py:obj:`NullTranslator` a source file is parsed only once while
different output could be generated (multiple times) just by changing the
Translator (e.g. with the :py:obj:`ReSTTranslator`) and the option
container. With parsing the source files only once, the building time is reduced
n-times.
"""
import codecs
import collections
import copy
import os
import re
import sys
import textwrap
import six
from fspath import OS_ENV
from . import compat
# ==============================================================================
# common globals
# ==============================================================================
# The version number follows the numbering of the kernel-doc specification
# (Documentation/books/kernel-doc-HOWTO).
__version__ = '1.0'
# ==============================================================================
# regular expressions and helpers used by the parser and the translator
# ==============================================================================
class RE(object):
    u"""regular expression that stores last match (like Perl's ``=~`` operator)

    The constructor arguments are passed unchanged to :py:func:`re.compile`.
    :py:meth:`match` and :py:meth:`search` remember their result in
    ``last_match``; the groups of that result are available by *zero based*
    index (``obj[0]`` is the first capture group).  Every other attribute is
    looked up on the compiled pattern (``pattern``, ``groups``, ``sub`` ...).
    """

    def __init__(self, *args, **kwargs):
        self.re = re.compile(*args, **kwargs)
        self.last_match = None

    def match(self, *args, **kwargs):
        u"""Like ``re.Pattern.match``; the result is stored in ``last_match``."""
        self.last_match = self.re.match(*args, **kwargs)
        return self.last_match

    def search(self, *args, **kwargs):
        u"""Like ``re.Pattern.search``; the result is stored in ``last_match``."""
        self.last_match = self.re.search(*args, **kwargs)
        return self.last_match

    def __getattr__(self, attr):
        # __getattr__ is only called when the normal lookup failed.  If ``re``
        # itself is what is missing (instance created without __init__, which
        # is what ``copy`` and ``pickle`` do), delegating to ``self.re`` would
        # recurse until RecursionError -- report a plain AttributeError.
        if attr == "re":
            raise AttributeError(attr)
        return getattr(self.re, attr)

    def __getitem__(self, group):
        u"""Return capture group ``group`` (zero based) of the last match.

        :raises IndexError: if ``group`` is out of range or nothing has
            matched so far.
        """
        if group < 0 or group > self.groups - 1:
            raise IndexError("group index out of range (max %s groups)" % self.groups )
        if self.last_match is None:
            raise IndexError("nothing has matched / no groups")
        return self.last_match.group(group + 1)
# These regular expressions have been *stolen* from the kernel-doc Perl script.
#
# All of them are RE objects: after match()/search() the capture groups of the
# last match are available zero based, e.g. ``doc_decl[0]`` is the first group.

# start / end of a kernel-doc comment and the leading " * " of its rows
doc_start = RE(r"^/\*\*\s*$") # Allow whitespace at end of comment start.
doc_end = RE(r"\s*\*+/")
doc_com = RE(r"\s*\*\s*")
doc_com_section = RE(r"\s*\*\s{1,8}") # more than 8 spaces (one tab) as prefix is not a new section comment
doc_com_body = RE(r"\s*\* ?")
# first word after the comment prefix
doc_decl = RE(doc_com.pattern + r"(\w+)")
#doc_decl_ident = RE(r"\s*([\w\s]+?)\s*[\(\)]\s*[-:]")
# groups: declaration keyword, identifier, optional "()"
doc_decl_ident = RE(doc_com.pattern + r"(struct|union|enum|typedef|function|macro)\b\s*(\w+)(\(\))?")
# text following a "-" or ":" up to the end of the row
doc_decl_purpose = RE(r"[-:](.*)$")
# except pattern like "http://", a whitespace is required after the colon
doc_sect_except = RE(doc_com.pattern + r"[^\s@](.*)?:[^\s]")
#doc_sect = RE(doc_com.pattern + r"([" + doc_special.pattern + r"]?[\w\s]+):(.*)")
# "section header:" names must be unique per function (or struct,union, typedef,
# enum). Additional condition: the header name should have 3 characters at least!
# groups: section name, remainder of the row (trailing whitespace stripped)
doc_sect = RE(
    doc_com_section.pattern
    + r"("
    + r"@\w[^:]*" # "@foo: lorem" or
    + r"|" + r"@\w[.\w]+[^:]*" # "@foo.bar: lorem" or
    + r"|" + r"\@\.\.\." # ellipsis "@...: lorem" or
    + r"|" + r"\w[\w\s]+\w" # e.g. "Return: lorem"
    + r")"
    + r":(.*?)\s*$") # this matches also strings like "http://..." (doc_sect_except)
# Same as doc_sect, but instead of arbitrary "Header:" names only a fixed list
# of vintage section names is accepted (case-insensitive).
doc_sect_reST = RE(
    doc_com_section.pattern
    + r"("
    + r"@\w[^:]*" # "@foo: lorem" or
    + r"|" + r"@\w[.\w]+[^:]*" # "@foo.bar: lorem" or
    + r"|" + r"\@\.\.\." # ellipsis "@...: lorem" or
    # a tribute to vintage markups, when in reST mode ...
    + r"|description|context|returns?|notes?|examples?|introduction|intro"
    + r")"
    + r":(.*?)\s*$" # this matches also strings like "http://..." (doc_sect_except)
    , flags = re.IGNORECASE)
# a "Header:" row with nothing but whitespace after the colon
reST_sect = RE(
    doc_com_section.pattern
    + r"("
    r"\w[\w\s]+\w"
    + r")"
    + r":\s*$")
# group: content of a comment row (prefix " * " removed)
doc_content = RE(doc_com_body.pattern + r"(.*)")
# group: title following "DOC:"
doc_block = RE(doc_com.pattern + r"DOC:\s*(.*)?")
# state: 5 - gathering documentation outside main block
doc_state5_start = RE(r"^\s*/\*\*\s*$")
doc_state5_sect = RE(r"\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)")
doc_state5_end = RE(r"^\s*\*/\s*$")
# complete "/** @name: text */" comment in one row; groups: "@name", text
doc_state5_oneline = RE(r"^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$")
# match expressions used to find embedded type information
#
# All ``type_*`` patterns start with the fixed width look-behind ``(?<=\s)``,
# i.e. the marker (&, %, @, $) has to be preceded by a whitespace character;
# map_row() prepends a space temporarily to make this work at the start of a row.
type_enum_full = RE(r"(?<=\s)\&(enum)\s*([_\w]+)")
type_struct_full = RE(r"(?<=\s)\&(struct)\s*([_\w]+)")
type_typedef_full = RE(r"(?<=\s)\&(typedef)\s*([_\w]+)")
type_union_full = RE(r"(?<=\s)\&(union)\s*([_\w]+)")
type_member = RE(r"(?<=\s)\&([_\w]+)((\.|->)[_\w]+)")
type_member_func = RE(type_member.pattern + r"\(\)")
type_func = RE(r"(?<=\s)(\w+)(?<!\\)\(\)")
type_constant = RE(r"(?<=\s)\%([-_\w]+)")
type_param = RE(r"(?<=\s)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)")
type_env = RE(r"(?<=\s)(\$\w+)")
type_struct = RE(r"(?<=\s)\&((struct\s*)*[_\w]+)")
# a backslash in front of one of the marker characters @ % & $ (
esc_type_prefix = RE(r"\\([\@\%\&\$\(])")
CR_NL = RE(r"[\r\n]")
# C comments: "//" up to the end of the string / "/* ... */" (non-greedy)
C99_comments = RE(r"//.*$")
C89_comments = RE(r"/\*.*?\*/")
# C declarations; groups: (keyword,) name, text between the braces
C_STRUCT = RE(r"struct\s+(\w+)\s*{(.*)}")
C_UNION = RE(r"union\s+(\w+)\s*{(.*)}")
C_STRUCT_UNION = RE(r"(struct|union)\s+(\w+)\s*{(.*)}")
C_ENUM = RE(r"enum\s+(\w+)\s*{(.*)}")
# group: the last word in front of the terminating ";"
C_TYPEDEF = RE(r"typedef.*\s+(\w+)\s*;")
# typedef of a function pointer
# building blocks: return type (1 to 8 words), identifier, argument list
_typedef_type = r"((?:\s+[\w\*]+\b){1,8})\s*"
_typedef_ident = r"\*?\s*(\w\S+)\s*"
_typedef_args = r"\s*\((.*)\);"
# identifier enclosed in parentheses / identifier without parentheses
C_FUNC_TYPEDEF = RE(r"typedef" + _typedef_type + r"\(" + _typedef_ident + r"\)" + _typedef_args)
C_FUNC_TYPEDEF_2 = RE(r"typedef" + _typedef_type + _typedef_ident + _typedef_args)
# preprocessor lines and kernel macros
MACRO = RE(r"^#")
MACRO_define = RE(r"^#\s*define\s+")
SYSCALL_DEFINE = RE(r"^\s*SYSCALL_DEFINE.*\(")
SYSCALL_DEFINE0 = RE(r"^\s*SYSCALL_DEFINE0")
TP_PROTO = RE(r"TP_PROTO\((.*?)\)")
TRACE_EVENT = RE(r"TRACE_EVENT")
TRACE_EVENT_name = RE(r"TRACE_EVENT\((.*?),")
DEFINE_EVENT = RE(r"DEFINE_EVENT")
DEFINE_EVENT_name = RE(r"DEFINE_EVENT\((.*?),(.*?),")
DEFINE_SINGLE_EVENT = RE(r"DEFINE_SINGLE_EVENT")
DEFINE_SINGLE_EVENT_name = RE(r"DEFINE_SINGLE_EVENT\((.*?),")
# Function prototype patterns.  Every pattern has three groups: return type
# (may be empty), function name and argument list.  The first seven patterns
# accept no "(" inside of the argument list, the remaining ones accept
# everything except "{".
FUNC_PROTOTYPES = [
    # RE(r"^(\w+)\s+\(\*([a-zA-Z0-9_]+)\)\s*\(([^\(]*)\)") # match: void (*foo) (int bar);
    RE(r"^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)")
    , RE(r"^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)")
    , RE(r"^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)")
    , RE(r"^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)")
    , RE(r"^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)")
    , RE(r"^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)")
    , RE(r"^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)")
    , RE(r"^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
    , RE(r"^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
    , RE(r"^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
    , RE(r"^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
    , RE(r"^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
    , RE(r"^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
    , RE(r"^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
    , RE(r"^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
    , RE(r"^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
    , RE(r"^(\w+\s+\w+\s*\*\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)")
]
# MODULE_AUTHOR("..."); / MODULE_DESCRIPTION("..."); / MODULE_LICENSE("...");
#
# groups: "MODULE_", the kind (AUTHOR|DESCRIPTION|LICENSE), the quoted text
MODULE_INFO = RE(r'^\s*(MODULE_)(AUTHOR|DESCRIPTION|LICENSE)\s*\(\s*"([^"]+)"', flags=re.M)
# one or more whitespace characters (used by normalize_ws)
WHITESPACE = RE(r"\s+", flags=re.UNICODE)
def normalize_ws(string):
    u"""Strip needless whitespace.

    Every run of consecutive whitespace characters is replaced by one single
    space, then leading and trailing whitespace is removed."""
    return WHITESPACE.sub(" ", string).strip()
# every character that is not allowed in an ID
ID_CHARS = RE(r"[^A-Za-z0-9\._]")


def normalize_id(ID):
    u"""Substitute the invalid characters of the ID with ``-`` and make it
    lowercase."""
    sanitized = ID_CHARS.sub("-", ID)
    return sanitized.lower()
# Template of a reST directive start, ``%s`` takes the directive name.  The
# in-pattern comments require compilation with ``re.VERBOSE``.
RST_DIRECTIVE_PATTERN = r"""
\.\.[ ]+ # explicit markup start
(%s) # directive name
[ ]? # optional space
:: # directive delimiter
([ ]+|$) # whitespace or end of line
"""
# start of a ``.. code-block::`` directive
RST_CODE_BLOCK = RE(RST_DIRECTIVE_PATTERN % 'code-block', re.VERBOSE | re.UNICODE)
# row ending with ``::`` which is not escaped by a backslash
RST_LITERAL_BLOCK = RE(r'(?<!\\)(\\\\)*::$')
# group: the leading whitespace of a row with at least one non-whitespace char
RST_INDENT = RE(r"^(\s*)[^\s]")
def map_row(row, map_table):
    u"""Apply all substitutions of ``map_table`` to ``row``.

    :param str row: one row of text
    :param map_table: iterable of ``(regexpr, substitute)`` tuples; entries
        with a substitute of ``None`` are skipped
    :return: the row after all substitutions were applied, in table order
    """
    for pattern, replacement in map_table:
        if replacement is None:
            continue
        # python has only fixed width lookbehind: add temporarily leading space
        padded = " " + row
        row = pattern.sub(replacement, padded)[1:]
    return row
def highlight_parser(text, map_table):
    u"""Apply the substitutions of ``map_table`` to ``text``, but leave reST
    literal blocks untouched.

    The text is processed row by row with a small state machine:

    - *highlight*: the row is mapped with :py:func:`map_row`.  A row that
      ends with ``::`` or starts a ``code-block`` directive switches to the
      *literal* state.
    - *literal*: rows are passed through unchanged as long as they are
      indented deeper than the row which has opened the literal block.  The
      first row which is not, is handled in the *highlight* state again.

    Empty rows are always passed through and never change the state.

    :param str text: text with one or more rows
    :param map_table: list of ``(regexpr, substitute)`` tuples
    :return: the mapped text, rows joined by a newline
    """
    block_indent = 0
    state = 'highlight' # [highlight|literal]
    out = []

    for row in text.splitlines():
        if not row.strip(): # pass-through empty lines & continue
            out.append(row)
            continue

        RST_INDENT.search(row)
        indent = len(RST_INDENT[0].expandtabs())

        if state == 'literal':
            if indent >= block_indent:
                out.append(row)
                continue
            # this row is a new block, handle it as normal text
            state = 'highlight'

        out.append(map_row(row, map_table))
        # prepare next state
        if RST_LITERAL_BLOCK.search(row) or RST_CODE_BLOCK.search(row):
            state = 'literal'
            # The literal block has to be indented relative to the row which
            # introduces it.  (This was a constant 1 before, by which a literal
            # block inside of indented text only ended at a row in column 0.)
            block_indent = indent + 1

    return "\n".join(out)
# ==============================================================================
# helper
# ==============================================================================
def openTextFile(fname, mode="r", encoding="utf-8", errors="strict"):
    u"""Open ``fname`` with :py:func:`codecs.open` and return the stream.

    ``mode``, ``encoding`` and ``errors`` are handed over unchanged.
    """
    return codecs.open(fname, mode, encoding, errors)
def readFile(fname, encoding="utf-8", errors="strict"):
    u"""Return the complete, decoded content of file ``fname``."""
    stream = openTextFile(fname, encoding=encoding, errors=errors)
    with stream:
        return stream.read()
class _MissingContainerItem(AttributeError, KeyError):
    u"""Raised by :py:class:`Container` on access to an unknown attribute.

    It is an ``AttributeError`` (what ``hasattr``, ``getattr`` with default and
    the ``copy`` module expect) and still a ``KeyError`` (what has been raised
    before), so existing ``except KeyError`` handlers keep working.
    """


class Container(dict):
    u"""A ``dict`` whose items are also available as attributes.

    ``obj.name`` reads ``obj["name"]`` and ``obj.name = val`` sets
    ``obj["name"]``; ``obj.__dict__`` is the container itself.
    """

    @property
    def __dict__(self):
        return self

    def __getattr__(self, attr):
        # Only called if the normal attribute lookup failed.  A plain KeyError
        # is not recognized as "attribute is missing" by the interpreter, it
        # would e.g. propagate out of hasattr() and copy.deepcopy().
        try:
            return self[attr]
        except KeyError:
            raise _MissingContainerItem(attr)

    def __setattr__(self, attr, val):
        self[attr] = val
class DevNull(object): # pylint: disable=too-few-public-methods
    """A dev/null file descriptor."""

    def write(self, *_args, **_kwargs):
        """Accept any arguments and drop them."""
        return None


# from here on the name is bound to the one and only instance of the class
DevNull = DevNull()
# Value of "srctree" looked up in OS_ENV; falls back to the current working
# directory (evaluated once, at import time).
SRCTREE = OS_ENV.get("srctree", os.getcwd())
DEFAULT_EXP_METHOD = "macro"
"""Default value of ``:exp-method:``"""
DEFAULT_EXP_IDS = ['EXPORT_SYMBOL', 'EXPORT_SYMBOL_GPL', 'EXPORT_SYMBOL_GPL_FUTURE']
"""Default value of ``:exp-ids:``"""
# ==============================================================================
# Logging stuff
# ==============================================================================
# Output streams, defaulting to the interpreter's original stdout / stderr.
# SimpleLog writes its messages to ``STREAM.log_out``.
STREAM = Container(
    # pipes used by the application & logger
    appl_out = sys.__stdout__
    , log_out = sys.__stderr__
    , )
# SimpleLog.info() only emits when VERBOSE is true, SimpleLog.debug() only
# when DEBUG is true.
VERBOSE = False
DEBUG = False
INSPECT = False
class SimpleLog(object):
    u"""Minimal logger, writes ``<CLASS>: <message>`` rows to ``STREAM.log_out``.

    The ``message`` of all log methods is a ``%(name)s`` style format string,
    the values are given as keyword arguments.
    """

    LOG_FORMAT = "%(logclass)s: %(message)s\n"

    def _emit(self, logclass, message, replace):
        # format the message first, then the log row
        message = message % replace
        replace.update(dict(message = message, logclass = logclass))
        STREAM.log_out.write(self.LOG_FORMAT % replace)

    def error(self, message, **replace):
        u"""Log an error (always emitted)."""
        self._emit("ERROR", message, replace)

    def warn(self, message, **replace):
        u"""Log a warning (always emitted)."""
        self._emit("WARN", message, replace)

    def info(self, message, **replace):
        u"""Log an info, emitted only if global ``VERBOSE`` is set."""
        if VERBOSE:
            self._emit("INFO", message, replace)

    def debug(self, message, **replace):
        u"""Log a debug message, emitted only if global ``DEBUG`` is set."""
        if DEBUG:
            self._emit("DEBUG", message, replace)


LOG = SimpleLog()
# ------------------------------------------------------------------------------
class TranslatorAPI(object):
# ------------------------------------------------------------------------------
    u"""
    Abstract kernel-doc translator.

    A translator gets a parser (:py:meth:`setParser`) and an option container
    (:py:meth:`setOptions`); the ``output_*`` methods have to be implemented
    by the concrete translators.

    :cvar list cls.HIGHLIGHT_MAP: highlight mapping
    :cvar tuple cls.LINE_COMMENT: tuple with start-/end- comment tags
    """

    HIGHLIGHT_MAP = [
        ( type_constant      , None )
        , ( type_func        , None )
        , ( type_param       , None )
        , ( type_struct_full , None )
        , ( type_struct      , None )
        , ( type_enum_full   , None )
        , ( type_env         , None )
        , ( type_member_func , None )
        , ( type_member      , None )
        , ]
    LINE_COMMENT = ("# ", "")

    def __init__(self):
        self.options = None
        self.parser = None
        self.dumped_names = []
        self.translated_names = set()

    def setParser(self, parser):
        u"""Set the parser and reset the list of dumped names."""
        self.parser = parser
        self.dumped_names = []

    def setOptions(self, options):
        u"""Set the option container."""
        self.options = options

    def highlight(self, text):
        u"""returns *highlighted* text"""
        if self.options.highlight:
            return highlight_parser(text, self.HIGHLIGHT_MAP)
        return text

    def comment(self, cmt):
        u"""Return ``cmt`` as a comment of the output format.

        Each non blank row of ``cmt`` is enclosed in the start-/end- tags of
        ``LINE_COMMENT``, blank rows (e.g. a leading or trailing newline) are
        left untouched.

        NOTE(review): this method is called by get_preamble() / get_epilog()
        but was not defined; the per-row handling is a choice of this
        implementation.
        """
        start, end = self.LINE_COMMENT
        return "\n".join(
            (start + row + end) if row.strip() else row
            for row in cmt.split("\n"))

    def write_comment(self, *objects):
        u"""Write the Unicode-values of ``objects`` as comments to the stream."""
        for obj in objects:
            self.write(self.comment(six.text_type(obj)))

    def get_preamble(self):
        u"""Return the preamble: a ``src-file`` comment if option ``preamble``
        is the empty string, else the formatted option (or ``""``)."""
        retVal = ""
        if self.options.preamble == "":
            retVal = self.comment("src-file: %s" % (self.options.rel_fname or self.options.fname))
        elif self.options.preamble:
            retVal = self.options.preamble % self
        return retVal

    def get_epilog(self):
        u"""Return the epilog: a default comment if option ``epilog`` is the
        empty string, else the formatted option (or ``""``)."""
        retVal = ""
        if self.options.epilog == "":
            retVal = self.comment(
                "\nThis file was automatic generated / don't edit.")
        elif self.options.epilog:
            retVal = self.options.epilog % self
        return retVal

    def write(self, *objects):
        u"""Write *objects* to stream.

        Write Unicode-values of the *objects* to :py:attr:``self.options.out``.

        :param objects: The positional arguments are the objects with the
            content to write.
        """
        for obj in objects:
            cont = six.text_type(obj)
            self.options.out.write(cont)

    def eof(self):
        u"""Write a final newline if option ``eof_newline`` is set."""
        if self.options.eof_newline:
            self.write("\n")

    # API
    # ---

    def output_preamble(self):
        raise NotImplementedError

    def output_epilog(self):
        raise NotImplementedError

    def output_prefix(self):
        raise NotImplementedError

    def output_suffix(self):
        raise NotImplementedError

    def output_DOC(
            self
            , sections = None          # ctx.sections
            , ):
        raise NotImplementedError

    def output_function_decl(
            self
            , function = None          # ctx.decl_name
            , return_type = None       # ctx.return_type
            , parameterlist = None     # ctx.parameterlist
            , parameterdescs = None    # ctx.parameterdescs
            , parametertypes = None    # ctx.parametertypes
            , sections = None          # ctx.sections
            , purpose = None           # ctx.decl_purpose
            , decl_type = None         # ctx.decl_type
            , ):
        raise NotImplementedError

    def output_struct_decl(
            self
            , decl_name = None         # ctx.decl_name
            , decl_type = None         # ctx.decl_type
            , parameterlist = None     # ctx.parameterlist
            , parameterdescs = None    # ctx.parameterdescs
            , parametertypes = None    # ctx.parametertypes
            , sections = None          # ctx.sections
            , purpose = None           # ctx.decl_purpose
            , definition = None        # ctx.definition
            , ):
        raise NotImplementedError

    def output_union_decl(self, *args, **kwargs):
        # by default a union is written like a struct
        self.output_struct_decl(*args, **kwargs)

    def output_enum_decl(
            self
            , enum = None              # ctx.decl_name
            , parameterlist = None     # ctx.parameterlist
            , parameterdescs = None    # ctx.parameterdescs
            , sections = None          # ctx.sections
            , purpose = None           # ctx.decl_purpose
            , ):
        raise NotImplementedError

    def output_typedef_decl(
            self
            , typedef = None           # ctx.decl_name
            , sections = None          # ctx.sections
            , purpose = None           # ctx.decl_purpose
            , ):
        raise NotImplementedError
# ------------------------------------------------------------------------------
class NullTranslator(TranslatorAPI):
# ------------------------------------------------------------------------------
    u"""
    Null translator, translates nothing, just parse.

    All ``output_*`` methods accept their arguments and do nothing.
    """

    HIGHLIGHT_MAP = []
    LINE_COMMENT = ("", "")

    # pylint: disable=signature-differs

    def output_preamble(self, *args, **kwargs):
        return None

    def output_epilog(self, *args, **kwargs):
        return None

    def output_prefix(self):
        return None

    def output_suffix(self):
        return None

    def output_DOC(self, *args, **kwargs):
        return None

    def output_function_decl(self, *args, **kwargs):
        return None

    def output_struct_decl(self, *args, **kwargs):
        return None

    def output_union_decl(self, *args, **kwargs):
        return None

    def output_enum_decl(self, *args, **kwargs):
        return None

    def output_typedef_decl(self, *args, **kwargs):
        return None
# ------------------------------------------------------------------------------
class ListTranslator(TranslatorAPI):
# ------------------------------------------------------------------------------
    u"""
    Generates a list of kernel-doc symbols.

    The ``output_*`` methods only collect the names, grouped by their type
    (the types are taken from ``Parser.DOC_TYPES``); the lists are written by
    :py:meth:`eof`.
    """

    def __init__(self, list_exported, list_internal_types, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.list_exported = list_exported
        self.list_internal_types = list_internal_types
        # type --> list of the names collected so far
        self.names = {doc_type: [] for doc_type in Parser.DOC_TYPES}

    def get_type(self, name):
        u"""Return the type under which ``name`` was collected or ``None``."""
        found = (t for t, collected in self.names.items() if name in collected)
        return next(found, None)

    def output_preamble(self):
        return None

    def output_epilog(self):
        return None

    def output_prefix(self):
        return None

    def output_suffix(self):
        return None

    def output_DOC(self, sections = None):
        self.names["DOC"].extend(sections.keys())

    def output_function_decl(self, **kwargs):
        # macros and (function-) typedefs have their own list
        kind = kwargs['decl_type']
        if kind not in ('macro', 'typedef'):
            kind = "function"
        self.names[kind].append(kwargs["function"])

    def output_struct_decl(self, **kwargs):
        self.names["struct"].append(kwargs["decl_name"])

    def output_union_decl(self, **kwargs):
        self.names["union"].append(kwargs["decl_name"])

    def output_enum_decl(self, **kwargs):
        self.names["enum"].append(kwargs["enum"])

    def output_typedef_decl(self, **kwargs):
        self.names["typedef"].append(kwargs["typedef"])

    def eof(self):
        u"""Write the list of exported and / or internal names."""
        exported = self.parser.ctx.exported_symbols

        if self.list_exported:
            self.parser.info("list exported symbols")
            for name in exported:
                doc_type = self.get_type(name)
                if doc_type is None:
                    self.parser.warn(
                        "exported symbol '%(name)s' is undocumented", name = name)
                    doc_type = "undocumented"
                self.write("[exported %-14s] %s \n" % (doc_type, name))

        if self.list_internal_types:
            self.parser.info("list internal names")
            for doc_type, collected in self.names.items():
                wanted = ("all" in self.list_internal_types
                          or doc_type in self.list_internal_types)
                if not wanted:
                    continue
                for name in collected:
                    if name in exported:
                        continue
                    self.write("[internal %-10s] %s \n" % (doc_type, name))
# ------------------------------------------------------------------------------
class ReSTTranslator(TranslatorAPI):
# ------------------------------------------------------------------------------
    u"""
    Translate kernel-doc to reST markup.

    :cvar list HIGHLIGHT_MAP: Substitutions which translate the kernel-doc
        in-line markups (``&struct foo``, ``%CONST``, ``@param``, ``func()``
        ...) into reST roles and literals.

    :cvar list MASK_REST_INLINES: Escape common reST (in-line) markups. Classic
        kernel-doc comments contain characters and strings like ``*`` or
        trailing ``_``, which are in-line markups in reST. These special strings
        have to be masked in reST.
    """

    INDENT = " "
    LINE_COMMENT = (".. ", "")

    HIGHLIGHT_MAP = [
        # the regexpr are partial *overlapping*, mind the order!
        ( type_enum_full      , r"\ :c:type:`\1 \2 <\2>`\ " )
        , ( type_struct_full  , r"\ :c:type:`\1 \2 <\2>`\ " )
        , ( type_typedef_full , r"\ :c:type:`\1 \2 <\2>`\ " )
        , ( type_union_full   , r"\ :c:type:`\1 \2 <\2>`\ " )
        , ( type_member_func  , r"\ :c:type:`\1\2() <\1>`\ " )
        , ( type_member       , r"\ :c:type:`\1\2 <\1>`\ " )
        , ( type_func         , r"\ :c:func:`\1`\ ")
        , ( type_constant     , r"\ ``\1``\ " )
        , ( type_param        , r"\ ``\1``\ " )
        , ( type_env          , r"\ ``\1``\ " )
        , ( type_struct       , r"\ :c:type:`struct \1 <\1>`\ ")
        # at least replace escaped %, & and $
        , ( esc_type_prefix   , r"\1")
        , ]

    MASK_REST_INLINES = [
        (RE(r"(\w)_([\s\*])")   , r"\1\\_\2")  # trailing underline
        , (RE(r"([\s\*])_(\w)") , r"\1\\_\2")  # leading underline
        , (RE(r"(\*)")          , r"\\\1")     # emphasis
        , (RE(r"(`)")           , r"\\\1")     # interpreted text & inline literals
        , (RE(r"(\|)")          , r"\\\1")     # substitution references
        , ]

    # parameter type of a function pointer; groups: everything up to and
    # including "(*", the argument list
    FUNC_PTR = RE(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)")
    BITFIELD = RE(r"^(.*?)\s*(:.*)")

    def highlight(self, text):
        u"""Return ``text`` with kernel-doc in-line markups translated to reST.

        In markup mode ``kernel-doc`` the reST in-line markups are masked
        first; with any other markup than ``kernel-doc`` / ``reST`` the text is
        returned unchanged.
        """
        if self.options.markup == "kernel-doc":
            text = highlight_parser(text, self.MASK_REST_INLINES + self.HIGHLIGHT_MAP )
        elif self.options.markup == "reST":
            text = highlight_parser(text, self.HIGHLIGHT_MAP )
        return text

    def format_block(self, content):
        u"""Return ``content`` with each of its rows stripped.

        NOTE(review): this method is called by the ``write_*`` methods but was
        not defined.  Assumed behavior -- please confirm: in markup mode
        ``kernel-doc`` the rows are stripped on both sides (indentation has no
        meaning there), in any other mode only trailing whitespace is removed,
        the indentation (which is markup in reST) is kept.
        """
        if self.options.markup == "kernel-doc":
            rows = [row.strip() for row in content.split("\n")]
        else:
            rows = [row.rstrip() for row in content.split("\n")]
        return "\n".join(rows)

    def write_anchor(self, refname):
        u"""Write a reST hyperlink target; the ID is ``refname`` prefixed by
        option ``id_prefix`` (if set) and normalized by normalize_id()."""
        ID = refname
        if self.options.id_prefix:
            ID = self.options.id_prefix + "." + ID
        ID = normalize_id(ID)
        self.write("\n.. _`%s`:\n" % ID)

    # section adornment character, indexed by the section level
    HEADER_TAGS = (
        "#"    # level 0 / part with overline
        "="    # level 1 / chapter with overline
        "="    # level 2 / sec
        "-"    # level 3 / subsec
        "-"    # level 4 / subsubsec
        '"' )  # level 5 / para

    def write_header(self, header, sec_level=1):
        u"""Write ``header`` as reST section title of level ``sec_level``.

        The adornment is taken from ``HEADER_TAGS``; levels 0 and 1 get an
        overline in addition to the underline.  (This method is called all
        over the class but was not defined.)
        """
        header = self.highlight(header)
        adornment = self.HEADER_TAGS[sec_level] * len(header)
        if sec_level < 2:
            self.write("\n", adornment)
        self.write("\n%s" % header)
        self.write("\n", adornment, "\n")

    def write_section(self, header, content, sec_level=2, ID=None):
        u"""Write a section: optional anchor and title, then the content.

        The content of a section named ``example`` (any case) is written as C
        ``code-block``, all other content is formatted and highlighted.
        """
        if not self.options.no_header:
            if ID:
                self.write_anchor(ID)
            self.write_header(header, sec_level=sec_level)
        if header.lower() == "example":
            self.write("\n.. code-block:: c\n\n")
            for l in textwrap.dedent(content).split("\n"):
                if not l.strip():
                    self.write("\n")
                else:
                    self.write(self.INDENT, l, "\n")
        else:
            content = self.format_block(content)
            content = self.highlight(content)
            self.write("\n" + content)
        self.write("\n")

    def write_definition(self, term, definition, prefix=""):
        u"""Write ``term`` followed by its (indented) ``definition``; every
        row starts with ``prefix``."""
        term = normalize_ws(term) # term has to be a "one-liner"
        term = self.highlight(term)
        if definition != Parser.undescribed:
            definition = self.format_block(definition)
            definition = self.highlight(definition)
        self.write("\n", prefix, term)
        for l in textwrap.dedent(definition).split("\n"):
            self.write("\n", prefix)
            if l.strip():
                self.write(self.INDENT, l)
        self.write("\n")

    def write_func_param(self, param, param_type, descr):
        u"""Write a ``:param ..:`` field with description ``descr`` and, if
        ``param_type`` is given, the ``:type ..:`` field below of it."""
        param = param.replace("*", r"\*")
        self.write("\n", self.INDENT, param)
        if descr != Parser.undescribed:
            descr = self.format_block(descr)
            descr = self.highlight(descr)
        for l in textwrap.dedent(descr).split("\n"):
            self.write("\n")
            if l.strip():
                self.write(self.INDENT * 2, l)
        if param_type:
            param_type = param_type.replace("*", r"\*")
            self.write("\n", self.INDENT, param_type)
        self.write("\n")

    def output_preamble(self):
        self.parser.ctx.offset = 0
        if self.options.mode_line:
            self.write_comment(
                "-*- coding: %s; mode: rst -*-\n"
                % (getattr(self.options.out, "encoding", "utf-8") or "utf-8").lower())
        preamble = self.get_preamble()
        if preamble:
            self.write(preamble, "\n")
        if self.options.top_title:
            self.write_anchor(self.options.top_title)
            self.write_header(self.options.top_title, 0)
            if self.options.top_link:
                self.write("\n", self.options.top_link % self.options, "\n")

    def output_epilog(self):
        self.parser.ctx.offset = 0
        epilog = self.get_epilog()
        if epilog:
            self.write(epilog, "\n")

    def output_prefix(self):
        if compat.sphinx_has_c_namespace() and self.options.id_prefix:
            self.write(".. c:namespace-push:: %s" % self.options.id_prefix, "\n")

    def output_suffix(self):
        if compat.sphinx_has_c_namespace() and self.options.id_prefix:
            self.write("\n", ".. c:namespace-pop::", "\n")

    def output_DOC(self, sections = None):
        self.parser.ctx.offset = self.parser.ctx.decl_offset
        for header, content in sections.items():
            self.write_section(header, content, sec_level=2, ID=header)

    def output_function_decl(
            self
            , function = None          # ctx.decl_name
            , return_type = None       # ctx.return_type
            , parameterlist = None     # ctx.parameterlist
            , parameterdescs = None    # ctx.parameterdescs
            , parametertypes = None    # ctx.parametertypes
            , sections = None          # ctx.sections
            , purpose = None           # ctx.decl_purpose
            , decl_type = None         # ctx.decl_type
            ):
        u"""Write the documentation of a function, macro or function typedef:
        anchor, title, ``c:`` directive with the prototype, purpose, parameter
        descriptions and sections."""
        self.parser.ctx.offset = self.parser.ctx.decl_offset
        _c_type = decl_type
        if _c_type == 'typedef':
            # a function typedef is rendered as function
            _c_type = 'function'
        self.write_anchor(function)
        _header_prefix = ''
        if decl_type in ('macro', 'typedef'):
            _header_prefix = '%s ' % decl_type
        self.write_header(_header_prefix + function, sec_level=2)
        if self.options.man_sect:
            self.write("\n.. kernel-doc-man:: %s.%s\n" % (function, self.options.man_sect) )

        # prototype
        self.write("\n.. c:%s:: " % _c_type)
        if return_type and re.search(r"\s\*+$", return_type):
            # pointer: no space between "*" and the name
            self.write(return_type, function, "(")
        else:
            # a missing return type must not be written as text "None"
            self.write(return_type or "", " ", function, "(")
        p_list = []
        for p_name in parameterlist:
            p_type = parametertypes[p_name]
            if self.FUNC_PTR.search(p_type):
                # pointer to function
                p_list.append("%s%s)(%s)"
                              % (self.FUNC_PTR[0], p_name, self.FUNC_PTR[1]))
            elif re.search(r"\s\*+$", p_type):
                # pointer
                p_list.append("%s%s" % (p_type, p_name))
            else:
                p_list.append("%s %s" % (p_type, p_name))
        p_line = ", ".join(p_list)
        self.write(p_line, ")\n")

        # purpose
        if purpose:
            self.write("\n", self.INDENT, self.highlight(purpose), "\n")

        # parameter descriptions
        for p_name in parameterlist:
            p_type = parametertypes[p_name]
            p_name = re.sub(r"\[.*", "", p_name)
            if p_name != "..." and "." in p_name:
                # @foo.bar sub-descriptions are printed below, ignore them here
                continue
            p_desc = parameterdescs[p_name]
            param = ""
            param_type = None
            if p_name == "...":
                param = ":param ellipsis ellipsis:"
            else:
                param = ":param %s:" % (p_name.replace('_', r'\_'))
                if p_type:
                    param_type = ":type %s: %s" % (p_name.replace('_', r'\_'), p_type.replace('_', r'\_'))
            self.parser.ctx.offset = parameterdescs.offsets.get(
                p_name, self.parser.ctx.offset)
            self.write_func_param(param, param_type, p_desc)

            # print all the @foo.bar sub-descriptions
            sub_descr = [x for x in parameterdescs.keys() if x.startswith(p_name + ".")]
            for _p_name in sub_descr:
                p_desc = parameterdescs.get(_p_name, None)
                # do not print undescribed sub-descriptions
                if p_desc == self.parser.undescribed:
                    continue
                self.parser.ctx.offset = parameterdescs.offsets.get(
                    _p_name, self.parser.ctx.offset)
                self.write_definition(_p_name, p_desc)

        # sections
        for header, content in sections.items():
            self.parser.ctx.offset = sections.offsets[header]
            self.write_section(
                header
                , content
                , sec_level = 3
                , ID = function + "." + header)

    def output_struct_decl(
            self
            , decl_name = None         # ctx.decl_name
            , decl_type = None         # ctx.decl_type
            , parameterlist = None     # ctx.parameterlist
            , parameterdescs = None    # ctx.parameterdescs
            , parametertypes = None    # ctx.parametertypes
            , sections = None          # ctx.sections
            , purpose = None           # ctx.decl_purpose
            , definition = None        # ctx.definition
            , ):
        u"""Write the documentation of a struct (or union): anchor, title,
        ``c:`` directive, purpose, the definition as C code-block, member
        descriptions and sections."""
        self.parser.ctx.offset = self.parser.ctx.decl_offset
        self.write_anchor(decl_name)
        self.write_header("%s %s" % (decl_type, decl_name), sec_level=2)
        if self.options.man_sect:
            self.write("\n.. kernel-doc-man:: %s.%s\n" % (decl_name, self.options.man_sect) )

        # write struct definition
        # see https://github.com/sphinx-doc/sphinx/issues/2713
        if compat.sphinx_has_c_types():
            self.write("\n.. c:%s:: %s\n\n" % (decl_type, decl_name))
        else:
            self.write("\n.. c:type:: %s %s\n\n" % (decl_type, decl_name))

        # purpose
        if purpose:
            self.write(self.INDENT, self.highlight(purpose), "\n")

        # definition
        self.write_anchor(decl_name + "." + Parser.section_def)
        self.write_header(Parser.section_def, sec_level=3)
        self.write("\n.. code-block:: c\n\n")
        self.write(self.INDENT, decl_type, " ", decl_name, " {\n")

        # one clause per row: break the definition after each "{" and ";"
        definition = re.sub(r"(([{;]))", r"\1\n", definition)
        level = 2
        enum = False  # True while the previous clause has started an enum
        for clause in definition.split('\n'):
            clause = normalize_ws(clause)
            if not clause:
                continue
            if clause[0] == "}" and level > 2:
                level -= 1
            if MACRO.match(clause):
                self.write(self.INDENT, clause[:-1].strip(), '\n')
            elif enum:
                # body of a nested enum: one constant per row
                for l in clause.split(','):
                    l = normalize_ws(l)
                    if not l:
                        # nothing between two commas: skip (l[0] would raise)
                        continue
                    if l[0] == "}" and level > 2:
                        level -= 1
                        self.write(self.INDENT * level, l, '\n')
                    else:
                        self.write(self.INDENT * level, l, ',\n')
            else:
                self.write(self.INDENT * level, clause, '\n')
            if clause[-1] == "{":
                level += 1
            enum = clause.startswith('enum')
        self.write(self.INDENT, "}\n")

        # member description
        self.write_anchor(decl_name + "." + Parser.section_members)
        self.write_header(Parser.section_members, sec_level=3)
        for p_name in parameterlist:
            if MACRO.match(p_name):
                continue
            p_name = re.sub(r"\[.*", "", p_name)
            if "." in p_name:
                # @foo.bar sub-descriptions are printed below, ignore them here
                continue
            p_desc = parameterdescs.get(p_name, None)
            if p_desc is not None:
                self.parser.ctx.offset = parameterdescs.offsets.get(
                    p_name, self.parser.ctx.offset)
                self.write_definition(p_name, p_desc)

            # print all the @foo.bar sub-descriptions
            sub_descr = [x for x in parameterdescs.keys() if x.startswith(p_name + ".")]
            for _p_name in sub_descr:
                p_desc = parameterdescs.get(_p_name, None)
                # do not print undescribed sub-descriptions
                if p_desc == self.parser.undescribed:
                    continue
                self.parser.ctx.offset = parameterdescs.offsets.get(
                    _p_name, self.parser.ctx.offset)
                self.write_definition(_p_name, p_desc)

        # sections
        for header, content in sections.items():
            self.parser.ctx.offset = sections.offsets[header]
            self.write_section(
                header
                , content
                , sec_level = 3
                , ID = decl_name + "." + header)

    def output_enum_decl(
            self
            , enum = None              # ctx.decl_name
            , parameterlist = None     # ctx.parameterlist
            , parameterdescs = None    # ctx.parameterdescs
            , sections = None          # ctx.sections
            , purpose = None           # ctx.decl_purpose
            , ):
        u"""Write the documentation of an enum: anchor, title, ``c:``
        directive, purpose, the definition as C code-block, descriptions of
        the constants and sections."""
        self.parser.ctx.offset = self.parser.ctx.decl_offset
        self.write_anchor(enum)
        self.write_header("enum %s" % enum, sec_level=2)
        if self.options.man_sect:
            self.write("\n.. kernel-doc-man:: %s.%s\n" % (enum, self.options.man_sect) )

        # write enum definition
        # see https://github.com/sphinx-doc/sphinx/issues/2713
        if compat.sphinx_has_c_types():
            self.write("\n.. c:enum:: %s\n\n" % enum)
        else:
            self.write("\n.. c:type:: enum %s\n\n" % enum)

        # purpose
        if purpose:
            self.write(self.INDENT, self.highlight(purpose), "\n")

        # definition
        self.write_anchor(enum + "." + Parser.section_def)
        self.write_header(Parser.section_def, sec_level=3)
        self.write("\n.. code-block:: c\n\n")
        self.write(self.INDENT, "enum ", enum, " {")
        e_list = parameterlist[:]
        while e_list:
            e = e_list.pop(0)
            if MACRO.match(e):
                # preprocessor rows are less indented and get no comma
                self.write("\n", self.INDENT, e)
            else:
                self.write("\n", self.INDENT * 2, e)
                if e_list:
                    self.write(",")
        self.write("\n", self.INDENT, "};\n")

        # constants description
        self.write_anchor(enum + "." + Parser.section_constants)
        self.write_header(Parser.section_constants, sec_level=3)
        for p_name in parameterlist:
            p_desc = parameterdescs.get(p_name, None)
            self.parser.ctx.offset = parameterdescs.offsets.get(
                p_name, self.parser.ctx.offset)
            if p_desc is None:
                continue
            self.write_definition(p_name, p_desc)

        # sections
        for header, content in sections.items():
            self.parser.ctx.offset = sections.offsets[header]
            self.write_section(
                header
                , content or "???"
                , sec_level = 3
                , ID = enum + "." + header)

    def output_typedef_decl(
            self
            , typedef = None           # ctx.decl_name
            , sections = None          # ctx.sections
            , purpose = None           # ctx.decl_purpose
            , ):
        u"""Write the documentation of a typedef: anchor, title, ``c:type``
        directive, purpose and sections."""
        self.parser.ctx.offset = self.parser.ctx.decl_offset
        self.write_anchor(typedef)
        self.write_header("typedef %s" % typedef, sec_level=2)
        if self.options.man_sect:
            self.write("\n.. kernel-doc-man:: %s.%s\n" % (typedef, self.options.man_sect) )

        # write typedef definition
        # see https://github.com/sphinx-doc/sphinx/issues/2713
        if compat.sphinx_has_c_types():
            self.write("\n.. c:type:: %s\n\n" % typedef)
        else:
            self.write("\n.. c:type:: typedef %s\n\n" % typedef)
        if purpose:
            self.write(self.INDENT, self.highlight(purpose), "\n")

        for header, content in sections.items():
            self.parser.ctx.offset = sections.offsets[header]
            self.write_section(
                header
                , content or "???"
                , sec_level = 3
                , ID = typedef + "." + header)
# ------------------------------------------------------------------------------
[docs]
class ParseOptions(Container):
    u"""Container of the options which control *parsing* and *translation*.

    Beside the options given by the caller, the container knows about the
    ``parse-<option>:`` comments (see :py:attr:`PARSE_OPTIONS`) which switch
    options while the source code is parsed (see :py:meth:`filter_opt`).
    """
    # pylint: disable=too-many-instance-attributes
    # pylint: disable=global-statement

    # Template of the regular expression matching a "/* parse-<option>: value */"
    # comment, the ``%s`` is replaced by the name of the option.
    PARSE_OPTION_RE = r"^/\*+\s*parse-%s:\s*([a-zA-Z0-9_-]*?)\s*\*/+\s*$"

    # (<option name>, <list of valid values / empty: any value>, <action name>)
    PARSE_OPTIONS = [
        ("highlight", ["on", "off"], "setOnOff"),
        ("INSPECT", ["on", "off"], "setINSPECT"),
        ("markup", ["reST", "kernel-doc"], "setVal"),
        ("SNIP", [], "setVal"),
        ("SNAP", [], "snap"),
    ]

    def dumpOptions(self):
        u"""Return a dict of the options which may change while parsing."""
        # dumps options which are variable from parsing source-code
        return dict(
            highlight=self.highlight,
            markup=self.markup)

    def __init__(self, *args, **kwargs):
        u"""Set the defaults, apply the caller's arguments and fix the filenames.

        A missing ``fname`` is reported by ``LOG.error`` (no exception is
        raised).  A ``fname`` starting with ``/`` is taken as a path relative
        to ``src_tree``.
        """
        self.id_prefix = None     # A prefix for generated IDs.
        self.out = None           # File descriptor for output.
        self.eof_newline = True   # write newline on end of file

        self.src_tree = SRCTREE   # root folder of sources (defaults to CWD)
        self.rel_fname = ""       # pathname relative to src_tree
        self.fname = ""           # absolute pathname

        # self.encoding: the input encoding (encoding of the parsed source
        # file), the output encoding could be seek from the file-descriptor at
        # self.out.

        self.encoding = "utf-8"
        self.tab_width = 8        # tab-stops every n chars

        # control which content to print

        self.use_names = []       # positive list of names to print / empty list means "print all"
        self.skip_names = []      # negative list of names (not to print)
        self.use_all_docs = False # True/False print all "DOC:" sections
        self.no_header = False    # skip section header
        self.error_missing = True # report missing names as errors / else warning
        self.verbose_warn = True  # more warn messages

        # self.gather_context: [True/False] Scan additional context from the
        # parsed source. E.g.: The list of exported symbols is a part of the
        # parser's context. If the context of exported symbols is needed, we
        # have to parse twice. First to find exported symbols, store them in the
        # context and a second once for *normal* parsing within this modified
        # *context*.

        self.gather_context = False
        self.exp_method = None
        self.exp_ids = []
        self.known_attrs = []

        # epilog / preamble

        self.skip_preamble = False
        self.skip_epilog = False
        self.mode_line = True     # write mode-line in the very first line
        self.top_title = ""       # write a title on top of the preamble
        self.top_link = ""        # if top_title, add link to the *top* of the preamble
        self.preamble = ""        # additional text placed into the preamble
        self.epilog = ""          # text placed into the epilog

        # default's of filtered PARSE_OPTIONS

        self.opt_filters = dict()
        self.markup = "reST"
        self.highlight = True     # switch highlighting on/off
        self.man_sect = None      # insert ".. kernel-doc-man:" directive, section no self.man_sect
        self.add_filters(self.PARSE_OPTIONS)

        # SNIP / SNAP
        self.SNIP = None

        # init options with arguments from caller

        super().__init__(self, *args, **kwargs)

        # absolute and relativ filename

        if not self.fname:
            LOG.error("no source file given!")

        self.rel_fname = self.fname
        # Guard the index access: an empty fname has already been reported
        # above and must not end in an IndexError.
        if self.fname and self.fname[0] == '/':
            if not self.src_tree:
                LOG.error("missing SRCTREE")
            self.rel_fname = self.fname[1:]
        self.fname = os.path.abspath(str(self.src_tree) + "/" + str(self.rel_fname))

    def set_defaults(self):
        u"""Fill in the defaults of options not set by the caller."""

        # default way to identify exported symbol

        if not self.exp_method:
            self.exp_method = DEFAULT_EXP_METHOD
        if not self.exp_ids:
            self.exp_ids = DEFAULT_EXP_IDS

        # default top title and top link

        if self.fname and self.top_title == "":
            self.top_title = os.path.basename(self.fname)
        # title and link are %-templates, formatted with the options itself
        if self.top_title:
            self.top_title = self.top_title % self
        if self.top_link:
            self.top_link = self.top_link % self

    def add_filters(self, parse_options):
        u"""Register the filters of the ``parse-<option>:`` comments.

        :param parse_options: iterable of ``(option, valid values, action name)``
                              tuples, see :py:attr:`PARSE_OPTIONS`
        """

        def setINSPECT(name, val): # pylint: disable=unused-argument
            # switches the module's global INSPECT flag, returns None: no
            # option of the container is set
            global INSPECT
            INSPECT = bool(val == "on")

        # An action returns either None or the (name, value) tuple of the
        # option to set.
        _actions = dict(
            setOnOff=lambda name, val: (name, bool(val == "on")),
            setVal=lambda name, val: (name, val),
            snap=lambda name, val: ("SNIP", ""),
            setINSPECT=setINSPECT,
        )

        for option, val_list, action in parse_options:
            self.opt_filters[option] = (
                RE(self.PARSE_OPTION_RE % option), val_list, _actions[action])

    def filter_opt(self, line, parser):
        u"""Apply a ``parse-<option>:`` comment found in ``line``.

        :param line:   a line of the parsed source
        :param parser: object used to report (``parser.error`` /
                       ``parser.info``)
        :return: ``None`` if the line is a parse-option comment, otherwise the
                 unchanged ``line``
        """

        for name, (regexpr, val_list, action) in self.opt_filters.items():
            if regexpr.match(line):
                line = None
                value = regexpr[0]
                if val_list and value not in val_list:
                    parser.error("unknown parse-%(name)s value: '%(value)s'"
                                 , name=name, value=value)
                else:
                    opt_val = action(name, value)
                    if opt_val is not None:
                        name, value = opt_val
                        self[name] = value
                    parser.info(
                        "set parse-option: %(name)s = '%(value)s'"
                        , name=name, value=value)
                break
        return line

    def get_exported_symbols_re(self):
        u"""Return the regular expression matching exported symbols.

        The pattern is built from ``self.exp_method`` (``macro`` or
        ``attribute``) and the identifiers in ``self.exp_ids``.

        :raises ValueError: if ``self.exp_method`` is neither ``macro`` nor
                            ``attribute``
        """

        if self.exp_method == 'macro':
            proto_pattern = r"^\s*(?:%s)\s*\(\s*(\w*)\s*\)\s*"
        elif self.exp_method == 'attribute':
            proto_pattern = r"(?:%s)(?:\s+\**\w+\**)*?\s+\**(\w+)\s*[(;]+"
        else:
            # Without a pattern there is nothing to compile; fail with a clear
            # message instead of an unbound local variable.
            msg = "Unknown exported symbol method: %s" % self.exp_method
            LOG.error(msg)
            raise ValueError(msg)

        id_pattern = "|".join(["(?:" + name + ")" for name in self.exp_ids])
        return RE(proto_pattern % id_pattern, flags=re.M)
# ------------------------------------------------------------------------------
[docs]
class ParserContext(Container):
    u"""Container of the data the parser gathers while scanning a source."""
    # pylint: disable=too-many-instance-attributes

    def dumpCtx(self):
        u"""Return a dict of the context values stored with a dumped declaration."""
        return {"decl_offset": self.decl_offset}

    def __init__(self, *args, **kwargs):
        u"""Initialize all fields, then apply the caller's arguments."""
        self.line_no = 0
        self.contents = ""
        self.section = Parser.section_default

        # sections: ordered mapping of the sections in the order they appear
        # in the source (filled by Parser.dump_section)
        self.sections = collections.OrderedDict()
        self.sectcheck = []
        self.prototype = ""
        self.last_identifier = ""

        # parameterlist: parameters in the order they appear in the source
        # (filled by Parser.push_parameter and Parser.dump_enum)
        self.parameterlist = []

        # parametertypes: <parameter-name> --> <type>
        # (filled by Parser.push_parameter)
        self.parametertypes = {}

        # parameterdescs: <'@parameter'> --> <description>
        # (filled by Parser.dump_section)
        self.parameterdescs = collections.OrderedDict()

        # constants: <'%CONST'> --> <description>
        self.constants = {}

        self.decl_name = ""
        self.decl_type = ""      # [struct|union|enum|typedef|function|macro]
        self.decl_purpose = ""
        self.definition = ""     # definition of the struct|union|enum
        self.return_type = ""    # function's return type definition

        # additional context from the parsed source: exported symbols ...
        self.exported_symbols = []

        # ... and informations about the module
        self.mod_authors = []
        self.mod_descr = ""
        self.mod_license = ""

        # SNIP / SNAP
        self.snippets = collections.OrderedDict()

        # storage of the dumped declarations
        self.dump_storage = []

        # memo of line numbers
        self.offset = 0
        self.last_offset = 0
        self.decl_offset = 0
        self.sections.offsets = {}
        self.parameterdescs.offsets = {}

        super().__init__(self, *args, **kwargs)

    def new(self):
        u"""Return a fresh context which carries over the file-wide values."""
        carried = dict(
            line_no=self.line_no,
            exported_symbols=self.exported_symbols,
            snippets=self.snippets,
            dump_storage=self.dump_storage,
        )
        return self.__class__(**carried)
[docs]
class ParserBuggy(RuntimeError):
    u"""Exception raised when the parser implementation seems buggy.

    The parser implementation performs some integrity tests at runtime. This
    exception type mainly exists to improve the regular expressions which are
    used to parse and analyze the kernel's source code.

    The exception message is prefixed with the last position the parser has
    parsed (``<filename>:<line number>``).  This position may, but does not
    need to be related with the exception (it is only an additional
    information which might help).

    Under normal circumstances, exceptions of this type should never arise,
    unless the implementation of the parser is buggy.

    :param parserObj: the parser, ``parserObj.options.fname`` and
                      ``parserObj.ctx.line_no`` are read; the object is stored
                      in attribute ``parserObj``
    :param message:   description of the failed integrity test
    """

    def __init__(self, parserObj, message):
        # The position is given as "file:line", the same order the log
        # messages of the parser use.
        message = ("last parse position %s:%s\n"
                   % (parserObj.options.fname, parserObj.ctx.line_no)
                   + message)
        super().__init__(message)
        self.parserObj = parserObj
# ------------------------------------------------------------------------------
[docs]
class Parser(SimpleLog):
# ------------------------------------------------------------------------------
# pylint: disable=too-many-public-methods
u"""
kernel-doc comments parser
States:
* 0 - normal code
* 1 - looking for function name
* 2 - scanning field start.
* 3 - scanning prototype.
* 4 - documentation block
* 5 - gathering documentation outside main block (see Split Doc State)
Split Doc States:
* 0 - Invalid (Before start or after finish)
* 1 - Is started (the /\\*\\* was found inside a struct)
* 2 - The @parameter header was found, start accepting multi paragraph text.
* 3 - Finished (the \\*/ was found)
* 4 - Error: Comment without header was found. Spit a error as it's not
proper kernel-doc and ignore the rest.
"""
LOG_FORMAT = "%(fname)s:%(line_no)s: :%(logclass)s: %(message)s\n"
# DOC_TYPES: types of documentation gathered by the parser
DOC_TYPES = ["DOC", "function", "struct", "union", "enum", "typedef", "macro"]
undescribed = "*undescribed*"
section_descr = "Description"
section_def = "Definition"
section_members = "Members"
section_constants = "Constants"
section_intro = "Introduction"
section_context = "Context"
section_return = "Return"
section_default = section_descr
special_sections = [ section_descr
, section_def
, section_members
, section_constants
, section_context
, section_return ]
def __init__(self, options, translator):
    u"""Set up a parser in state 0, bound to ``translator`` and ``options``.

    :param options:    option container, read via ``self.options``
    :param translator: translator which receives the parsed declarations
    """
    super().__init__()

    # accumulator of the raw data fed into the parser
    self.rawdata = ""

    # state flags
    self.state = self.split_doc_state = self.brcount = 0
    self.in_doc_sect = self.in_purpose = False
    self.anon_struct_union = False

    # counters of the reported messages
    self.warnings = self.errors = 0

    # options & translator are placed by their setters below
    self.options = None
    self.translator = None
    self.ctx = ParserContext()

    self.setTranslator(translator)
    self.setOptions(options)
[docs]
def setTranslator(self, translator):
    u"""Bind ``translator`` to this parser and hand over the current options."""
    self.translator = translator
    translator = self.translator
    translator.setParser(self)
    translator.setOptions(self.options)
[docs]
def setOptions(self, options):
    u"""Store ``options`` and pass them on to the translator."""
    self.options = options
    translator = self.translator
    translator.setOptions(options)
[docs]
def reset_state(self):
    u"""Start with a new context and reset the state flags.

    The counters of errors and warnings are not touched.
    """
    self.ctx = self.ctx.new()
    for flag, initial in (
            ("state", 0),
            ("split_doc_state", 0),
            ("in_doc_sect", False),
            ("in_purpose", False),
            ("brcount", 0),
            ("anon_struct_union", False)):
        setattr(self, flag, initial)
# ------------------------------------------------------------
# Log
# ------------------------------------------------------------
[docs]
def error(self, message, _line_no=None, **replace):
    u"""Count and log an error.

    ``fname`` is always taken from the options; ``line_no`` defaults to the
    current line of the context unless given in ``replace``.
    """
    replace["fname"] = self.options.fname
    replace.setdefault("line_no", self.ctx.line_no)
    self.errors += 1
    super().error(message, **replace)
[docs]
def warn(self, message, _line_no=None, **replace):
    u"""Count and log a warning.

    ``fname`` is always taken from the options; ``line_no`` defaults to the
    current line of the context unless given in ``replace``.
    """
    replace["fname"] = self.options.fname
    replace.setdefault("line_no", self.ctx.line_no)
    self.warnings += 1
    super().warn(message, **replace)
[docs]
def info(self, message, _line_no=None, **replace):
    u"""Log an info message.

    ``fname`` is always taken from the options; ``line_no`` defaults to the
    current line of the context unless given in ``replace``.
    """
    replace["fname"] = self.options.fname
    replace.setdefault("line_no", self.ctx.line_no)
    super().info(message, **replace)
[docs]
def debug(self, message, _line_no=None, **replace):
    u"""Log a debug message.

    ``fname`` is always taken from the options; ``line_no`` defaults to the
    current line of the context unless given in ``replace``.
    """
    replace["fname"] = self.options.fname
    replace.setdefault("line_no", self.ctx.line_no)
    super().debug(message, **replace)
# ------------------------------------------------------------
# state parser
# ------------------------------------------------------------
[docs]
@classmethod
def gather_context(cls, src, ctx, opts):
    u"""Scan the *whole* source about context informations.

    :param src:  the complete source (e.g. :py:attr:`Parser.rawdata`)
    :param ctx:  context which receives the gathered informations
    :param opts: options, ``opts.get_exported_symbols_re()`` gives the
                 pattern of the exported symbols

    Every name matched by the pattern of the exported symbols is appended to
    ``ctx.exported_symbols``.

    .. hint::

       A exported symbol does not necessarily have a corresponding source
       code comment with a documentation.

    The matches of ``MODULE_INFO`` are sorted by their type:

    * ``AUTHOR``: appended to the list ``ctx.mod_authors``
    * ``DESCRIPTION``: appended (followed by a space) to the string
      ``ctx.mod_descr``
    * ``LICENSE``: appended (followed by ``", "``) to the string
      ``ctx.mod_license``
    """
    symbols_re = opts.get_exported_symbols_re()
    LOG.debug("gather_context() regExp: %(pattern)s", pattern=symbols_re.pattern)
    for symbol in symbols_re.findall(src):
        LOG.info("exported symbol: %(name)s", name = symbol)
        ctx.exported_symbols.append(symbol)

    LOG.debug("gather_context() regExp: %(pattern)s", pattern=MODULE_INFO.pattern)
    for groups in MODULE_INFO.findall(src):
        kind, text = groups[1], groups[2]
        if kind == "AUTHOR":
            ctx.mod_authors.append(text)
        elif kind == "DESCRIPTION":
            ctx.mod_descr += text + " "
        elif kind == "LICENSE":
            ctx.mod_license += text + ", "

    LOG.info("mod_authors: %(x)s", x = ctx.mod_authors)
    LOG.info("mod_descr: %(x)s", x = ctx.mod_descr)
    LOG.info("mod_license : %(x)s", x = ctx.mod_license)
[docs]
def parse(self, src=None):
    u"""Start parsing: feed all lines and write the frame of the output.

    :param src: iterable of lines; if ``None`` the lines are read from the
                file ``self.options.fname``
    """
    self.dump_preamble()
    self.dump_prefix()

    if src is None:
        # no lines given by the caller, read them from the source file
        with openTextFile(self.options.fname, encoding=self.options.encoding) as srcFile:
            for src_line in srcFile:
                self.feed(src_line)
    else:
        for src_line in src:
            self.feed(src_line)

    self.dump_suffix()
    self.dump_epilog()
    self.translator.eof()
[docs]
def parse_dump_storage(self, translator=None, options=None):
    u"""Translate the declarations stored in ``self.ctx.dump_storage``.

    :param translator: if given, replaces the translator of the parser
    :param options:    if given, replaces the options of the parser
    """
    # the options are set before the translator
    if options is not None:
        self.setOptions(options)
    if translator is not None:
        self.setTranslator(translator)

    self.dump_preamble()
    self.dump_prefix()
    for entry in self.ctx.dump_storage:
        name, out_type, stored_opts, stored_ctx, kwargs = entry
        # restore the options & context dumped along with the declaration
        self.options.update(stored_opts)
        self.ctx.update(stored_ctx)
        self.output_decl(name, out_type, **kwargs)
    self.dump_suffix()
    self.dump_epilog()
    self.translator.eof()
[docs]
def close(self):
    u"""End parsing: flush the remaining data and report what is missing.

    With a ``ListTranslator`` the dump storage is translated.  With any other
    translator (except the ``NullTranslator``) every requested name
    (``options.use_names``) which has not been translated is reported; as
    error or, if ``options.error_missing`` is unset, as warning.
    """
    global INSPECT # pylint: disable=global-statement

    self.feed("", eof=True)

    # log requested but missed documentation
    log_missed = self.warn if not self.options.error_missing else self.error

    # the NullTranslator does not translate / its translated_names is empty
    if not isinstance(self.translator, NullTranslator):
        if isinstance(self.translator, ListTranslator):
            self.parse_dump_storage()
        else:
            for wanted in self.options.use_names:
                if wanted in self.translator.translated_names:
                    continue
                log_missed("no documentation for '%(name)s' found", name=wanted)

    if self.errors or self.warnings:
        self.warn("total errors: %(errors)s / total warnings: %(warnings)s"
                  , errors=self.errors, warnings=self.warnings)
        # the summary itself is not a warning, take it back from the counter
        self.warnings -= 1

    INSPECT = False
[docs]
def feed(self, data, eof=False):
    u"""Append ``data`` to the raw data and parse all complete lines.

    :param data: chunk of the source
    :param eof:  if ``True`` the last (unterminated) line is parsed also

    With ``options.gather_context`` set, nothing is parsed until ``eof``; on
    ``eof`` the context is gathered from the whole raw data first.  An
    exception raised by a state handler is logged and re-raised.
    """
    self.rawdata = self.rawdata + data

    if self.options.gather_context:
        # Scanning the context needs the whole source: just collect the
        # data until EOF.
        if not eof:
            return
        self.gather_context(self.rawdata, self.ctx, self.options)

    pending = self.rawdata.split("\n")
    if not eof:
        # the last line might be incomplete, keep it until more data arrives
        self.rawdata = pending[-1]
        pending = pending[:-1]

    for raw in pending:
        text = raw.expandtabs(self.options.tab_width)
        self.ctx.line_no += 1

        text = self.options.filter_opt(text, self)
        if text is None:
            # the line was a parse-option comment
            continue

        snip = self.options.SNIP
        if snip:
            # record snippet, leading empty lines are dismissed
            recorded = self.ctx.snippets.get(snip, "")
            if recorded or text:
                self.ctx.snippets[self.options.SNIP] = recorded + text + "\n"

        handler = getattr(self, "state_%s" % self.state)
        try:
            handler(text)
        except Exception as _exc:
            self.warn("total errors: %(errors)s / warnings: %(warnings)s"
                      , errors=self.errors, warnings=self.warnings)
            self.warnings -= 1
            self.error("unhandled exception in line: %(l)s", l=text)
            raise
[docs]
def output_decl(self, name, out_type, **kwargs):
    u"""Pass a parsed declaration to the translator (or store it).

    :param name:     name of the declaration
    :param out_type: type of the declaration, selects the translator's
                     ``output_<out_type>`` method
    :param kwargs:   arguments of the ``output_<out_type>`` method

    With a ``NullTranslator`` the declaration is only appended to
    ``ctx.dump_storage``.  Otherwise ``options.skip_names``,
    ``options.use_names`` and ``options.use_all_docs`` decide whether the
    declaration is translated.
    """
    self.ctx.offset = self.ctx.decl_offset

    if name in self.translator.dumped_names:
        self.error("name '%s' used several times" % name)
    self.translator.dumped_names.append(name)

    if isinstance(self.translator, NullTranslator):
        # nothing to translate, store a dump for a later translation
        stored = (
            name,
            out_type,
            self.options.dumpOptions(),
            self.ctx.dumpCtx(),
            copy.deepcopy(kwargs),
        )
        self.ctx.dump_storage.append(stored)
        return

    # skip_names wins over use_names; without use_names all declarations
    # (but no DOC sections) are selected
    selected = bool(
        name not in self.options.skip_names
        and (name in self.options.use_names
             or (out_type != "DOC" and not self.options.use_names)
             or (out_type == "DOC" and self.options.use_all_docs)))

    if not selected:
        self.debug("skip translation of %(t)s: '%(n)s'", t=out_type, n=name)
        return

    self.translator.translated_names.add(name)
    writer = getattr(self.translator, "output_%s" % out_type)
    writer(**kwargs)
[docs]
def state_0(self, line):
    u"""state: 0 - normal code

    Waits for the start of a kernel-doc comment, then switches to state 1.
    """
    if not doc_start.match(line):
        return
    self.debug("START: kernel-doc comment / switch state 0 --> 1")
    # the declaration is expected in the line following the comment start
    self.ctx.decl_offset = self.ctx.line_no + 1
    self.state = 1
    self.in_doc_sect = False
[docs]
def state_1(self, line):
    u"""state: 1 - looking for function name

    The first line of a kernel-doc comment is either the title of a DOC block
    (--> state 4) or the name of a declaration (--> state 2).  Any other line
    is reported and the parser falls back to state 0.
    """
    if doc_block.match(line):
        self.debug("START: DOC block / switch state 1 --> 4")
        self.ctx.last_offset = self.ctx.line_no + 1
        self.state = 4
        self.ctx.contents = ""
        # without a title, the DOC block is the introduction
        self.ctx.section = self.section_intro
        if doc_block[0].strip():
            self.ctx.section = self.sect_title(doc_block[0])
        self.info("DOC: %(sect)s", sect=self.ctx.section)
        return

    if doc_decl.match(line):
        self.debug("START: declaration / switch state 1 --> 2")
        self.ctx.last_offset = self.ctx.line_no + 1
        self.state = 2

        # a declaration without a type keyword is taken as a function
        identifier = doc_decl[0].strip()
        self.ctx.decl_type = "function"
        if doc_decl_ident.match(line):
            identifier = doc_decl_ident[1]
            self.ctx.decl_type = doc_decl_ident[0]
        self.ctx.last_identifier = identifier.strip()

        self.debug("FLAG: in_purpose=True")
        self.in_purpose = True
        self.info("scanning doc for: %(t)s '%(i)s'", t=self.ctx.decl_type, i = identifier)

        # the short description (purpose) may follow in the same line
        self.ctx.decl_purpose = ""
        if doc_decl_purpose.search(line):
            self.ctx.decl_purpose = doc_decl_purpose[0].strip()
        if not self.ctx.decl_purpose:
            self.warn("missing initial short description of '%(i)s'"
                      , i=self.ctx.last_identifier)
        return

    self.warn("can't understand: -->|%(line)s|<--"
              " - I thought it was a doc line" , line=line)
    self.state = 0
[docs]
def sect_title(self, title):
    u"""Normalize common section titles

    The title is compared case-insensitively with the known notations (e.g.
    "Returns" for the "Return" section).  Unknown titles are returned
    unchanged.
    """
    # lower-case notation --> name of the attribute holding the title
    notations = {
        "description": "section_descr",
        "introduction": "section_intro",
        "intro": "section_intro",
        "context": "section_context",
        "return": "section_return",
        "returns": "section_return",
    }
    attr_name = notations.get(title.lower())
    if attr_name is None:
        return title
    return getattr(self, attr_name)
[docs]
def state_2(self, line):
    u"""state: 2 - scanning field start.

    Handles one line inside of a kernel-doc comment.  The line is either

    * the start of a new section / parameter description,
    * the end of the comment (switch to state 3),
    * content of the current section or of the short description (purpose),
    * or a *bad line* which is reported by a warning.
    """
    new_sect = ""
    new_cont = ""
    if not doc_sect_except.match(line):
        # probe different sect start pattern ...
        if self.options.markup == "reST":
            if doc_sect_reST.match(line):
                # this is a line with a parameter definition or vintage
                # section "Context: lorem", "Return: lorem" etc.
                new_sect = self.sect_title(doc_sect_reST[0].strip())
                new_cont = doc_sect_reST[1].strip()
            elif reST_sect.match(line):
                # this is a line with a section definition "Section name:\n"
                new_sect = self.sect_title(reST_sect[0].strip())
                new_cont = ""
            # Sub-sections in parameter descriptions are not provided,
            # with the exception of special_sections names. To allow
            # comments like:
            # * @arg: lorem
            # * Return: foo
            if ( new_sect
                 and self.ctx.section.startswith("@")
                 and not new_sect.startswith("@")
                 and not new_sect in self.special_sections ):
                new_sect = ""
                new_cont = ""
        else: # kernel-doc vintage mode
            if doc_sect.match(line):
                # this is a line with a parameter or section definition
                new_sect = self.sect_title(doc_sect[0].strip())
                new_cont = doc_sect[1].strip()
    if new_sect:
        # a new section starts *here*
        self.debug("found new section --> %(sect)s", sect=new_sect)
        if self.ctx.contents.strip():
            # content gathered so far belongs to the previous section
            if not self.in_doc_sect:
                self.warn("contents before sections '%(c)s'" , c=self.ctx.contents.strip())
            self.dump_section(self.ctx.section, self.ctx.contents)
            self.ctx.section = self.section_default
        self.ctx.contents = ""
        self.debug("new_sect: '%(sec)s' / desc: '%(desc)s'", sec = new_sect, desc = new_cont)
        self.ctx.last_offset = self.ctx.line_no
        self.in_doc_sect = True
        self.in_purpose = False
        self.debug("FLAGs: in_doc_sect=%(s)s / in_purpose=%(p)s", s=self.in_doc_sect, p=self.in_purpose)
        self.ctx.section = new_sect
        if new_cont:
            self.ctx.contents = new_cont + "\n"
        self.info("section: %(sec)s" , sec=self.ctx.section)
    elif doc_end.search(line):
        # end of the comment-block
        if self.ctx.contents:
            self.dump_section(self.ctx.section, self.ctx.contents)
            self.ctx.section = self.section_default
            self.ctx.contents = ""
        # look for doc_com + <text> + doc_end:
        if RE(doc_com.pattern + r"[a-zA-Z_0-9:\.]+" + doc_end.pattern).match(line):
            self.warn("suspicious ending line")
        self.ctx.prototype = ""
        self.debug("END doc block / switch state 2 --> 3")
        self.debug("end of doc comment, looking for prototype")
        self.state = 3
        self.brcount = 0
    elif doc_content.match(line):
        # a comment line with *content* of a section or a *purpose*
        cont_line = doc_content[0]
        if not cont_line.strip():
            # it's a empty line
            if self.in_purpose:
                # empty line after short description (*purpose*) introduce the
                # "Description" section
                self.debug("found empty line in *purpose* --> start 'Description' section")
                if self.ctx.contents.strip():
                    if not self.in_doc_sect:
                        self.warn("contents before sections '%(c)s'" , c=self.ctx.contents.strip())
                    self.dump_section(self.ctx.section, self.ctx.contents)
                self.ctx.section = self.section_descr
                self.ctx.contents = ""
                self.in_doc_sect = True
                self.in_purpose = False
                self.debug("FLAGs: in_doc_sect=%(s)s / in_purpose=%(p)s", s=self.in_doc_sect, p=self.in_purpose)
            elif (self.ctx.section.startswith("@")
                  or self.ctx.section == self.section_context):
                # miguel-style comment kludge, look for blank lines after @parameter
                # line to signify start of description
                self.debug("blank lines after @parameter --> start 'Description' section")
                self.dump_section(self.ctx.section, self.ctx.contents)
                self.ctx.last_offset = self.ctx.line_no
                self.ctx.section = self.section_descr
                self.ctx.contents = ""
                self.in_doc_sect = True
                self.debug("FLAGs: in_doc_sect=%(s)s / in_purpose=%(p)s", s=self.in_doc_sect, p=self.in_purpose)
            else:
                # an empty line within a section is kept as a paragraph break
                self.ctx.contents += "\n"
        elif self.in_purpose:
            # Continued declaration purpose, dismiss leading whitespace
            if self.ctx.decl_purpose:
                self.ctx.decl_purpose += " " + cont_line.strip()
            else:
                self.ctx.decl_purpose = cont_line.strip()
        else:
            if ( self.options.markup == "reST"
                 and self.ctx.section.startswith("@")):
                # I doubt if it is a good idea to strip leading whitespaces
                # in parameter description, but *over all* we get better
                # reST output.
                cont_line = cont_line.strip()
                # Sub-sections in parameter descriptions are not provided,
                # but if this is a "lorem:\n" line create a new paragraph.
                if reST_sect.match(line) and not doc_sect_except.match(line):
                    cont_line = "\n" + cont_line + "\n"
            self.ctx.contents += cont_line + "\n"
    else:
        # i dont know - bad line? ignore.
        self.warn("bad line: '%(line)s'", line = line.strip())
[docs]
def state_3(self, line):
    u"""state: 3 - scanning prototype.

    Beside the lines of the prototype, one-line comments with a parameter
    description and the start of a split-doc comment (--> state 5) are
    handled.
    """
    if line.startswith('typedef'):
        if self.ctx.decl_type != 'typedef':
            self.warn(
                "typedef of function pointer not marked"
                " as typdef, use: 'typedef %s' in the comment."
                % (self.ctx.last_identifier)
                , line_no = self.ctx.decl_offset)
        # the prototype wins over the type named in the comment
        self.ctx.decl_type = 'typedef'

    if doc_state5_oneline.match(line):
        # a complete "/** @name: description */" comment in one line
        title = doc_state5_oneline[0].strip()
        text = doc_state5_oneline[1].strip()
        if text and title:
            self.ctx.section = self.sect_title(title)
            self.ctx.contents = text
            self.dump_section(self.ctx.section, self.ctx.contents)
            self.ctx.section = self.section_default
            self.ctx.contents = ""
        return

    if doc_state5_start.match(line):
        self.debug("FLAG: split_doc_state=1 / switch state 3 --> 5")
        self.state = 5
        self.split_doc_state = 1
        if self.ctx.decl_type in ('function', 'macro'):
            self.error("odd construct, gathering documentation of a function"
                       " outside of the main block?!?")
        return

    if self.ctx.decl_type in ('function', 'macro'):
        self.process_state3_function(line)
    else:
        self.process_state3_type(line)
[docs]
def state_4(self, line):
    u"""state: 4 - documentation block

    Gathers the content of a DOC block until the block ends or a new DOC
    block starts.
    """
    if doc_block.match(line):
        # A new DOC block arrived: dump the gathered one and let state 1
        # handle the title of the new block.
        self.dump_DOC(self.ctx.section, self.ctx.contents)
        self.ctx = self.ctx.new()
        self.debug("END & START: DOC block / switch state 4 --> 1")
        self.state = 1
        self.state_1(line)
        return

    if doc_end.match(line):
        # the DOC block ends here, dump it and reset to state 0
        self.debug("END: DOC block / dump doc section / switch state 4 --> 0")
        self.dump_DOC(self.ctx.section, self.ctx.contents)
        self.ctx = self.ctx.new()
        self.state = 0
        return

    if doc_content.match(line):
        text = doc_content[0]
        # empty lines at the beginning of the block are dismissed
        if text.strip() or self.ctx.contents:
            self.ctx.contents += text + "\n"
[docs]
def state_5(self, line):
    u"""state: 5 - gathering documentation outside main block

    Handles the lines of a split-doc comment, see the *Split Doc States* of
    the parser.
    """
    if self.split_doc_state == 1 and doc_state5_sect.match(line):
        # First line (split_doc_state 1) needs to be a @parameter
        self.ctx.section = self.sect_title(doc_state5_sect[0].strip())
        self.ctx.contents = doc_state5_sect[1].strip() + "\n"
        self.split_doc_state = 2
        self.debug("SPLIT-DOC-START: '%(param)s' / split-state 1 --> 2"
                   , param = self.ctx.section)
        self.ctx.last_offset = self.ctx.line_no
        self.info("section: %(sec)s" , sec=self.ctx.section)
        return

    if doc_state5_end.match(line):
        # end of the comment: dump what has been gathered, back to state 3
        self.debug("SPLIT-DOC-END: ...")
        if self.ctx.contents.strip():
            self.dump_section(self.ctx.section, self.ctx.contents)
            self.ctx.section = self.section_default
            self.ctx.contents = ""
        else:
            self.debug("SPLIT-DOC-END: ... no description to dump")
        self.debug("SPLIT-DOC-END: ... split-state --> 0 / state = 3")
        self.state = 3
        self.split_doc_state = 0
        return

    if doc_content.match(line):
        # regular text
        if self.split_doc_state == 2:
            self.ctx.contents += doc_content[0] + "\n"
        elif self.split_doc_state == 1:
            # text before any @parameter header
            self.split_doc_state = 4
            self.error("Comment without header was found split-state --> 4")
            self.warn("Incorrect use of kernel-doc format: %(line)s"
                      , line = line)
# ------------------------------------------------------------
# helper to parse special objects
# ------------------------------------------------------------
[docs]
def process_state3_function(self, line):
    u"""Gather the prototype of a function / macro line by line.

    The lines are collected in ``ctx.prototype``.  The prototype is complete
    with a ``#define`` line or a line containing ``{`` or ``;``; then it is
    dumped and the state of the parser is reset.
    """
    self.debug("PROCESS-FUNCTION: %(line)s", line=line)

    # strip C99-style comments to end of line and surrounding whitespace
    line = C99_comments.sub("", line).strip()

    if self.ctx.decl_type != 'macro' and MACRO_define.search(line):
        # fix the declaration type whenever 'macro' is missed in the comment
        self.ctx.decl_type = 'macro'

    # part of the line in front of the function's body
    head = RE(r"([^\{]*)")
    is_directive = line.startswith("#") and self.ctx.decl_type != "macro"
    if not is_directive and head.match(line):
        self.ctx.prototype += " " + head[0]

    complete = MACRO_define.search(line) or "{" in line or ";" in line
    if not complete:
        return

    # strip cr&nl, strip C89 comments, strip leading whitespaces
    proto = CR_NL.sub(" ", self.ctx.prototype)
    self.ctx.prototype = C89_comments.sub("", proto).lstrip()

    if SYSCALL_DEFINE.search(self.ctx.prototype):
        self.ctx.prototype = self.syscall_munge(self.ctx.prototype)

    if ( TRACE_EVENT.search(self.ctx.prototype)
         or DEFINE_EVENT.search(self.ctx.prototype)
         or DEFINE_SINGLE_EVENT.search(self.ctx.prototype) ):
        self.ctx.prototype = self.tracepoint_munge(self.ctx.prototype)

    self.ctx.prototype = self.ctx.prototype.strip()
    self.info("prototype --> '%(proto)s'", proto=self.ctx.prototype)
    self.dump_function(self.ctx.prototype)
    self.reset_state()
[docs]
def syscall_munge(self, prototype):
    u"""Convert a ``SYSCALL_DEFINEn(name, type, arg, ...)`` prototype.

    :param str prototype: prototype using one of the ``SYSCALL_DEFINE`` macros
    :return: prototype in the notation of a function ``long sys_<name>(...)``

    As a side effect, ``ctx.last_identifier`` gets the prefix ``sys_`` (if it
    does not already start with it).
    """
    self.debug("syscall munge: '%(prototype)s'" , prototype=prototype)
    void = False

    # strip needless whitespaces
    prototype = normalize_ws(prototype)

    if SYSCALL_DEFINE0.search(prototype):
        void = True
    prototype = SYSCALL_DEFINE.sub("long sys_", prototype)
    if not self.ctx.last_identifier.startswith("sys_"):
        self.ctx.last_identifier = "sys_%s" % self.ctx.last_identifier

    if re.search(r"long (sys_.*?),", prototype):
        # the comma behind the name opens the list of arguments
        prototype = prototype.replace(",", "(", 1)
    elif void:
        prototype = prototype.replace(")","(void)",1)

    # now delete all of the odd-number commas in the prototype
    # so that arg types & arg names don't have a comma between them
    retVal = prototype
    if not void:
        chunks = prototype.split(",")
        merged = [arg_type + arg_name
                  for arg_type, arg_name in zip(chunks[0::2], chunks[1::2])]
        if len(chunks) % 2:
            # A malformed macro leaves a chunk without a partner: keep it
            # as it is instead of failing with an IndexError.
            merged.append(chunks[-1])
        retVal = ",".join(merged)
    self.debug("syscall munge: retVal '%(retVal)s'" , retVal=retVal)
    return retVal
[docs]
def tracepoint_munge(self, prototype):
    u"""Convert the prototype of a tracepoint macro.

    :param str prototype: prototype using ``TRACE_EVENT``, ``DEFINE_EVENT`` or
                          ``DEFINE_SINGLE_EVENT``
    :return: ``static inline void trace_<name>(<args>)`` or, if the name or
             the arguments are not recognized, the unchanged prototype

    On success, ``ctx.last_identifier`` gets the prefix ``trace_`` (if it
    does not already start with it).
    """
    self.debug("tracepoint munge: %(prototype)s" , prototype=prototype)

    event = ""
    if TRACE_EVENT_name.match(prototype):
        event = TRACE_EVENT_name[0]
    elif DEFINE_SINGLE_EVENT_name.match(prototype):
        event = DEFINE_SINGLE_EVENT_name[0]
    elif DEFINE_EVENT_name.match(prototype):
        event = DEFINE_EVENT_name[1]
    event = event.lstrip()

    arguments = ""
    if TP_PROTO.search(prototype):
        arguments = TP_PROTO[0]

    if not (event.strip() and arguments.strip()):
        self.warn("Unrecognized tracepoint format: %(prototype)s"
                  , prototype=prototype)
        return prototype

    if not self.ctx.last_identifier.startswith("trace_"):
        self.ctx.last_identifier = "trace_%s" % self.ctx.last_identifier
    return "static inline void trace_%s(%s)" % (event, arguments)
[docs]
def process_state3_type(self, line):
    u"""Gather the prototype of a struct, union, enum or typedef line by line.

    The line is split at ``{``, ``}`` and ``;``; the pieces are collected in
    ``ctx.prototype`` while the braces are counted.  A ``;`` outside of all
    braces completes the prototype: it is dumped by the ``dump_<type>``
    method and the state of the parser is reset.

    :raises ParserBuggy: if the type of the declaration is unknown
    """
    self.debug("PROCESS-TYPE: %(line)s", line=line)

    # strip cr&nl, strip C99 comments, strip leading&trailing whitespaces
    line = C99_comments.sub("", CR_NL.sub(" ", line)).strip()

    if MACRO.match(line):
        # To distinguish preprocessor directive from regular declaration
        # later (drop-semicolon).
        line += ";"

    # <text in front of the delimiter> <delimiter> <rest of the line>
    piece = RE(r"([^{};]*)([{};])(.*)")

    while piece.search(line):
        if self.ctx.prototype:
            self.ctx.prototype += " "
        self.ctx.prototype += piece[0] + piece[1]

        delimiter = piece[1]
        if delimiter == "{":
            self.brcount += 1
        elif delimiter == "}":
            self.brcount -= 1

        if delimiter == ";" and self.brcount == 0:
            # end of the declaration, the rest of the line is dismissed
            self.info("prototype --> '%(proto)s'", proto=self.ctx.prototype)
            self.debug("decl_type: %(decl_type)s", decl_type=self.ctx.decl_type)
            if self.ctx.decl_type not in ("union", "struct", "enum", "typedef"):
                raise ParserBuggy(
                    self, "unknown decl_type: %s" % self.ctx.decl_type)
            dump = getattr(self, "dump_" + self.ctx.decl_type)
            dump(self.ctx.prototype)
            self.reset_state()
            return

        line = piece[2]

    # no (more) delimiter in the line
    self.ctx.prototype += line
# ------------------------------------------------------------
# dump objects
# ------------------------------------------------------------
[docs]
def dump_preamble(self):
    u"""Let the translator write the preamble, unless it is switched off."""
    if self.options.skip_preamble:
        return
    self.translator.output_preamble()
[docs]
def dump_epilog(self):
    u"""Let the translator write the epilog, unless it is switched off."""
    if self.options.skip_epilog:
        return
    self.translator.output_epilog()
[docs]
def dump_prefix(self):
    u"""Let the translator write the prefix of the output."""
    translator = self.translator
    translator.output_prefix()
[docs]
def dump_suffix(self):
    u"""Let the translator write the suffix of the output."""
    translator = self.translator
    translator.output_suffix()
[docs]
def dump_section(self, name, cont):
    u"""Store section's *content* under it's name.

    :param str name: name of the section
    :param str cont: content of the section

    Stores the *content* under section's *name* in one of the *container*. A
    container is a hash object, the section name is the *key* and the
    content is the *value*.

    Container used by this method:

    * self.ctx.parameterdescs: holds parameter's descriptions (names
      starting with ``@``, including the variadic ``@...``)
    * self.ctx.sections: holds common sections like "Return:"

    The line number of the section (``ctx.last_offset``) is stored in the
    ``offsets`` mapping of the container.  A duplicate description of a
    parameter is reported as error and the new content replaces the old one.
    A duplicate section is reported as warning and the new content is
    appended to the existing one.
    """
    self.debug("dump_section(): %(name)s", name = name)
    name = name.strip()
    cont = cont.rstrip() # dismiss trailing whitespace
    _type_param = RE(r"\@(\w[.\w]*)") # match @foo and @foo.bar

    if _type_param.match(name): # '@parameter' - name of a parameter
        name = _type_param[0]
        self.debug("parameter definition '%(name)s'", name = name)
        if self.ctx.parameterdescs.get(name, None):
            self.error("duplicate parameter definition '%(name)s'"
                       , name = name, line_no = self.ctx.last_offset )
        self.ctx.parameterdescs[name] = cont
        self.ctx.parameterdescs.offsets[name] = self.ctx.last_offset
        self.ctx.sectcheck.append(name)

    elif name == "@...":
        # variadic parameter, not matched by the pattern above
        self.debug("parameter definiton '...'")
        name = "..."
        if self.ctx.parameterdescs.get(name, None):
            # say what is wrong: the description exists twice
            self.error("duplicate parameter definition '...'"
                       , line_no = self.ctx.last_offset )
        self.ctx.parameterdescs[name] = cont
        self.ctx.parameterdescs.offsets[name] = self.ctx.last_offset
        self.ctx.sectcheck.append(name)

    else:
        self.debug("other section '%(name)s'", name = name)
        if self.ctx.sections.get(name, None):
            self.warn("duplicate section name '%(name)s'"
                      , name = name, line_no = self.ctx.last_offset )
            self.ctx.sections[name] += "\n\n" + cont
        else:
            self.ctx.sections[name] = cont
            self.ctx.sections.offsets[name] = self.ctx.last_offset
[docs]
def dump_function(self, proto):
    u"""Parse a function or macro prototype and emit a ``function_decl``.

    :param str proto: the prototype to parse

    Specifiers, known attributes, the part matched by ``MACRO_define`` and
    ``__attribute__((...))`` are removed from the prototype first.  An
    object-like macro yields only a name (no return type, no parameters);
    anything else has to match one of ``FUNC_PROTOTYPES``, otherwise a warning
    is issued and nothing is emitted.
    """
    self.debug("dump_function(): (1) '%(proto)s'", proto=proto)

    # specifiers which are stripped only from the very beginning, one pass
    # each and in exactly this order
    for leading in (r"^static +"
                    , r"^extern +"
                    , r"^asmlinkage +"
                    , r"^inline +"
                    , r"^__inline__ +"
                    , r"^__inline +"
                    , r"^__always_inline +"
                    , r"^noinline +" ):
        proto = re.sub(leading, "", proto)

    # annotations which are stripped wherever they occur
    for anywhere in (r"__init +"
                     , r"__init_or_module +"
                     , r"__meminit +"
                     , r"__must_check +"
                     , r"__weak +" ):
        proto = re.sub(anywhere, "", proto)

    # Remove known attributes from function prototype
    attributes = self.options.known_attrs.copy()
    if self.options.exp_method == 'attribute':
        attributes.extend(self.options.exp_ids)
    for attr in attributes:
        proto = re.sub(r"%s +" % attr, "", proto)

    define = bool(MACRO_define.match(proto))
    proto = MACRO_define.sub("", proto )

    attribute_spec = (
        r"__attribute__\s*\(\("
        r"(?:"
        r"[\w\s]+"          # attribute name
        r"(?:\([^)]*\))?"   # attribute arguments
        r"\s*,?"            # optional comma at the end
        r")+"
        r"\)\)\s+" )
    proto = re.sub(attribute_spec, "", proto)

    # Yes, this truly is vile.  We are looking for:
    # 1. Return type (may be nothing if we're looking at a macro)
    # 2. Function name
    # 3. Function parameters.
    #
    # All the while we have to watch out for function pointer parameters
    # (which IIRC is what the two sections are for), C types (these
    # regexps don't even start to express all the possibilities), and
    # so on.
    #
    # If you mess with these regexps, it's a good idea to check that
    # the following functions' documentation still comes out right:
    # - parport_register_device (function pointer parameters)
    # - atomic_set (macro)
    # - pci_match_device, __copy_to_user (long return type)

    self.debug("dump_function(): (2) '%(proto)s'", proto=proto)

    with_return_value = True
    object_macro = RE(r"^()([a-zA-Z0-9_~:]+)\s+")

    if define and object_macro.match(proto):
        # This is an object-like macro, it has no return type and no
        # parameter list.  Function-like macros are not allowed to have
        # spaces between decl_name and opening parenthesis (notice
        # the \s+).
        self.ctx.return_type = object_macro[0]
        self.ctx.decl_name   = object_macro[1]
        with_return_value = False
        self.debug("dump_function(): (hasRetVal = False) '%(proto)s'"
                   , proto=proto)
    else:
        # the first prototype pattern that matches wins
        for candidate in FUNC_PROTOTYPES:
            if candidate.match(proto):
                self.debug("dump_function(): matchExpr = '%(pattern)s' // '%(proto)s'"
                           , pattern = candidate.pattern, proto=proto)
                break
        else:
            self.warn("can't understand function proto: '%(prototype)s'"
                      , prototype = self.ctx.prototype
                      , line_no = self.ctx.decl_offset)
            return

        self.debug("dump_function(): return_type='%(x)s'", x=candidate[0])
        self.ctx.return_type = candidate[0]
        self.debug("dump_function(): decl_name='%(x)s'", x=candidate[1])
        self.ctx.decl_name = candidate[1]
        self.create_parameterlist(candidate[2], ",")

    if self.ctx.decl_name != self.ctx.last_identifier:
        self.warn("function name from comment differs:  %s <--> %s"
                  % (self.ctx.last_identifier, self.ctx.decl_name)
                  , line_no = self.ctx.decl_offset)

    self.check_sections(self.ctx.decl_name
                        , self.ctx.decl_type
                        , self.ctx.sectcheck
                        , self.ctx.parameterlist
                        )
    if with_return_value:
        self.check_return_section(self.ctx.decl_name, self.ctx.return_type)

    declaration = dict(
        function         = self.ctx.decl_name
        , return_type    = self.ctx.return_type
        , parameterlist  = self.ctx.parameterlist
        , parameterdescs = self.ctx.parameterdescs
        , parametertypes = self.ctx.parametertypes
        , sections       = self.ctx.sections
        , purpose        = self.ctx.decl_purpose
        , decl_type      = self.ctx.decl_type
    )
    self.output_decl(self.ctx.decl_name, "function_decl", **declaration)
[docs]
def dump_DOC(self, name, cont):
    u"""Store the section *name* / *cont* and emit it as ``DOC`` declaration.

    :param str name: name of the section
    :param str cont: content of the section
    """
    self.dump_section(name, cont)
    doc_sections = self.ctx.sections
    self.output_decl(name, "DOC", sections = doc_sections)
[docs]
def dump_union(self, proto):
    u"""Parse a union definition and emit it as ``union_decl``.

    :param str proto: the union definition to parse

    The parsing itself is done by :py:meth:`prepare_struct_union`; if that
    fails an error is reported and nothing is emitted.  A warning is issued
    when the name found in the comment differs from the declared name.
    """
    if not self.prepare_struct_union(proto):
        self.error("can't parse union!")
        return

    if self.ctx.last_identifier != self.ctx.decl_name:
        # this is a union, so name it a union in the message
        self.warn("union name from comment differs:  %s <--> %s"
                  % (self.ctx.last_identifier, self.ctx.decl_name)
                  , line_no = self.ctx.decl_offset)

    self.output_decl(
        self.ctx.decl_name, "union_decl"
        , decl_name      = self.ctx.decl_name
        , decl_type      = self.ctx.decl_type
        , parameterlist  = self.ctx.parameterlist
        , parameterdescs = self.ctx.parameterdescs
        , parametertypes = self.ctx.parametertypes
        , sections       = self.ctx.sections
        , purpose        = self.ctx.decl_purpose
        , definition     = self.ctx.definition )
[docs]
def dump_struct(self, proto):
    u"""Parse a struct definition and emit it as ``struct_decl``.

    :param str proto: the struct definition to parse

    The parsing itself is done by :py:meth:`prepare_struct_union`; if that
    fails an error is reported and nothing is emitted.  A warning is issued
    when the name found in the comment differs from the declared name.
    """
    parsed = self.prepare_struct_union(proto)
    if not parsed:
        self.error("can't parse struct!")
        return

    ctx = self.ctx
    if ctx.decl_name != ctx.last_identifier:
        self.warn("struct name from comment differs:  %s <--> %s"
                  % (ctx.last_identifier, ctx.decl_name)
                  , line_no = ctx.decl_offset)

    declaration = dict(
        decl_name        = ctx.decl_name
        , decl_type      = ctx.decl_type
        , parameterlist  = ctx.parameterlist
        , parameterdescs = ctx.parameterdescs
        , parametertypes = ctx.parametertypes
        , sections       = ctx.sections
        , purpose        = ctx.decl_purpose
        , definition     = ctx.definition )
    self.output_decl(ctx.decl_name, "struct_decl", **declaration)
[docs]
def prepare_struct_union(self, proto):
    u"""Parse a struct / union definition and fill ``self.ctx`` from it.

    :param str proto: the struct / union definition to parse
    :return: ``True`` if the definition was parsed, otherwise ``False``

    On success ``self.ctx.decl_name`` and ``self.ctx.definition`` are set, the
    members are handed to :py:meth:`create_parameterlist` (split at ``;``) and
    :py:meth:`check_sections` is called.  Members marked ``private:`` are
    dropped and nested structs / unions are flattened into ``outer.inner``
    members before the parameter list is built.
    """
    self.debug("prepare_struct_union(): '%(proto)s'", proto=proto)

    # ignore members marked private:
    for pattern, repl in (
            (r"/\*\s*private:.*?\/\*\s*public:.*?\*\/", "")
            , (r"/\*\s*private:.*$", "};") ):
        proto = re.sub(pattern, repl, proto, flags=re.I)

    if not C_STRUCT_UNION.match(proto):
        return False

    found_type = C_STRUCT_UNION[0]
    if found_type != self.ctx.decl_type:
        self.error("determine of decl_type is inconsistent: '%s' <--> '%s'"
                   "\nprototype: %s"
                   % (found_type, self.ctx.decl_type, proto))
        return False

    self.ctx.decl_name = C_STRUCT_UNION[1]
    members = C89_comments.sub("", C_STRUCT_UNION[2])
    self.ctx.definition = members

    # (pattern, replacement, flags) applied one after the other
    rewrites = (
        # strip kmemcheck_bitfield_{begin,end}.*;
        (r"kmemcheck_bitfield_.*?;", "", 0)
        # strip attributes
        , (r"__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)", "", re.I)
        , (r"__aligned\s*\([^;]*\)", "", 0)
        , (r"\s*CRYPTO_MINALIGN_ATTR", "", 0)
        # replace DECLARE_BITMAP
        , (r"DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)"
           , r"unsigned long \1[BITS_TO_LONGS(\2)]", 0)
        # replace DECLARE_HASHTABLE
        , (r"DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)"
           , r"unsigned long \1[1 << ((\2) - 1)]", 0)
        # replace DECLARE_KFIFO
        , (r"DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)"
           , r"\2 \1", 0)
        # replace DECLARE_KFIFO_PTR
        , (r"DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)"
           , r"\2 \1", 0)
    )
    for pattern, repl, flags in rewrites:
        members = re.sub(pattern, repl, members, flags=flags)

    # Split nested struct/union elements as newer ones; every pass replaces
    # the first innermost struct/union by its flattened members
    NESTED      = RE(r"(struct|union)([^{};]+){([^{}]*)}([^{}\;]*)\;")
    PTR_TO_FUNC = RE(r"^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)")

    while NESTED.search(members):
        outer_type  = NESTED[0].strip()
        body        = NESTED[2].strip()
        declarators = NESTED[3].strip()
        flattened   = []

        # union car {int foo;} bar1, bar2, *bbar3;
        for ident in declarators.split(','):
            # drop bit-field / array suffix and pointer stars from the id
            ident = re.sub(r"[:\[].*", "", ident).strip().replace('*', '')
            flattened.append("%s %s;" % (outer_type, ident))

            for arg in body.split(';'):
                arg = normalize_ws(arg)
                if not arg:
                    continue

                # Handle arrays
                arg = re.sub(r"\[\s*\S.*\]", "", arg)

                if PTR_TO_FUNC.search(arg):
                    f_type  = PTR_TO_FUNC[0].strip()
                    f_name  = PTR_TO_FUNC[1].strip()
                    f_extra = PTR_TO_FUNC[2].strip()
                    if not f_name:
                        continue
                    if ident:
                        flattened.append(
                            "%s%s.%s%s; " % (f_type, ident, f_name, f_extra))
                    else:
                        flattened.append(
                            "%s%s%s; " % (f_type, f_name, f_extra))
                    continue

                # suppport bit types e.g. '__u8 arg1 : 1' --> '__u8 arg1'
                arg = re.sub(r"\s*:\s*[0-9]+", "", arg)
                words  = arg.split(" ")
                m_type = words[0]
                m_name = words[-1].replace('*', '')
                if m_name == m_type:
                    # anonymous struct/union
                    flattened.append("%s;" % (m_type))
                elif ident:
                    flattened.append("%s %s.%s;" % (m_type, ident, m_name))
                else:
                    flattened.append("%s %s;" % (m_type, m_name))

        members = NESTED.sub("".join(flattened), members, count=1)

    # ignore other nested elements, like enums
    members = re.sub(r"({[^\{\}]*})", '', members)

    self.create_parameterlist(members, ';')
    self.check_sections(self.ctx.decl_name
                        , self.ctx.decl_type
                        , self.ctx.sectcheck
                        , self.ctx.parameterlist # self.ctx.struct_actual.split(" ")
                        )
    return True
[docs]
def dump_enum(self, proto):
    u"""Parse an enum definition and emit it as ``enum_decl``.

    :param str proto: the enum definition to parse

    Each enumerator is appended to ``self.ctx.parameterlist``; enumerators
    without a description get ``Parser.undescribed`` and a warning.  If
    ``C_ENUM`` does not match, an error is reported and nothing is emitted.
    """
    self.debug("dump_enum(): '%(proto)s'", proto=proto)

    proto = C89_comments.sub("", proto)
    # strip #define macros inside enums
    proto = re.sub(r"#\s*((define|ifdef)\s+|endif)[^;]*;", "", proto)

    if not C_ENUM.search(proto):
        self.error("can't parse enum!")
        return

    separator  = ","
    first_word = RE(r"^\s*(\w+).*")

    self.ctx.decl_name = C_ENUM[0]
    members = normalize_ws(C_ENUM[1])

    # drop trailing splitchar, if extists
    if members.endswith(separator):
        members = members[:-1]

    for member in members.split(separator):
        # reduce 'NAME = value' to 'NAME'
        name = first_word.sub(r"\1", member)
        self.ctx.parameterlist.append(name)
        if self.ctx.parameterdescs.get(name, None):
            continue
        self.warn(
            "Enum value '%(name)s' not described"
            " in enum '%(decl_name)s'"
            , name = name,  decl_name=self.ctx.decl_name )
        self.ctx.parameterdescs[name] = Parser.undescribed

    if self.ctx.decl_name != self.ctx.last_identifier:
        self.warn("enum name from comment differs:  %s <--> %s"
                  % (self.ctx.last_identifier, self.ctx.decl_name)
                  , line_no = self.ctx.decl_offset)

    self.check_sections(self.ctx.decl_name
                        , self.ctx.decl_type
                        , self.ctx.sectcheck
                        , self.ctx.parameterlist )

    declaration = dict(
        enum             = self.ctx.decl_name
        , parameterlist  = self.ctx.parameterlist
        , parameterdescs = self.ctx.parameterdescs
        , sections       = self.ctx.sections
        , purpose        = self.ctx.decl_purpose )
    self.output_decl(self.ctx.decl_name, "enum_decl", **declaration)
[docs]
def dump_typedef(self, proto):
    u"""Parse a typedef and emit it as ``function_decl`` or ``typedef_decl``.

    :param str proto: the typedef to parse

    Typedefs of function (pointer) types, matched by ``C_FUNC_TYPEDEF`` or
    ``C_FUNC_TYPEDEF_2``, are handled like a function prototype.  For all
    other typedefs, trailing ``(...)`` and ``[...]`` groups are removed before
    the name is taken from ``C_TYPEDEF``.  If nothing matches, an error is
    reported.
    """
    self.debug("dump_typedef(): '%(proto)s'", proto=proto)

    proto = C89_comments.sub("", proto)

    matchExpr = None
    if C_FUNC_TYPEDEF.search(proto):
        matchExpr = C_FUNC_TYPEDEF
    elif C_FUNC_TYPEDEF_2.search(proto):
        self.warn("typedef of function pointer used uncommon code style: '%s'" % proto)
        matchExpr = C_FUNC_TYPEDEF_2

    if matchExpr:
        # Parse function prototypes
        self.ctx.return_type = matchExpr[0].lstrip()
        self.ctx.decl_name   = matchExpr[1]
        self.check_return_section(self.ctx.decl_name, self.ctx.return_type)

        f_args = matchExpr[2]
        self.create_parameterlist(f_args, ',')

        if self.ctx.last_identifier != self.ctx.decl_name:
            self.warn("function name from comment differs:  %s <--> %s"
                      % (self.ctx.last_identifier, self.ctx.decl_name)
                      , line_no = self.ctx.decl_offset)

        self.check_sections(self.ctx.decl_name
                            , self.ctx.decl_type
                            , self.ctx.sectcheck
                            , self.ctx.parameterlist )
        self.output_decl(
            self.ctx.decl_name, "function_decl"
            , function       = self.ctx.decl_name
            , return_type    = self.ctx.return_type
            , parameterlist  = self.ctx.parameterlist
            , parameterdescs = self.ctx.parameterdescs
            , parametertypes = self.ctx.parametertypes
            , sections       = self.ctx.sections
            , purpose        = self.ctx.decl_purpose
            , decl_type      = self.ctx.decl_type
        )

    else:
        self.debug("dump_typedef(): '%(proto)s'", proto=proto)

        # Strip trailing '(...)' / '[...]' groups, innermost first.  The
        # former patterns '\(*.\)' and '\[*.\]' had '*' and '.' transposed:
        # they removed only one character in front of the closing bracket,
        # so 'foo[10];' was turned into 'foo[1;' and could not be parsed.
        x1 = RE(r"\([^()]*\)\s*;$")
        x2 = RE(r"\[[^\[\]]*\]\s*;$")

        while x1.search(proto) or x2.search(proto):
            proto = x1.sub(";", proto)
            proto = x2.sub(";", proto)

        self.debug("dump_typedef(): '%(proto)s'", proto=proto)

        if C_TYPEDEF.match(proto):
            self.ctx.decl_name = C_TYPEDEF[0]

            if self.ctx.last_identifier != self.ctx.decl_name:
                self.warn("typedef name from comment differs:  %s <--> %s"
                          % (self.ctx.last_identifier, self.ctx.decl_name)
                          , line_no = self.ctx.decl_offset)

            self.check_sections(self.ctx.decl_name
                                , self.ctx.decl_type
                                , self.ctx.sectcheck
                                , self.ctx.parameterlist )
            self.output_decl(
                self.ctx.decl_name, "typedef_decl"
                , typedef  = self.ctx.decl_name
                , sections = self.ctx.sections
                , purpose  = self.ctx.decl_purpose )
        else:
            self.error("can't parse typedef!")
[docs]
def create_parameterlist(self, parameter, splitchar):
    u"""Split a parameter / member list and push each item.

    :param str parameter: the list, e.g. the arguments of a function
        (split at ``,``) or the members of a struct (split at ``;``)
    :param str splitchar: the character the list is split at

    Every item is classified as preprocessor directive (``MACRO``), pointer
    to function or *common* declaration.  A common declaration may declare
    several names (``int *a, b:2``); each name is handed to
    :py:meth:`push_parameter` with its own type.
    """
    self.debug("create_parameterlist(): splitchar='%(x)s' params='%(y)s'"
               , x=splitchar, y=parameter)

    parameter = normalize_ws(parameter)
    pointer_to_func = RE(r"\(.+\)\s*\(")

    # temporarily replace commas inside function pointer definition
    m = RE(r"(\([^\),]+),")
    while m.search(parameter):
        parameter = m.sub(r"\1#", parameter)

    # drop trailing splitchar, if exists
    if parameter.endswith(splitchar):
        parameter = parameter[:-1]

    self.debug("create_parameterlist(): params='%(y)s'", y=parameter)

    # declarator patterns of the common branch
    ma = RE(r"^(\*+)\s*(.*)")    # '*name' / '** name'
    mb = RE(r"(.*?):(\d+)")      # bit-field 'name:3'

    for c, p in enumerate(parameter.split(splitchar)):

        p = C99_comments.sub("", p)
        p = p.strip()

        self.debug("  parameter#%(c)s: %(p)s", c=c, p=p)

        if MACRO.match(p):
            # Treat preprocessor directive as a typeless variable just to
            # fill corresponding data structures "correctly". Catch it later
            # in output_* subs.
            self.debug("  parameter#%(c)s: (MACRO) %(p)s=''" , c=c, p=p)
            self.push_parameter(p, "")

        elif pointer_to_func.search(p):
            # pointer-to-function
            p = p.replace("#", ",") # reinsert temporarily removed commas
            self.debug("  parameter#%(c)s: (pointer to function) %(p)s", c=c, p=p)
            m = RE(r"[^\(]+\(\*?\s*([\w\.]*)\s*\)")
            m.match(p)
            p_name = m[0]
            p_type = p
            p_type = re.sub(r"([^\(]+\(\*?)\s*"+p_name, r"\1", p_type)
            #self.save_struct_actual(p_name)
            self.push_parameter(p_name, p_type)

        else:
            p = re.sub(r"\s*:\s*", ":", p)
            p = re.sub(r"\s*\["  , "[", p)
            self.debug("  parameter#%(c)s: (common) %(p)s", c=c, p=p)

            p_args = re.split(r"\s*,\s*", p)
            # Detach the pointer stars from the type ('int* a' / 'int*a'
            # --> 'int *a'), so that the whitespace split below separates
            # type and declarator.  The former test looked for a comma,
            # which can never be found after splitting at commas.
            if "*" in p_args[0]:
                p_args[0] = re.sub(r"(\*+)\s*", r" \1", p_args[0])

            self.debug("  parameter#%(c)s : (1) p_args = %(p_args)s"
                       , c=c, p_args=repr(p_args))

            first_arg = []
            m = RE(r"^(.*\s+)(.*?\[.*\].*)$")
            if m.match(p_args[0]):
                # array declarator: keep 'name[...]' in one piece
                p_args.pop(0)
                first_arg.extend(re.split(r"\s+", m[0]))
                first_arg.append(m[1])
            else:
                first_arg.extend(re.split(r"\s+", p_args.pop(0)))

            # the last word is the first declarator, the rest is the type
            p_args = [first_arg.pop() ] + p_args
            self.debug("  parameter#%(c)s : (2) p_args=%(p_args)s"
                       , c=c, p_args=repr(p_args))

            base_type = " ".join(first_arg)

            for p_name in p_args:
                self.debug("  parameter#%(c)s : (3) p_name='%(p_name)s'"
                           , c=c, p_name=p_name)

                # Pointer stars and bit-field widths belong to one
                # declarator only; they must not leak into the type of the
                # following names ('int *a, b' --> b is an 'int').
                p_type = base_type

                if ma.match(p_name):
                    p_type = "%s %s" % (base_type, ma[0])
                    p_name = ma[1]

                elif mb.match(p_name):
                    if not base_type:
                        # skip unnamed bit-fields
                        continue
                    p_name = mb[0]
                    p_type = "%s:%s" % (base_type, mb[1])

                self.debug("  parameter#%(c)s : (4) p_name='%(p_name)s' / p_type='%(p_type)s'"
                           , c=c, p_name=p_name, p_type=p_type)
                #self.save_struct_actual(p_name)
                self.push_parameter(p_name, p_type)
[docs]
def push_parameter(self, p_name, p_type):
    u"""Register one parameter / member in ``self.ctx``.

    :param str p_name: name of the parameter (may be empty)
    :param str p_type: type of the parameter (may be empty)

    Appends the (cleaned) name to ``self.ctx.parameterlist`` and stores the
    type in ``self.ctx.parametertypes``.  Typeless ``...``, ``void`` / empty
    and ``struct`` / ``union`` names get a default description.  Parameters
    without a description are marked ``Parser.undescribed`` and a warning
    is issued.
    """
    self.debug(
        "push_parameter(): p_name='%(p_name)s' / p_type='%(p_type)s'"
        , p_name=p_name, p_type=p_type)

    p_name = p_name.strip()
    p_type = p_type.strip()
    typeless = not p_type

    if self.anon_struct_union and typeless and p_name == "}":
        # ignore the ending }; from anon. struct/union
        return

    self.anon_struct_union = False
    descriptions = self.ctx.parameterdescs

    self.debug(
        "push_parameter(): (1) p_name='%(p_name)s' / p_type='%(p_type)s'"
        , p_name=p_name, p_type=p_type)

    if typeless and p_name.endswith("..."):
        # variable argument list, keep a description given by the author
        if not descriptions.get(p_name, None):
            descriptions[p_name] = "variable arguments"

    elif typeless and p_name in ("", "void"):
        p_name = "void"
        descriptions[p_name] = "no arguments"

    elif typeless and p_name in ("struct", "union"):
        # handle unnamed (anonymous) union or struct:
        p_type = p_name
        p_name = "{unnamed_" + p_name + "}"
        descriptions[p_name] = "anonymous"
        self.anon_struct_union = True

    self.debug(
        "push_parameter(): (2) p_name='%(p_name)s' / p_type='%(p_type)s'"
        , p_name=p_name, p_type=p_type)

    if not p_name.startswith("#"):
        # strip array from paramater name / e.g. p_name is "modes[]" from a
        # parmeter defined by: "const char * const modes[]"
        p_name = re.sub(r"\[.*", "", p_name)
        # strip parentheses and pointers, e.g.: (*foo) --> foo
        p_name = re.sub(r"[\*\(\)]", "", p_name)

    self.debug(
        "push_parameter(): (3) p_name='%(p_name)s' / p_type='%(p_type)s'"
        , p_name=p_name, p_type=p_type)

    # warn if parameter has no description (but ignore ones starting with
    # '#' as these are not parameters but inline preprocessor statements);
    # also ignore unnamed structs/unions;
    undescribed = (
        not self.anon_struct_union
        and not descriptions.get(p_name, None)
        and not p_name.startswith("#") )

    if undescribed:
        if p_type in ("function", "enum"):
            self.warn("Function parameter or member '%(p_name)s' not "
                      "described in '%(decl_name)s'."
                      , p_name = p_name
                      , decl_name = self.ctx.decl_name
                      , line_no = self.ctx.last_offset)
        else:
            self.warn("no description found for parameter '%(p_name)s'"
                      , p_name = p_name, line_no = self.ctx.decl_offset)
        descriptions[p_name] = Parser.undescribed

    self.ctx.parameterlist.append(p_name)
    self.ctx.parametertypes[p_name] = p_type.strip()
# def save_struct_actual(self, actual):
# # strip all spaces from the actual param so that it looks like one
# # string item
# self.debug("save_struct_actual(): actual='%(a)s'", a=actual)
# actual = WHITESPACE.sub("", actual)
# self.ctx.struct_actual += actual + " "
# self.debug("save_struct_actual: '%(a)s'", a=self.ctx.struct_actual)
[docs]
def check_sections(self, decl_name, decl_type
                   , sectcheck, parameterlist):
    u"""Warn about descriptions which have no matching parameter / member.

    :param str decl_name: name of the declaration
    :param str decl_type: type of the declaration (e.g. ``function``)
    :param list sectcheck: names of the described parameters / members
    :param list parameterlist: names of the declared parameters / members

    Array suffixes (``[...]``) of the declared names are ignored when the
    names are compared.
    """
    self.debug("check_sections(): decl_name='%(n)s' / decl_type='%(t)s' /"
               " sectcheck=%(sc)s / parameterlist=%(pl)s"
               , n=decl_name, t=decl_type, sc=sectcheck, pl=parameterlist)

    for sect in sectcheck:
        declared = any(
            re.sub(r"\[.*\]", "", para) == sect for para in parameterlist)

        if declared:
            self.debug("check_sections(): parameter '%(sect)s': description exists / OK"
                       , sect=sect)
            continue

        if decl_type in ('function', 'macro'):
            self.warn(
                "excess function parameter '%(sect)s' description in '%(decl_name)s'"
                , sect = sect, decl_name = decl_name
                , line_no = self.ctx.decl_offset )
        else:
            self.warn(
                "excess %(decl_type)s member '%(sect)s' description in '%(decl_name)s'"
                , decl_type = decl_type, decl_name = decl_name, sect = sect
                , line_no = self.ctx.decl_offset )
[docs]
def check_return_section(self, decl_name, return_type):
    u"""Warn if a function's return value is not described.

    :param str decl_name: name of the function
    :param str return_type: return type of the function (may be empty)

    The warning is issued only if the option ``verbose_warn`` is set and
    ``self.ctx.sections`` has no content for ``self.section_return``.
    """
    self.debug("check_return_section(): decl_name='%(n)s', return_type='%(t)s"
               , n=decl_name, t=return_type)

    # Ignore an empty return type (It's a macro) and ignore functions with a
    # "void" return type. (But don't ignore "void *")
    if not return_type or re.match(r"void\s*\w*\s*$", return_type):
        self.debug("check_return_section(): ignore void")
        return

    if ( not self.options.verbose_warn
         or self.ctx.sections.get(self.section_return, None) ):
        self.debug("check_return_section(): return-value of %(func)s() OK"
                   , func = decl_name)
        return

    self.warn("no description found for return-value of function '%(func)s()'"
              , func = decl_name, line_no = self.ctx.decl_offset)