#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# pylint: disable=C0103,R0912,R0914,R0915
u"""
dbxml2rst.pandoc
~~~~~~~~~~~~~~~~
Pandoc stuff used by the dbxml2rst library
:copyright: Copyright (C) 2017 Markus Heiser
:license: GPL V3.0, see LICENSE for details.
"""
# ==============================================================================
# imports
# ==============================================================================
import re
import sys
import functools
import json
from fspath import which
from .nodes import Table
from . import helper
from .helper import LOG
# ==============================================================================
# constants
# ==============================================================================
# Handle of the pandoc executable; stays ``None`` until init() has been run.
PANDOC_EXE = None

def init():
    u"""Look up the ``pandoc`` executable and remember it in ``PANDOC_EXE``.

    The lookup is delegated to ``fspath.which``.  The conversion functions of
    this module test the stored value with ``not PANDOC_EXE``, so the lookup is
    presumably falsy when pandoc can not be found -- TODO confirm against the
    fspath documentation.
    """
    global PANDOC_EXE  # pylint: disable=W0603
    PANDOC_EXE = which('pandoc', False)
# ==============================================================================
def xml2json(src, dst, **kwargs):
# ==============================================================================
    u"""Convert the DocBook XML file ``src`` into the pandoc JSON file ``dst``.

    :param src:    DocBook-XML file to read (passed to pandoc as input file)
    :param dst:    JSON file to write (passed to pandoc as ``--output``)
    :param kwargs: passed through to ``PANDOC_EXE.Popen``

    The process is terminated with exit code 42 if pandoc is not available.  A
    failing pandoc run does not raise, but it is reported in the log.
    """
    if not PANDOC_EXE:
        LOG.error("pandoc is not installed")
        sys.exit(42)

    # NOTE(review): newer pandoc releases reportedly dropped the ``--smart``
    # option -- confirm which pandoc versions have to be supported.
    proc = PANDOC_EXE.Popen(
        "--smart",
        # "-s",  # standalone document
        "--from", "docbook",
        "--to", "json",
        "--output", dst,
        src,
        **kwargs)

    # Assumes Popen() hands back a subprocess.Popen compatible object, i.e.
    # communicate() returns a (stdout, stderr) pair and the exit status is
    # available as ``returncode`` afterwards.
    _stdout, stderr = proc.communicate()
    if proc.returncode:
        msg = "pandoc exited with status %s while converting %s" % (
            proc.returncode, src)
        if stderr:
            msg += ": %s" % stderr
        LOG.error(msg)
# ==============================================================================
def toJSONFilters(input_stream, output_stream, *actions):
# ==============================================================================
    u"""Stream based variant of ``pandocfilters.toJSONFilters``.

    In contrast to the function from pandocfilters, this version reads the
    pandoc JSON document from any input stream (not only from stdin) and writes
    the filtered document to any output stream (not only to stdout).

    :param input_stream:  object with a ``read()`` method returning the JSON text
    :param output_stream: file-like object the resulting JSON is dumped to
    :param actions:       filter actions, applied one after the other with
                          ``pandocfilters.walk``

    The document metadata handed to the actions is taken from the ``meta`` key
    of the newer (dict based) pandoc JSON layout or, for the older list based
    layout, from ``doc[0]['unMeta']``.
    """
    import pandocfilters

    doc = json.loads(input_stream.read())
    fmt = "json"

    if isinstance(doc, dict):
        # newer JSON layout: one object with the metadata below 'meta'
        meta = doc.get("meta", {})
    elif doc and doc[0]:
        # older JSON layout: [ {"unMeta": ...}, [blocks ...] ]
        meta = doc[0]["unMeta"]
    else:
        meta = {}

    altered = functools.reduce(
        lambda tree, action: pandocfilters.walk(tree, action, fmt, meta),
        actions,
        doc)
    json.dump(altered, output_stream)
# ==============================================================================
def jsonFilter(src, dst, *filters):
# ==============================================================================
    u"""Run ``*filters`` over the pandoc JSON file ``src``, result goes to ``dst``.

    Both files are opened with their ``openTextFile`` method (``dst`` in mode
    ``"w"``) and the streams are handed over to :py:func:`toJSONFilters`.
    """
    with src.openTextFile() as reader:
        with dst.openTextFile("w") as writer:
            toJSONFilters(reader, writer, *filters)
# ==============================================================================
def json2rst(src, dst, **kwargs):
# ==============================================================================
    u"""Convert the pandoc JSON file ``src`` into the reST file ``dst``.

    :param src:    JSON file to read (passed to pandoc as input file)
    :param dst:    reST file to write (passed to pandoc as ``--output``)
    :param kwargs: passed through to ``PANDOC_EXE.Popen``

    The process is terminated with exit code 42 if pandoc is not available
    (same behavior as :py:func:`xml2json`).  A failing pandoc run does not
    raise, but it is reported in the log.
    """
    # Without this guard a missing pandoc ends in an AttributeError on None.
    if not PANDOC_EXE:
        LOG.error("pandoc is not installed")
        sys.exit(42)

    proc = PANDOC_EXE.Popen(
        "--reference-links",
        "--from", "json",
        "--to", "rst",
        "--output", dst,
        # activate this for the large ASCII tables
        # "--columns", "180",
        src,
        **kwargs)

    # Assumes Popen() hands back a subprocess.Popen compatible object, i.e.
    # communicate() returns a (stdout, stderr) pair and the exit status is
    # available as ``returncode`` afterwards.
    _stdout, stderr = proc.communicate()
    if proc.returncode:
        msg = "pandoc exited with status %s while converting %s" % (
            proc.returncode, src)
        if stderr:
            msg += ": %s" % stderr
        LOG.error(msg)
# ==============================================================================
def fixPandocRST(src, dst):
# ==============================================================================
    u"""Copy the reST file ``src`` to ``dst`` and repair pandoc's markup on the way.

    * ``helper.rstHEADER`` is written first, ``helper.rstFOOTER`` last
    * every ``⋆`` is replaced by ``*``
    * lines consisting only of whitespace are written as empty lines
    * lines equal to ``Table.tableStartMark`` / ``Table.tableEndMark`` are not
      copied, they extend / shorten the prefix (``Table.rstBlock``) which is
      put in front of the lines that follow
    * backslashes are taken out of lines where pandoc escaped one of the
      characters `````, ``|``, ``*`` or ``_``
    """
    # pandoc's reST writer quotes more than needed, see
    # https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Writers/RST.hs#L162
    # --> """escapeStringUsing (backslashEscapes "`\\|*_")"""
    escaped = re.compile(r"\\[`\|\||\*|_]")
    prefix = ""

    with src.openTextFile() as reader, dst.openTextFile("w") as writer:
        writer.write(helper.rstHEADER)
        for raw in reader:
            raw = raw.replace(u"⋆", "*")
            content = raw.strip()
            if not content:
                writer.write("\n")
            elif content == Table.tableStartMark:
                prefix += Table.rstBlock
            elif content == Table.tableEndMark:
                prefix = prefix[:-len(Table.rstBlock)]
            else:
                writer.write(_dropBackslashes(prefix + raw, escaped))
        writer.write(helper.rstFOOTER)

# ==============================================================================
def _dropBackslashes(line, escaped):
# ==============================================================================
    u"""Return ``line`` without backslashes if the pattern ``escaped`` matches.

    A line without a match is returned unchanged.  In a table row (first
    non-blank character is ``|``) each dropped backslash is compensated by one
    space, inserted in front of the next tab, space or newline, so that the
    cell keeps its width.  Trailing whitespace of such a row is stripped and
    the row is closed by a single newline.  In any other line the backslashes
    are simply removed.
    """
    if not escaped.search(line):
        return line
    if line.strip()[0] != "|":
        return line.replace("\\", "")

    pieces = []
    padding = ""
    for char in line:
        if char == "\\":
            # remember the lost column, it is given back at the next whitespace
            padding += " "
        elif padding and char in ("\t", " ", "\n"):
            pieces.append(padding + char)
            padding = ""
        else:
            pieces.append(char)
    return "".join(pieces).rstrip() + "\n"
# ==============================================================================
# init
# ==============================================================================
# look up the pandoc executable once, at the time this module is imported
init()