docs/_scripts/siphon/process.py

   1 # Copyright (c) 2016 Comcast Cable Communications Management, LLC.
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at:
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Generation template class
  16
  17 import html.parser
  18 import json
  19 import logging
  20 import os
  21 import sys
  22 import re
  23
  24 import jinja2
  25
  26 # Classes register themselves in this dictionary
  27 """Mapping of known processors to their classes"""
  28 siphons = {}
  29
  30 """Mapping of known output formats to their classes"""
  31 formats = {}
  32
  33
  34 class Siphon(object):
  35     """Generate rendered output for siphoned data."""
  36
  37     # Set by subclasses
  38     """Our siphon name"""
  39     name = None
  40
  41     # Set by subclasses
  42     """Name of an identifier used by this siphon"""
  43     identifier = None
  44
  45     # Set by subclasses
  46     """The pyparsing object to use to parse with"""
  47     _parser = None
  48
  49     """The input data"""
  50     _cmds = None
  51
  52     """Group key to (directory,file) mapping"""
  53     _group = None
  54
  55     """Logging handler"""
  56     log = None
  57
  58     """Directory to look for siphon rendering templates"""
  59     template_directory = None
  60
  61     """Directory to output parts in"""
  62     outdir = None
  63
  64     """Template environment, if we're using templates"""
  65     _tplenv = None
  66
  67     def __init__(self, template_directory, format, outdir, repository_link):
  68         super(Siphon, self).__init__()
  69         self.log = logging.getLogger("siphon.process.%s" % self.name)
  70
  71         # Get our output format details
  72         fmt_klass = formats[format]
  73         fmt = fmt_klass()
  74         self._format = fmt
  75
  76         # Sort out the template search path
  77         def _tpldir(name):
  78             return os.sep.join((template_directory, fmt.name, name))
  79
  80         self.template_directory = template_directory
  81         searchpath = [
  82             _tpldir(self.name),
  83             _tpldir("default"),
  84         ]
  85         self.outdir = outdir
  86         loader = jinja2.FileSystemLoader(searchpath=searchpath)
  87         self._tplenv = jinja2.Environment(
  88             loader=loader,
  89             trim_blocks=True,
  90             autoescape=False,
  91             keep_trailing_newline=True,
  92         )
  93
  94         # Convenience, get a reference to the internal escape and
  95         # unescape methods in html.parser. These then become
  96         # available to templates to use, if needed.
  97         self._h = html.parser.HTMLParser()
  98         self.escape = html.escape
  99         self.unescape = html.unescape
 100
 101         # TODO: customize release
 102         self.repository_link = repository_link
 103
 104     # Output renderers
 105
 106     """Returns an object to be used as the sorting key in the item index."""
 107
 108     def index_sort_key(self, group):
 109         return group
 110
 111     """Returns a string to use as the header at the top of the item index."""
 112
 113     def index_header(self):
 114         return self.template("index_header")
 115
 116     """Returns the string fragment to use for each section in the item
 117     index."""
 118
 119     def index_section(self, group):
 120         return self.template("index_section", group=group)
 121
 122     """Returns the string fragment to use for each entry in the item index."""
 123
 124     def index_entry(self, meta, item):
 125         return self.template("index_entry", meta=meta, item=item)
 126
 127     """Returns an object, typically a string, to be used as the sorting key
 128     for items within a section."""
 129
 130     def item_sort_key(self, item):
 131         return item["name"]
 132
 133     """Returns a key for grouping items together."""
 134
 135     def group_key(self, directory, file, macro, name):
 136         _global = self._cmds["_global"]
 137
 138         if file in _global and "group_label" in _global[file]:
 139             self._group[file] = (directory, file)
 140             return file
 141
 142         self._group[directory] = (directory, None)
 143         return directory
 144
 145     """Returns a key for identifying items within a grouping."""
 146
 147     def item_key(self, directory, file, macro, name):
 148         return name
 149
 150     """Returns a string to use as the header when rendering the item."""
 151
 152     def item_header(self, group):
 153         return self.template("item_header", group=group)
 154
 155     """Returns a string to use as the body when rendering the item."""
 156
 157     def item_format(self, meta, item):
 158         return self.template("item_format", meta=meta, item=item)
 159
 160     """Returns a string to use as the label for the page reference."""
 161
 162     def page_label(self, group):
 163         return "_".join((self.name, self.sanitize_label(group)))
 164
 165     """Returns a title to use for a page."""
 166
 167     def page_title(self, group):
 168         _global = self._cmds["_global"]
 169         (directory, file) = self._group[group]
 170
 171         if file and file in _global and "group_label" in _global[file]:
 172             return _global[file]["group_label"]
 173
 174         if directory in _global and "group_label" in _global[directory]:
 175             return _global[directory]["group_label"]
 176
 177         return directory
 178
 179     """Returns a string to use as the label for the section reference."""
 180
 181     def item_label(self, group, item):
 182         return "__".join((self.name, item))
 183
 184     """Label sanitizer; for creating Doxygen references"""
 185
 186     def sanitize_label(self, value):
 187         return value.replace(" ", "_").replace("/", "_").replace(".", "_")
 188
 189     """Template processor"""
 190
 191     def template(self, name, **kwargs):
 192         tpl = self._tplenv.get_template(name + self._format.extension)
 193         return tpl.render(this=self, **kwargs)
 194
 195     # Processing methods
 196
 197     """Parse the input file into a more usable dictionary structure."""
 198
 199     def load_json(self, files):
 200         self._cmds = {}
 201         self._group = {}
 202
 203         line_num = 0
 204         line_start = 0
 205         for filename in files:
 206             filename = os.path.relpath(filename)
 207             self.log.info('Parsing items in file "%s".' % filename)
 208             data = None
 209             with open(filename, "r") as fd:
 210                 data = json.load(fd)
 211
 212             self._cmds["_global"] = data["global"]
 213
 214             # iterate the items loaded and regroup it
 215             for item in data["items"]:
 216                 try:
 217                     o = self._parser.parse(item["block"])
 218                 except Exception:
 219                     self.log.error(
 220                         "Exception parsing item: %s\n%s"
 221                         % (
 222                             json.dumps(item, separators=(",", ": "), indent=4),
 223                             item["block"],
 224                         )
 225                     )
 226                     raise
 227
 228                 # Augment the item with metadata
 229                 o["meta"] = {}
 230                 for key in item:
 231                     if key == "block":
 232                         continue
 233                     o["meta"][key] = item[key]
 234
 235                 # Load some interesting fields
 236                 directory = item["directory"]
 237                 file = item["file"]
 238                 macro = o["macro"]
 239                 name = o["name"]
 240
 241                 # Generate keys to group items by
 242                 group_key = self.group_key(directory, file, macro, name)
 243                 item_key = self.item_key(directory, file, macro, name)
 244
 245                 if group_key not in self._cmds:
 246                     self._cmds[group_key] = {}
 247
 248                 self._cmds[group_key][item_key] = o
 249
 250     """Iterate over the input data, calling render methods to generate the
 251     output."""
 252
 253     def process(self, out=None):
 254
 255         if out is None:
 256             out = sys.stdout
 257
 258         # Accumulated body contents
 259         contents = ""
 260
 261         # Write the header for this siphon type
 262         out.write(self.index_header())
 263
 264         # Sort key helper for the index
 265         def group_sort_key(group):
 266             return self.index_sort_key(group)
 267
 268         # Iterate the dictionary and process it
 269         for group in sorted(self._cmds.keys(), key=group_sort_key):
 270             if group.startswith("_"):
 271                 continue
 272
 273             self.log.info(
 274                 'Processing items in group "%s" (%s).' % (group, group_sort_key(group))
 275             )
 276
 277             # Generate the section index entry (write it now)
 278             out.write(self.index_section(group))
 279
 280             # Generate the item header (save for later)
 281             contents += self.item_header(group)
 282
 283             def item_sort_key(key):
 284                 return self.item_sort_key(self._cmds[group][key])
 285
 286             for key in sorted(self._cmds[group].keys(), key=item_sort_key):
 287                 self.log.debug(
 288                     '--- Processing key "%s" (%s).' % (key, item_sort_key(key))
 289                 )
 290
 291                 o = self._cmds[group][key]
 292                 meta = {
 293                     "directory": o["meta"]["directory"],
 294                     "file": o["meta"]["file"],
 295                     "macro": o["macro"],
 296                     "name": o["name"],
 297                     "key": key,
 298                     "label": self.item_label(group, key),
 299                 }
 300
 301                 # Generate the index entry for the item (write it now)
 302                 out.write(self.index_entry(meta, o))
 303
 304                 # Generate the item itself (save for later)
 305                 contents += self.item_format(meta, o)
 306
 307             page_name = self.separate_page_names(group)
 308             if page_name != "":
 309                 path = os.path.join(self.outdir, page_name)
 310                 with open(path, "w+") as page:
 311                     page.write(contents)
 312                 contents = ""
 313
 314         # Deliver the accumulated body output
 315         out.write(contents)
 316
 317     def do_cliexstart(self, matchobj):
 318         title = matchobj.group(1)
 319         title = " ".join(title.splitlines())
 320         content = matchobj.group(2)
 321         content = re.sub(r"\n", r"\n    ", content)
 322         return "\n\n.. code-block:: console\n\n    %s\n    %s\n\n" % (title, content)
 323
 324     def do_clistart(self, matchobj):
 325         content = matchobj.group(1)
 326         content = re.sub(r"\n", r"\n    ", content)
 327         return "\n\n.. code-block:: console\n\n    %s\n\n" % content
 328
 329     def do_cliexcmd(self, matchobj):
 330         content = matchobj.group(1)
 331         content = " ".join(content.splitlines())
 332         return "\n\n.. code-block:: console\n\n    %s\n\n" % content
 333
 334     def process_list(self, matchobj):
 335         content = matchobj.group(1)
 336         content = self.reindent(content, 2)
 337         return "@@@@%s\nBBBB" % content
 338
 339     def process_special(self, s):
 340         # ----------- markers to remove
 341         s = re.sub(r"@cliexpar\s*", r"", s)
 342         s = re.sub(r"@parblock\s*", r"", s)
 343         s = re.sub(r"@endparblock\s*", r"", s)
 344         s = re.sub(r"<br>", "", s)
 345         # ----------- emphasis
 346         # <b><em>
 347         s = re.sub(r"<b><em>\s*", "``", s)
 348         s = re.sub(r"\s*</b></em>", "``", s)
 349         s = re.sub(r"\s*</em></b>", "``", s)
 350         # <b>
 351         s = re.sub(r"<b>\s*", "**", s)
 352         s = re.sub(r"\s*</b>", "**", s)
 353         # <code>
 354         s = re.sub(r"<code>\s*", "``", s)
 355         s = re.sub(r"\s*</code>", "``", s)
 356         # <em>
 357         s = re.sub(r"'?<em>\s*", r"``", s)
 358         s = re.sub(r"\s*</em>'?", r"``", s)
 359         # @c <something>
 360         s = re.sub(r"@c\s(\S+)", r"``\1``", s)
 361         # ----------- todos
 362         s = re.sub(r"@todo[^\n]*", "", s)
 363         s = re.sub(r"@TODO[^\n]*", "", s)
 364         # ----------- code blocks
 365         s = re.sub(r"@cliexcmd{(.+?)}", self.do_cliexcmd, s, flags=re.DOTALL)
 366         s = re.sub(
 367             r"@cliexstart{(.+?)}(.+?)@cliexend", self.do_cliexstart, s, flags=re.DOTALL
 368         )
 369         s = re.sub(r"@clistart(.+?)@cliend", self.do_clistart, s, flags=re.DOTALL)
 370         # ----------- lists
 371         s = re.sub(r"^\s*-", r"\n@@@@", s, flags=re.MULTILINE)
 372         s = re.sub(r"@@@@(.*?)\n\n+", self.process_list, s, flags=re.DOTALL)
 373         s = re.sub(r"BBBB@@@@", r"-", s)
 374         s = re.sub(r"@@@@", r"-", s)
 375         s = re.sub(r"BBBB", r"\n\n", s)
 376         # ----------- Cleanup remains
 377         s = re.sub(r"@cliexend\s*", r"", s)
 378         return s
 379
 380     def separate_page_names(self, group):
 381         return ""
 382
 383     # This push the given textblock <indent> spaces right
 384     def reindent(self, s, indent):
 385         ind = " " * indent
 386         s = re.sub(r"\n", "\n" + ind, s)
 387         return s
 388
 389     # This aligns the given textblock left (no indent)
 390     def noindent(self, s):
 391         s = re.sub(r"\n[ \f\v\t]*", "\n", s)
 392         return s
 393
 394
 395 class Format(object):
 396     """Output format class"""
 397
 398     """Name of this output format"""
 399     name = None
 400
 401     """Expected file extension of templates that build this format"""
 402     extension = None
 403
 404
 405 class FormatMarkdown(Format):
 406     """Markdown output format"""
 407
 408     name = "markdown"
 409     extension = ".md"
 410
 411
# Register the Markdown format class under the 'markdown' key of the
# module-level ``formats`` registry.
formats["markdown"] = FormatMarkdown
 414
 415
 416 class FormatItemlist(Format):
 417     """Itemlist output format"""
 418
 419     name = "itemlist"
 420     extension = ".itemlist"
 421
 422
# Register the itemlist format class under the 'itemlist' key of the
# module-level ``formats`` registry.
formats["itemlist"] = FormatItemlist