docs/_scripts/siphon/process.py

   1 # Copyright (c) 2016 Comcast Cable Communications Management, LLC.
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at:
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Generation template class
  16
  17 import html.parser
  18 import json
  19 import logging
  20 import os
  21 import sys
  22 import re
  23
  24 import jinja2
  25
# Classes register themselves in this dictionary
"""Mapping of known processors to their classes"""
# NOTE(review): nothing in this module adds entries to `siphons`; presumably
# the concrete siphon classes defined elsewhere register themselves here.
siphons = {}

"""Mapping of known output formats to their classes"""
# Keyed by format name (e.g. "markdown", "itemlist"). The values are Format
# subclasses; Siphon.__init__ looks one up by name and instantiates it.
formats = {}
  32
  33
class Siphon(object):
    """Generate rendered output for siphoned data.

    Base class for the individual siphon processors. A subclass supplies
    ``name``, ``identifier`` and ``_parser``; ``load_json`` then parses the
    siphoned JSON input into ``_cmds``/``_group`` and ``process`` renders
    an index plus the item bodies through jinja2 templates.
    """

    # Set by subclasses
    """Our siphon name"""
    name = None

    # Set by subclasses
    """Name of an identifier used by this siphon"""
    identifier = None

    # Set by subclasses
    """The pyparsing object to use to parse with"""
    _parser = None

    # Populated by load_json(): group key -> {item key -> parsed item},
    # plus the special "_global" entry holding the input's group metadata.
    """The input data"""
    _cmds = None

    # Populated by group_key() while load_json() runs.
    """Group key to (directory,file) mapping"""
    _group = None

    # Assigned in __init__ as the "siphon.process.<name>" logger.
    """Logging handler"""
    log = None

    """Directory to look for siphon rendering templates"""
    template_directory = None

    """Directory to output parts in"""
    outdir = None

    """Template environment, if we're using templates"""
    _tplenv = None
  66
  67     def __init__(self, template_directory, format, outdir, repository_link):
  68         super(Siphon, self).__init__()
  69         self.log = logging.getLogger("siphon.process.%s" % self.name)
  70
  71         # Get our output format details
  72         fmt_klass = formats[format]
  73         fmt = fmt_klass()
  74         self._format = fmt
  75
  76         # Sort out the template search path
  77         def _tpldir(name):
  78             return os.sep.join((template_directory, fmt.name, name))
  79
  80         self.template_directory = template_directory
  81         searchpath = [
  82             _tpldir(self.name),
  83             _tpldir("default"),
  84         ]
  85         self.outdir = outdir
  86         loader = jinja2.FileSystemLoader(searchpath=searchpath)
  87         self._tplenv = jinja2.Environment(
  88             loader=loader,
  89             trim_blocks=True,
  90             autoescape=False,
  91             keep_trailing_newline=True)
  92
  93         # Convenience, get a reference to the internal escape and
  94         # unescape methods in html.parser. These then become
  95         # available to templates to use, if needed.
  96         self._h = html.parser.HTMLParser()
  97         self.escape = html.escape
  98         self.unescape = html.unescape
  99
 100         # TODO: customize release
 101         self.repository_link = repository_link
 102
 103     # Output renderers
 104
 105     """Returns an object to be used as the sorting key in the item index."""
 106     def index_sort_key(self, group):
 107         return group
 108
 109     """Returns a string to use as the header at the top of the item index."""
 110     def index_header(self):
 111         return self.template("index_header")
 112
 113     """Returns the string fragment to use for each section in the item
 114     index."""
 115     def index_section(self, group):
 116         return self.template("index_section", group=group)
 117
 118     """Returns the string fragment to use for each entry in the item index."""
 119     def index_entry(self, meta, item):
 120         return self.template("index_entry", meta=meta, item=item)
 121
 122     """Returns an object, typically a string, to be used as the sorting key
 123     for items within a section."""
 124     def item_sort_key(self, item):
 125         return item['name']
 126
 127     """Returns a key for grouping items together."""
 128     def group_key(self, directory, file, macro, name):
 129         _global = self._cmds['_global']
 130
 131         if file in _global and 'group_label' in _global[file]:
 132             self._group[file] = (directory, file)
 133             return file
 134
 135         self._group[directory] = (directory, None)
 136         return directory
 137
 138     """Returns a key for identifying items within a grouping."""
 139     def item_key(self, directory, file, macro, name):
 140         return name
 141
 142     """Returns a string to use as the header when rendering the item."""
 143     def item_header(self, group):
 144         return self.template("item_header", group=group)
 145
 146     """Returns a string to use as the body when rendering the item."""
 147     def item_format(self, meta, item):
 148         return self.template("item_format", meta=meta, item=item)
 149
 150     """Returns a string to use as the label for the page reference."""
 151     def page_label(self, group):
 152         return "_".join((
 153             self.name,
 154             self.sanitize_label(group)
 155         ))
 156
 157     """Returns a title to use for a page."""
 158     def page_title(self, group):
 159         _global = self._cmds['_global']
 160         (directory, file) = self._group[group]
 161
 162         if file and file in _global and 'group_label' in _global[file]:
 163             return _global[file]['group_label']
 164
 165         if directory in _global and 'group_label' in _global[directory]:
 166             return _global[directory]['group_label']
 167
 168         return directory
 169
 170     """Returns a string to use as the label for the section reference."""
 171     def item_label(self, group, item):
 172         return "__".join((
 173             self.name,
 174             item
 175         ))
 176
 177     """Label sanitizer; for creating Doxygen references"""
 178     def sanitize_label(self, value):
 179         return value.replace(" ", "_") \
 180                     .replace("/", "_") \
 181                     .replace(".", "_")
 182
 183     """Template processor"""
 184     def template(self, name, **kwargs):
 185         tpl = self._tplenv.get_template(name + self._format.extension)
 186         return tpl.render(
 187             this=self,
 188             **kwargs)
 189
 190     # Processing methods
 191
 192     """Parse the input file into a more usable dictionary structure."""
 193     def load_json(self, files):
 194         self._cmds = {}
 195         self._group = {}
 196
 197         line_num = 0
 198         line_start = 0
 199         for filename in files:
 200             filename = os.path.relpath(filename)
 201             self.log.info("Parsing items in file \"%s\"." % filename)
 202             data = None
 203             with open(filename, "r") as fd:
 204                 data = json.load(fd)
 205
 206             self._cmds['_global'] = data['global']
 207
 208             # iterate the items loaded and regroup it
 209             for item in data["items"]:
 210                 try:
 211                     o = self._parser.parse(item['block'])
 212                 except Exception:
 213                     self.log.error("Exception parsing item: %s\n%s"
 214                                    % (json.dumps(item, separators=(',', ': '),
 215                                                  indent=4),
 216                                       item['block']))
 217                     raise
 218
 219                 # Augment the item with metadata
 220                 o["meta"] = {}
 221                 for key in item:
 222                     if key == 'block':
 223                         continue
 224                     o['meta'][key] = item[key]
 225
 226                 # Load some interesting fields
 227                 directory = item['directory']
 228                 file = item['file']
 229                 macro = o["macro"]
 230                 name = o["name"]
 231
 232                 # Generate keys to group items by
 233                 group_key = self.group_key(directory, file, macro, name)
 234                 item_key = self.item_key(directory, file, macro, name)
 235
 236                 if group_key not in self._cmds:
 237                     self._cmds[group_key] = {}
 238
 239                 self._cmds[group_key][item_key] = o
 240
 241     """Iterate over the input data, calling render methods to generate the
 242     output."""
 243     def process(self, out=None):
 244
 245         if out is None:
 246             out = sys.stdout
 247
 248         # Accumulated body contents
 249         contents = ""
 250
 251         # Write the header for this siphon type
 252         out.write(self.index_header())
 253
 254         # Sort key helper for the index
 255         def group_sort_key(group):
 256             return self.index_sort_key(group)
 257
 258         # Iterate the dictionary and process it
 259         for group in sorted(self._cmds.keys(), key=group_sort_key):
 260             if group.startswith('_'):
 261                 continue
 262
 263             self.log.info("Processing items in group \"%s\" (%s)." %
 264                           (group, group_sort_key(group)))
 265
 266             # Generate the section index entry (write it now)
 267             out.write(self.index_section(group))
 268
 269             # Generate the item header (save for later)
 270             contents += self.item_header(group)
 271
 272             def item_sort_key(key):
 273                 return self.item_sort_key(self._cmds[group][key])
 274
 275             for key in sorted(self._cmds[group].keys(), key=item_sort_key):
 276                 self.log.debug("--- Processing key \"%s\" (%s)." %
 277                                (key, item_sort_key(key)))
 278
 279                 o = self._cmds[group][key]
 280                 meta = {
 281                     "directory": o['meta']['directory'],
 282                     "file": o['meta']['file'],
 283                     "macro": o['macro'],
 284                     "name": o['name'],
 285                     "key": key,
 286                     "label": self.item_label(group, key),
 287                 }
 288
 289                 # Generate the index entry for the item (write it now)
 290                 out.write(self.index_entry(meta, o))
 291
 292                 # Generate the item itself (save for later)
 293                 contents += self.item_format(meta, o)
 294
 295             page_name = self.separate_page_names(group)
 296             if page_name != "":
 297                 path = os.path.join(self.outdir, page_name)
 298                 with open(path, "w+") as page:
 299                     page.write(contents)
 300                 contents = ""
 301
 302         # Deliver the accumulated body output
 303         out.write(contents)
 304
 305     def do_cliexstart(self, matchobj):
 306         title = matchobj.group(1)
 307         title = ' '.join(title.splitlines())
 308         content = matchobj.group(2)
 309         content = re.sub(r"\n", r"\n    ", content)
 310         return "\n\n.. code-block:: console\n\n    %s\n    %s\n\n" % (title, content)
 311
 312     def do_clistart(self, matchobj):
 313         content = matchobj.group(1)
 314         content = re.sub(r"\n", r"\n    ", content)
 315         return "\n\n.. code-block:: console\n\n    %s\n\n" % content
 316
 317     def do_cliexcmd(self, matchobj):
 318         content = matchobj.group(1)
 319         content = ' '.join(content.splitlines())
 320         return "\n\n.. code-block:: console\n\n    %s\n\n" % content
 321
 322     def process_list(self, matchobj):
 323         content = matchobj.group(1)
 324         content = self.reindent(content, 2)
 325         return "@@@@%s\nBBBB" % content
 326
 327     def process_special(self, s):
 328         # ----------- markers to remove
 329         s = re.sub(r"@cliexpar\s*", r"", s)
 330         s = re.sub(r"@parblock\s*", r"", s)
 331         s = re.sub(r"@endparblock\s*", r"", s)
 332         s = re.sub(r"<br>", "", s)
 333         # ----------- emphasis
 334         # <b><em>
 335         s = re.sub(r"<b><em>\s*", "``", s)
 336         s = re.sub(r"\s*</b></em>", "``", s)
 337         s = re.sub(r"\s*</em></b>", "``", s)
 338         # <b>
 339         s = re.sub(r"<b>\s*", "**", s)
 340         s = re.sub(r"\s*</b>", "**", s)
 341         # <code>
 342         s = re.sub(r"<code>\s*", "``", s)
 343         s = re.sub(r"\s*</code>", "``", s)
 344         # <em>
 345         s = re.sub(r"'?<em>\s*", r"``", s)
 346         s = re.sub(r"\s*</em>'?", r"``", s)
 347         # @c <something>
 348         s = re.sub(r"@c\s(\S+)", r"``\1``", s)
 349         # ----------- todos
 350         s = re.sub(r"@todo[^\n]*", "", s)
 351         s = re.sub(r"@TODO[^\n]*", "", s)
 352         # ----------- code blocks
 353         s = re.sub(r"@cliexcmd{(.+?)}", self.do_cliexcmd, s, flags=re.DOTALL)
 354         s = re.sub(r"@cliexstart{(.+?)}(.+?)@cliexend", self.do_cliexstart, s, flags=re.DOTALL)
 355         s = re.sub(r"@clistart(.+?)@cliend", self.do_clistart, s, flags=re.DOTALL)
 356         # ----------- lists
 357         s = re.sub(r"^\s*-", r"\n@@@@", s, flags=re.MULTILINE)
 358         s = re.sub(r"@@@@(.*?)\n\n+", self.process_list, s, flags=re.DOTALL)
 359         s = re.sub(r"BBBB@@@@", r"-", s)
 360         s = re.sub(r"@@@@", r"-", s)
 361         s = re.sub(r"BBBB", r"\n\n", s)
 362         # ----------- Cleanup remains
 363         s = re.sub(r"@cliexend\s*", r"", s)
 364         return s
 365
 366     def separate_page_names(self, group):
 367         return ""
 368
 369     # This push the given textblock <indent> spaces right
 370     def reindent(self, s, indent):
 371         ind = " " * indent
 372         s = re.sub(r"\n", "\n" + ind, s)
 373         return s
 374
 375     # This aligns the given textblock left (no indent)
 376     def noindent(self, s):
 377         s = re.sub(r"\n[ \f\v\t]*", "\n", s)
 378         return s
 379
 380 class Format(object):
 381     """Output format class"""
 382
 383     """Name of this output format"""
 384     name = None
 385
 386     """Expected file extension of templates that build this format"""
 387     extension = None
 388
 389
 390 class FormatMarkdown(Format):
 391     """Markdown output format"""
 392     name = "markdown"
 393     extension = ".md"
 394
 395
 396 # Register 'markdown'
 397 formats["markdown"] = FormatMarkdown
 398
 399
 400 class FormatItemlist(Format):
 401     """Itemlist output format"""
 402     name = "itemlist"
 403     extension = ".itemlist"
 404
 405
 406 # Register 'itemlist'
 407 formats["itemlist"] = FormatItemlist