Source code for SlicerWizard.CMakeParser

"""Symbolic manipulation of CMake scripts.

This module provides a set of classes to support symbolic parsing of CMake
script. The use of symbolic parsing, as opposed to regular expressions, frees
the user from needing to worry about syntactic context (is that really a
function, or is it inside of a string or comment?) and provides a representation
that is more suitable to direct manipulation.

In addition to the several token classes provided, :class:`.CMakeScript`
provides an interface for bidirectional translation between raw text and
tokenized representations.

Unlike other parsers, this module is not a "pretty formatter", but rather is
specifically designed to preserve the original formatting of a script in order
to be able to convert losslessly from raw to parsed form and back,
while still providing a parsed form that is easy to use and manipulate. Care
should be used, however, when creating or manipulating scripts, as there are
effectively no safeguards against producing a script that is syntactically
invalid.
"""

import os
import re
import string

#=============================================================================
class Token(object):
  """Base class for CMake script tokens.

  This is the base class for CMake script tokens. An occurrence of a token
  whose type is exactly :class:`.Token` (i.e. not a subclass thereof) is a
  syntactic error unless the token text is empty.

  .. attribute:: text

    The textual content of the token.

  .. attribute:: indent

    The whitespace (including newlines) which preceded the token. As the parser
    is strictly preserving of whitespace, note that this must be non-empty in
    many cases in order to produce a syntactically correct script.
  """

  #---------------------------------------------------------------------------
  def __init__(self, text, indent=""):
    """
    :param text: Textual content of the token.
    :param indent: Whitespace which precedes the token.
    """
    self.text = text
    self.indent = indent

  #---------------------------------------------------------------------------
  def __repr__(self):
    """Return a constructor-like representation of the token."""
    return "Token(text=%r, indent=%r)" % (self.text, self.indent)

  #---------------------------------------------------------------------------
  def __str__(self):
    """Return the raw script text of the token, including its indent."""
    return self.indent + self.text

#=============================================================================
class String(Token):
  """String token.

  .. attribute:: text

    The textual content of the string. Note that escapes are not evaluated and
    will appear in their raw (escaped) form.

  .. attribute:: prefix

    The delimiter which starts this string. The delimiter may be empty,
    ``'"'``, or a lua-style long bracket (e.g. ``'[['``, ``'[===['``, etc.).

  .. attribute:: suffix

    The delimiter which ends this string, which shall match the :attr:`prefix`.

  String tokens appear as arguments to :class:`.Command`, as they are not valid
  outside of a command context.
  """

  #---------------------------------------------------------------------------
  def __init__(self, text, indent="", prefix="", suffix=""):
    """
    :param text: Raw (unevaluated) content of the string.
    :param indent: Whitespace which precedes the string.
    :param prefix: Opening delimiter (may be empty).
    :param suffix: Closing delimiter (may be empty).
    """
    # The base initializer returns None, so its result is not kept.
    super(String, self).__init__(text, indent)
    self.prefix = prefix
    self.suffix = suffix

  #---------------------------------------------------------------------------
  def __repr__(self):
    """Return a constructor-like representation of the token."""
    return "String(prefix=%(prefix)r, suffix=%(suffix)r," \
           " text=%(text)r, indent=%(indent)r)" % self.__dict__

  #---------------------------------------------------------------------------
  def __str__(self):
    """Return the raw script text: indent, delimiters and content."""
    return self.indent + self.prefix + self.text + self.suffix

#=============================================================================
class Comment(Token):
  """Comment token.

  .. attribute:: text

    The textual content of the comment.

  .. attribute:: prefix

    The delimiter which starts this comment: ``'#'``, optionally followed by a
    lua-style long bracket (e.g. ``'[['``, ``'[===['``, etc.).

  .. attribute:: suffix

    The delimiter which ends this comment: either empty, or a lua-style long
    bracket which shall match the long bracket in :attr:`prefix`.
  """

  #---------------------------------------------------------------------------
  def __init__(self, prefix, text, indent="", suffix=""):
    """
    :param prefix: Opening delimiter of the comment.
    :param text: Content of the comment, excluding the delimiters.
    :param indent: Whitespace which precedes the comment.
    :param suffix: Closing delimiter (empty for a line comment).
    """
    # The base initializer returns None, so its result is not kept.
    super(Comment, self).__init__(text, indent)
    self.prefix = prefix
    self.suffix = suffix

  #---------------------------------------------------------------------------
  def __repr__(self):
    """Return a constructor-like representation of the token."""
    return "Comment(prefix=%(prefix)r, suffix=%(suffix)r," \
           " text=%(text)r, indent=%(indent)r)" % self.__dict__

  #---------------------------------------------------------------------------
  def __str__(self):
    """Return the raw script text: indent, delimiters and content."""
    return self.indent + self.prefix + self.text + self.suffix

#=============================================================================
class Command(Token):
  """Command token.

  .. attribute:: text

    The name of the command.

  .. attribute:: prefix

    The delimiter which starts the command's argument list. This shall end with
    ``'('`` and may begin with whitespace if there is whitespace separating the
    command name from the '('.

  .. attribute:: suffix

    The delimiter which ends the command's argument list. This shall end with
    ``')'`` and may begin with whitespace if there is whitespace separating the
    last argument (or the opening '(' if there are no arguments) from the ')'.

  .. attribute:: arguments

    A :class:`list` of :class:`.String` tokens which comprise the arguments of
    the command.
  """

  #---------------------------------------------------------------------------
  def __init__(self, text, arguments=None, indent="", prefix="(", suffix=")"):
    """
    :param text: Name of the command.
    :param arguments:
      List of argument tokens. If omitted (or ``None``), the command gets its
      own new, empty list.
    :param indent: Whitespace which precedes the command.
    :param prefix: Delimiter which starts the argument list.
    :param suffix: Delimiter which ends the argument list.
    """
    super(Command, self).__init__(text, indent)
    self.prefix = prefix
    self.suffix = suffix
    # A fresh list per instance: a literal ``[]`` default would be a single
    # object shared by every command created without explicit arguments, so
    # appending to one command's arguments would change all of them.
    self.arguments = [] if arguments is None else arguments

  #---------------------------------------------------------------------------
  def __repr__(self):
    """Return a constructor-like representation of the token."""
    return "Command(text=%(text)r, prefix=%(prefix)r," \
           " suffix=%(suffix)r, arguments=%(arguments)r," \
           " indent=%(indent)r)" % self.__dict__

  #---------------------------------------------------------------------------
  def __str__(self):
    """Return the raw script text of the command and all of its arguments."""
    args = "".join([str(a) for a in self.arguments])
    return self.indent + self.text + self.prefix + args + self.suffix

#=============================================================================
class CMakeScript(object):
  """Tokenized representation of a CMake script.

  .. attribute:: tokens

    The :class:`list` of tokens which comprise the script. Manipulations of
    this list should be used to change the content of the script.
  """

  _reWhitespace = re.compile(r"\s")
  # ``string.ascii_letters`` rather than ``string.letters``: the latter is
  # locale-dependent and does not exist in Python 3.
  _reCommand = re.compile(r"([" + string.ascii_letters + r"]\w*)(\s*\()")
  _reComment = re.compile(r"#(\[=*\[)?")
  _reQuote = re.compile("\"")
  _reBracketQuote = re.compile(r"\[=*\[")
  # Escape sequences per the CMake language grammar: '\n', '\r', '\t', or a
  # backslash followed by any non-alphanumeric character. Newline is excluded
  # so that a trailing backslash never swallows the end of a line in an
  # unquoted argument. Escapes are only recognized, never evaluated.
  _reEscape = re.compile(r"\\(?:[nrt]|[^A-Za-z0-9\n])")

  #---------------------------------------------------------------------------
  def __init__(self, content):
    """
    :param content: Textual content of a CMake script.
    :type content: :class:`basestring`

    :raises:
      :exc:`~exceptions.SyntaxError` or :exc:`~exceptions.EOFError` if a
      parsing error occurs (i.e. if the input text is not syntactically valid).

    .. code-block:: python

      with open('CMakeLists.txt') as fi:
        script = CMakeParser.CMakeScript(fi.read())

      with open('CMakeLists.txt.new', 'w') as fo:
        fo.write(str(script))
    """

    self.tokens = []

    # Unparsed remainder of the script; consumed from the front as tokens are
    # recognized.
    self._content = content
    # Result of the most recent call to _is().
    self._match = None

    while len(self._content):
      indent = self._chompSpace()

      # Consume comments
      if self._is(self._reComment):
        self.tokens.append(self._parseComment(self._match, indent))

      # Consume commands
      elif self._is(self._reCommand):
        self.tokens.append(self._parseCommand(self._match, indent))

      # Consume other tokens (pedantically, if we get here, the script is
      # malformed, except at EOF)
      else:
        # Take everything up to the next whitespace. At EOF this yields a
        # token with empty text whose indent carries any trailing whitespace,
        # so that the script round-trips exactly.
        m = self._reWhitespace.search(self._content)
        n = m.start() if m is not None else len(self._content)
        self.tokens.append(Token(text=self._content[:n], indent=indent))
        self._content = self._content[n:]

  #---------------------------------------------------------------------------
  def __repr__(self):
    """Return the representation of the token list."""
    return repr(self.tokens)

  #---------------------------------------------------------------------------
  def __str__(self):
    """Return the script text reassembled from the tokens."""
    return "".join([str(t) for t in self.tokens])

  #---------------------------------------------------------------------------
  def _chomp(self):
    """Remove and return the first character of the unparsed content."""
    result = self._content[0]
    self._content = self._content[1:]
    return result

  #---------------------------------------------------------------------------
  def _chompSpace(self):
    """Remove and return the leading whitespace of the unparsed content."""
    content = self._content
    n = 0

    # Find the extent of the whitespace first so the content is sliced only
    # once, instead of once per whitespace character.
    while n < len(content) and content[n].isspace():
      n += 1

    self._content = content[n:]
    return content[:n]

  #---------------------------------------------------------------------------
  def _chompString(self, end, escapes):
    """Consume the body of a delimited string, through its end delimiter.

    :param end: Delimiter which terminates the string.
    :param escapes:
      Whether escape sequences are recognized. When true, an escaped character
      is never taken to be the start of the end delimiter.

    :returns:
      The raw text which preceded the end delimiter. The delimiter itself is
      consumed but is not part of the result.

    :raises: :exc:`~exceptions.EOFError` if the end delimiter is not found.
    """
    result = ""

    while len(self._content):
      if escapes and self._is(self._reEscape):
        # Copy the escape sequence verbatim
        e = self._match.group(0)
        result += e
        self._content = self._content[len(e):]

      elif self._content.startswith(end):
        self._content = self._content[len(end):]
        return result

      else:
        result += self._chomp()

    raise EOFError("unexpected EOF while parsing string (expected %r)" % end)

  #---------------------------------------------------------------------------
  def _parseArgument(self, indent):
    """Consume one command argument from the unparsed content.

    :param indent: Whitespace which preceded the argument.

    :returns:
      A :class:`.String`. If the argument begins with a quote or long bracket,
      the token carries the matching prefix and suffix. Otherwise the token
      has empty delimiters and its text is the raw argument, including any
      quoted portions embedded in it.

    :raises:
      :exc:`~exceptions.EOFError` if a quoted portion is not terminated.
    """
    text = ""

    while len(self._content):
      if self._is(self._reQuote) or self._is(self._reBracketQuote):
        prefix = self._match.group(0)
        self._content = self._content[len(prefix):]

        if prefix == "\"":
          suffix = prefix
          s = self._chompString(suffix, escapes=True)

        else:
          # Long bracket: '[==[' is closed by ']==]', and escapes are not
          # recognized inside
          suffix = prefix.replace("[", "]")
          s = self._chompString(suffix, escapes=False)

        # Nothing was collected before the opening delimiter, so this is a
        # quoted or bracket argument in its own right
        if not len(text):
          return String(prefix=prefix, suffix=suffix, text=s, indent=indent)

        # Otherwise the quoted portion is embedded in an unquoted argument
        # and is kept verbatim
        text += prefix + s + suffix

      elif self._content[0].isspace():
        break

      elif self._is(self._reEscape):
        e = self._match.group(0)
        text += e
        self._content = self._content[len(e):]

      elif self._content[0] == ")":
        # End of the command's argument list; left for the caller to consume
        break

      else:
        text += self._chomp()

    return String(text=text, indent=indent)

  #---------------------------------------------------------------------------
  def _parseComment(self, match, indent):
    """Consume a comment from the unparsed content.

    :param match: Match of ``_reComment`` at the start of the content.
    :param indent: Whitespace which preceded the comment.

    :returns: A :class:`.Comment`.

    :raises:
      :exc:`~exceptions.EOFError` if a bracket comment is not terminated.
    """
    bracket = match.group(1)

    if bracket is None:
      # Line comment: runs to the end of the line, or to the end of the input
      # if the last line has no trailing newline. The newline itself is not
      # consumed; it becomes part of the next token's indent.
      suffix = ""
      n = self._content.find("\n")
      if n < 0:
        n = len(self._content)

    else:
      # Bracket comment: '#[==[' is closed by ']==]'
      suffix = bracket.replace("[", "]")
      n = self._content.find(suffix)
      if n < 0:
        raise EOFError(
          "unexpected EOF while parsing comment (expected %r)" % suffix)

    i = match.end()
    token = Comment(prefix=self._content[:i], suffix=suffix,
                    text=self._content[i:n], indent=indent)

    self._content = self._content[n + len(suffix):]

    return token

  #---------------------------------------------------------------------------
  def _parseCommand(self, match, indent):
    """Consume a command and its argument list from the unparsed content.

    :param match: Match of ``_reCommand`` at the start of the content.
    :param indent: Whitespace which preceded the command.

    :returns: A :class:`.Command`.

    :raises:
      :exc:`~exceptions.EOFError` if the input ends before the closing ')'.
    """
    command = match.group(1)
    prefix = match.group(2)
    arguments = []

    self._content = self._content[match.end():]

    while len(self._content):
      argIndent = self._chompSpace()

      if not len(self._content):
        break

      if self._content[0] == ")":
        # Whitespace before the ')' belongs to the command's suffix
        self._content = self._content[1:]
        return Command(text=command, arguments=arguments, indent=indent,
                       prefix=prefix, suffix=argIndent + ")")

      else:
        arguments.append(self._parseArgument(argIndent))

    raise EOFError("unexpected EOF while parsing command (expected ')')")

  #---------------------------------------------------------------------------
  def _is(self, regex):
    """Test whether the unparsed content starts with a match of ``regex``.

    The match object (or ``None``) is stored in ``self._match``.
    """
    self._match = regex.match(self._content)
    return self._match is not None
Navigation

Source code for SlicerWizard.CMakeParser

Quick search

Navigation