Source

Structris / AsbFilter.py

Full commit
#!/usr/bin/env python

import re, sys

# Module-wide state shared by the transformation functions below.

# Line numbers known to exist in the program. main() rebinds this to the set
# of every parsed line's lineNum (which includes "" for unnumbered lines);
# flushBlock() adds each number it hands out, and undoLineMark() checks
# jump targets against it.
allLines = set()

# Line numbers that appear after a THEN, GOTO or GOSUB somewhere in the
# program. Filled in by transformForEditing().
referencedLines = set()

# Lowest line number seen in referencedLines so far, or None until
# transformForEditing() has found a reference.
firstRefdLine = None

# Maps a line number to the line number of the "REM SUB" header line of the
# subroutine it belongs to. Filled in by main().
lineSubMap = {}

##############################################################################
class TransformError(Exception):
  """ Signals a condition the transformation cannot recover from. The
      message passed in becomes the text of the exception. """

  def __init__(self, message):
    super(TransformError, self).__init__(message)
    

##############################################################################
def abortTransform(message):
  """ Stop the transformation by raising a TransformError that carries the
      given message. This function never returns normally. """

  failure = TransformError(message)
  raise failure
  
      
##############################################################################
class BasicLine:
  """ Holds information about one line of the BASIC program.

      Attributes:
        lineCount  -- position value supplied by the caller
        origText   -- the line exactly as it was passed in
        prevIsCall -- caller-supplied flag, consulted only by mustBeNumbered()
        indent     -- whitespace found in front of the line number
        lineNum    -- an int for a plain number, the string "+N" for a
                      relative mark, or "" when the line carries no number
        pieces     -- the statement text split into a list: every statement,
                      every ":" separator (together with the whitespace
                      around it) and any whitespace that opens a statement is
                      its own entry, so "".join(pieces) gives back the text
                      that followed the line number """

  # Compiled once and matched at an offset, so the parser neither evaluates
  # each pattern twice nor re-slices the tail of the string per character.
  _LINE_RE = re.compile(r"^(\s*)(\+?\d*)(.*)$")
  _SEPARATOR_RE = re.compile(r"\s*:\s*")
  _SPACE_RE = re.compile(r"\s+")
  _REM_RE = re.compile(r"REM", re.IGNORECASE)

  def __init__(self, lineCount, origText, prevIsCall):
    (self.lineCount, self.origText, self.prevIsCall) = (lineCount, origText, prevIsCall)

    # Figure out the indent level, line number, and text of the statements
    m = self._LINE_RE.match(origText)
    self.indent = m.group(1)
    if m.group(2):
      if m.group(2).startswith("+"):
        # Relative marks stay as text; the caller resolves them later.
        self.lineNum = m.group(2)
      else:
        self.lineNum = int(m.group(2))
    else:
      self.lineNum = ""
    stmtsText = m.group(3)
    self.pieces = []

    # Break up multiple statements on the line into a list. Handle strings
    # and REM statements correctly. Put separators into their own pieces.
    pos = start = 0
    end = len(stmtsText)
    inString = False
    while pos < end:
      if stmtsText[pos] == '"':
        inString = not inString
        pos += 1
        continue
      if inString:
        # Colons and keywords inside a string literal are just text.
        pos += 1
        continue

      sep = self._SEPARATOR_RE.match(stmtsText, pos)
      if sep:
        self.pieces.append(stmtsText[start:pos])
        self.pieces.append(sep.group(0))
        pos = start = sep.end()
        continue

      if pos == start:
        # Whitespace that opens a statement becomes a piece of its own.
        gap = self._SPACE_RE.match(stmtsText, pos)
        if gap:
          self.pieces.append(gap.group(0))
          pos = start = gap.end()
          continue

      if self._REM_RE.match(stmtsText, pos):
        # A REM swallows the rest of the line, colons included.
        pos = end
      else:
        pos += 1

    # Finish up.
    self.pieces.append(stmtsText[start:pos])

  def matchPieces(self, regex, flags = re.IGNORECASE):
    """ Return the pieces whose stripped text matches regex at its start. """
    return [p for p in self.pieces if re.match(regex, p.strip(), flags)]

  def isCall(self):
    """ Return the pieces starting with CALL (an empty list if none). """
    return self.matchPieces("CALL")

  def isRem(self):
    """ Return the pieces starting with REM (an empty list if none). """
    return self.matchPieces("REM")

  def isBlank(self):
    """ Return True when no piece contains a non-whitespace character. """
    return not self.matchPieces(".")

  def mustBeNumbered(self):
    """ Decide whether this line needs a line number. """

    # Blank lines never need numbers, and neither does "NEW"
    if self.isBlank(): return False
    if self.matchPieces("NEW"): return False

    # Anything besides REM needs a number
    if [p for p in self.pieces if not re.match(r"^$|REM", p.strip(), re.I)]:
      return True

    # Only REM pieces remain. A REM holding any character outside the set
    # A-Z 0-9 : - + < . (lower case, spaces, ...) is numbered like any
    # other statement.
    if self.matchPieces(r".*[^A-Z0-9:\-+<.]", 0): return True

    # What is left is a REM made up solely of characters from that set.
    # Right after a CALL it may be Lammer data, so it is not auto-numbered.
    return not self.prevIsCall

  def __str__(self):
    return self.indent + str(self.lineNum) + "".join(self.pieces)


##############################################################################
def calcLineMark(fromLine, lineNum):
  """ Determine the shorthand version of a line, if any.

      fromLine is the number of the line making the reference, or None (or
      "") when there is no referencing line to check. lineNum is the line
      number to abbreviate, as an int or a string of digits.

      Returns lineNum unchanged when it is empty or not listed in lineSubMap;
      otherwise returns "+N", where N is the distance from the header line of
      the subroutine that lineNum belongs to.

      Raises TransformError when fromLine is a line number that is not
      recorded in the same subroutine as lineNum. """

  if not lineNum: return lineNum
  target = int(lineNum)
  if target not in lineSubMap: return lineNum
  targetSub = lineSubMap[target]

  # Use .get() so that a referencing line outside every subroutine is
  # reported as an illegal reference instead of escaping as a KeyError.
  hasSource = fromLine is not None and fromLine != ""
  if hasSource and lineSubMap.get(fromLine) != targetSub:
    abortTransform("Illegal ref to %s inside another sub from line %s" \
                   % (target, fromLine))
  return "+" + str(target - targetSub)


##############################################################################
def transformForEditing(lines):
  """ Perform transformations to make the BASIC program easy to edit. This
      makes it un-pasteable however, and it needs to be transformed back
      before pasting.

      lines is a list of parsed line objects (lineNum and pieces attributes).
      They are modified in place and the same list is returned. Line numbers
      found after THEN/GOTO/GOSUB are recorded in referencedLines, and the
      smallest of them in firstRefdLine.

      Raises TransformError when a statement refers to a line inside a
      different subroutine. """

  global firstRefdLine

  # Identify every line number that is referenced
  for line in lines:
    for piece in line.pieces:
      m = re.match(r".*(THEN|GOTO|GOSUB)(\s*\d+.*)$", piece, re.IGNORECASE)
      if not m: continue
      for lineNum in re.findall(r"\d+", m.group(2)):
        lineNum = int(lineNum)
        referencedLines.add(lineNum)
        # Compare against None explicitly: 0 is a legal line number and
        # must not be mistaken for "nothing found yet".
        if firstRefdLine is None or lineNum < firstRefdLine:
          firstRefdLine = lineNum

  # Replace line number references within a sub
  for line in lines:
    newPieces = []
    for piece in line.pieces:
      # REM text is left alone: flushBlock() does not translate marks inside
      # REM pieces back, so rewriting them here could not be undone.
      m = None
      if not re.match("REM", piece, re.IGNORECASE):
        m = re.match(r"(.*)(THEN|GOTO|GOSUB)(.*)", piece, re.IGNORECASE)
      if m:
        piece = m.group(1) + m.group(2) + \
                re.sub(r"\d+", lambda ref: calcLineMark(line.lineNum, ref.group(0)), m.group(3))
      newPieces.append(piece)
    line.pieces = newPieces

    # Decide whether the line keeps a label. A line without a plain integer
    # number (unnumbered, or still carrying a "+N" mark) always takes the
    # "drop" branch; this spells out what the mixed-type comparison did
    # under Python 2. With no references at all, every number is dropped.
    numText = str(line.lineNum)
    if numText == "" or numText.startswith("+"):
      dropNumber = True
    else:
      pastFirstRef = firstRefdLine is None or line.lineNum > firstRefdLine
      dropNumber = pastFirstRef and line.lineNum not in referencedLines

    if dropNumber:
      line.lineNum = ""
      # The whitespace that separated the number from the statement goes too.
      if line.pieces and line.pieces[0].strip() == "":
        del line.pieces[0]
    else:
      line.lineNum = calcLineMark(None, line.lineNum)

  return lines


##############################################################################
def undoLineMark(lineNum, mark):
  """ Translate a "+123" mark to a real line number in the current sub.

      lineNum is the number of the line that contains the mark; mark is the
      text "+N". The current sub is taken to be the highest subroutine
      header recorded in lineSubMap that does not exceed lineNum.

      Returns the absolute target line number as a string.

      Raises TransformError when the containing line has no number, when no
      subroutine starts at or before lineNum, or when the computed target is
      not in allLines. """

  # Raise explicitly instead of asserting: the check must survive "python -O"
  # and be reported like every other transformation failure.
  if lineNum == "":
    abortTransform("Cannot resolve '%s' on a line without a line number" % mark)

  here = int(lineNum)
  subsBefore = [int(sub) for sub in lineSubMap.values() if int(sub) <= here]
  if not subsBefore:
    abortTransform("Cannot find subroutine for '%s' at line %d" % (mark, here))

  # The nearest preceding header is simply the largest candidate.
  target = max(subsBefore) + int(mark[1:])
  if target not in allLines:
    abortTransform("Unknown target %d for '%s' at line %d" % (target, mark, here))
  return str(target)
  
  
##############################################################################
def flushBlock(block, startLine, endLine, out):
  """ Auto-number a block of lines with the given start and end constraints.

      block     -- list of parsed line objects to emit, in order
      startLine -- line number the block starts from (a false value means 0)
      endLine   -- line number that follows the block; a false value means
                   there is no upper bound and 10 numbers per line are allowed
      out       -- list that the processed lines are appended to

      Lines that need a number and lack one are given one, using the largest
      step of 10, 5, 2 or 1 that fits between the bounds. "+N" marks after
      THEN/GOTO/GOSUB in non-REM pieces are turned back into absolute numbers.
      A block in which no line needs a number is appended to out untouched.

      Raises TransformError when the lines do not fit between the bounds or a
      number to be assigned is already in use. """

  numLinesNeeded = len([l for l in block if l.mustBeNumbered()])
  if numLinesNeeded == 0:
    out.extend(block)
    return

  s = startLine if startLine else 0
  e = endLine if endLine else s + 10*numLinesNeeded

  # Floor division keeps the step an integer on every Python version.
  incr = (e - s) // numLinesNeeded
  if incr >= 10:
    incr = 10
  elif incr >= 5:
    incr = 5
  elif incr >= 2:
    incr = 2
  elif incr >= 1:
    incr = 1
  else:
    abortTransform("Cannot auto-number block %d-%d" % (s, e))

  cur = s
  for line in block:
    if line.lineNum:
      # An existing number re-anchors the sequence.
      cur = line.lineNum + incr
    elif line.mustBeNumbered():

      # Never hand out a number that is already taken. This is a real check
      # rather than an assert so it still runs under "python -O".
      if cur == endLine or cur in allLines:
        abortTransform("Cannot auto-number block %d-%d: line %d is already in use" \
                       % (s, e, cur))
      allLines.add(cur)
      line.lineNum = cur

      # If necessary, add a space between line num and statement(s)
      if line.pieces[0].strip() != "":
        line.pieces.insert(0, " ")

      cur += incr

    # Translate "+123" marks to real line numbers
    newPieces = []
    for piece in line.pieces:
      if not re.match("REM", piece, re.I):
        m = re.match(r"(.*)(THEN|GOTO|GOSUB)(.*)", piece, re.IGNORECASE)
        if m:
          piece = m.group(1) + m.group(2) + \
                  re.sub(r"\+\d+", lambda mark: undoLineMark(line.lineNum, mark.group(0)), m.group(3))
      newPieces.append(piece)

    # Moving on
    line.pieces = newPieces

    out.append(line)
    
      
##############################################################################
def transformForApple(lines):
  """ Reverse the editing transformations so that the program can be pasted
      into an Apple II emulator. Returns a new list holding the lines after
      flushBlock() has processed them. """

  # Collect runs of lines: each run opens with a line that already has a
  # number (the very first run may not) and continues with the unnumbered
  # lines that follow it. Every run is handed to flushBlock() for numbering.
  numbered = []
  pending = []
  anchor = 0
  for current in lines:
    if current.lineNum == "":
      pending.append(current)
      continue
    flushBlock(pending, anchor, current.lineNum, numbered)
    pending = [current]
    anchor = current.lineNum

  # Whatever is left has no upper bound on its numbers.
  flushBlock(pending, anchor, None, numbered)
  return numbered
      
      
##############################################################################
def main():
  """ Read the BASIC program named by the first command-line argument,
      transform it, and print the result to standard output.

      When fewer than half of the lines carry a line number the input is
      treated as the editing form and converted for the Apple; otherwise it
      is converted for editing. If the transformation fails, an error line
      is printed followed by the input lines unchanged.

      The statements below are written in the form accepted by both
      Python 2.6+ and Python 3. """

  global allLines

  # Read in the file; the with-block closes it as soon as it has been read.
  with open(sys.argv[1], "r") as infile:
    origLines = [re.sub(r"\n$", "", line) for line in infile]

  # Parse each line
  lines = []
  prevIsCall = False
  nNumbered = 0
  for i, text in enumerate(origLines):
    nl = BasicLine(i, text, prevIsCall)
    lines.append(nl)
    if nl.lineNum != "":
      nNumbered += 1
    # The flag stays set across a run of REMs that follows a CALL.
    prevIsCall = nl.isCall() or (prevIsCall and nl.isRem())

  # Determine where the subroutine boundaries are, and assign each line
  # to a sub when possible
  curSub = None
  for line in lines:
    # A header is a line whose first non-blank piece is "REM <sep> SUB...".
    found = False
    for piece in line.pieces:
      if re.match(r"REM\W+SUB", piece, re.IGNORECASE):
        found = True
      elif len(piece.strip()) > 0:
        break
    if found and line.lineNum != "":
      curSub = line.lineNum
    elif curSub is not None and line.lineNum:
      # Resolve a relative "+N" label against the current header.
      if str(line.lineNum).startswith("+"):
        line.lineNum = curSub + int(line.lineNum[1:])
      if line.lineNum > curSub:
        lineSubMap[line.lineNum] = curSub

  # Figure out the set of all line numbers
  allLines = set([ln.lineNum for ln in lines])

  # Transform either for editing or for Apple
  try:
    # Floor division keeps the threshold an integer on every Python version.
    if nNumbered < len(lines) // 2:
      lines = transformForApple(lines)
    else:
      lines = transformForEditing(lines)

    # All done.
    for line in lines:
      print(line)

  except TransformError as e:
    print("*** Transformation error: " + str(e) + "***")
    for line in origLines:
      print(line)
      

##############################################################################
# Run the filter only when this file is executed as a script, so that
# importing the module does not read sys.argv or print anything.
if __name__ == "__main__":
  main()