// Copyright 2000 Crispin Perdue <cris@perdues.com>
// 
// This is free software, and comes with NO WARRANTY.
// You may distribute it under the terms of the Library GNU Public License.
// See http://www.gjt.org/doc/lgpl/license.html for details.

package com.perdues;

import java.util.*;
import java.io.*;


/**
   XML subset parser.  The primary entry points are the load()
   and loadResource() methods.
   <P>
   Supported syntax: start tags, end tags and empty-element tags
   whose names and attribute names are Java identifiers; attribute
   values in double quotes, single quotes, or unquoted; the five
   predefined entities (amp, lt, gt, quot, apos) inside attribute
   values.  Comments, processing instructions (including the XML
   declaration) and &lt;!...&gt; declarations are skipped.  Character
   data between tags is ignored.
   <P>
   Syntax errors are reported by throwing XP.Expectation, which
   is an unchecked exception.
 */
public class XP {

  //// Constructors ////

  /**
     Constructs an XP parser for the given reader.  The constructor
     only remembers the reader; nothing is read or parsed until
     parse() is called.
   */
  public XP(Reader reader) {
    this.reader = reader;
  }


  //// Public Static Methods ////

  /**
     Loads an XML file or archive member given a "system resource"
     name referring to a file or archive member of some
     element of CLASSPATH.  A "system resource name" is a path relative
     to some element of CLASSPATH, with elements of the path
     separated by "/".  This returns an array of XMLTag objects
     representing the top-level XML tags in the document.
     <P>
     Referring to configuration files using system resource
     names can be handy for distribution of default or pre-built
     configurations, because the config file can be distributed
     with Java classes, and setting up the CLASSPATH automatically
     configures the path to the config file.  A pre-built config file
     can even be distributed as part of a JAR.

     @throws IllegalArgumentException if the resource cannot be found
     @throws IOException if reading the resource fails
  */
  public static XMLTag[] loadResource(String resourceName) throws IOException {
    InputStream in = ClassLoader.getSystemResourceAsStream(resourceName);
    if (in==null)
      throw new IllegalArgumentException("Resource not found: "+resourceName);
    // NOTE(review): bytes are decoded with the platform default
    // charset, as before; XML files are commonly UTF-8, so an
    // explicit charset may be wanted here.
    Reader reader = new InputStreamReader(in);
    try {
      return load(reader);
    } finally {
      reader.close();
    }
  }


  /**
     This method loads XML from the given file, returning an
     array of XMLTag objects representing the top-level XML tags
     in the file.  The file is always closed before returning.
   */
  public static XMLTag[] load(File file)
    throws FileNotFoundException, IOException {
    Reader reader = new FileReader(file);
    try {
      return load(reader);
    } finally {
      reader.close();
    }
  }


  /**
     This method loads XML from the given Reader, returning an
     array of XMLTag objects representing the top-level XML tags
     in the input.  The input is obtained through PString.readAll();
     you must close the reader yourself.
   */
  public static XMLTag[] load(Reader reader) throws IOException {
    return new XP(reader).parse();
  }


  //// Public Instance Methods ////

  /**
     Parses the text from this XP's input, returning an
     array of XMLTag objects representing the top-level XML
     tags in the document.

     @throws IOException if reading the input fails
     @throws Expectation if the tags are malformed or not
       properly nested
   */
  public XMLTag[] parse() throws IOException {
    // Initialize the input data from the Reader, and discard
    // anything left over from a previous (possibly failed) parse.
    in = PString.readAll(reader);
    length = in.length();
    pos = 0;
    state.removeAllElements();

    // The base tag is an artificial root that collects the
    // top-level tags of the document.
    XMLTag base = new XMLTag("", false);
    state.push(base);
    while (true) {
      XMLTag top = (XMLTag)state.peek();
      XMLTag tag = parseTag();
      if (tag==null) {
        // EOF has occurred.
        if (top==base)
          return base.getElements();
        throw expectation
          ("At EOF, expecting closing tag for "+top.getType());
      }

      if (tag.isEndTag()) {
        if (top==base)
          throw expectation("Unexpected closing tag for "+tag.getType());
        if (!tag.getType().equals(top.getType()))
          throw expectation("Expecting closing tag for "+top.getType());
        state.pop();
      } else {
        top.addElement(tag);
        // Only a tag that can have content becomes the new
        // enclosing element.
        if (!tag.isEmpty())
          state.push(tag);
      }
    }
  }


  /**
     Parse an XML tag and return a XMLTag object representing it,
     or null if end of input is reached before another tag starts.
     Text before the tag is skipped, as are comments, processing
     instructions and &lt;!...&gt; declarations.
     This does not parse any content for nonempty tags, nor any
     tags that might be contained between this and a matching end
     tag.  This does not read past the end of the first tag in
     the remaining input.
     <P>
     The input must already have been loaded by parse().

     @throws Expectation if the tag is malformed or the input ends
       in the middle of a tag, comment or processing instruction
   */
  public XMLTag parseTag() {
    boolean endTag = false;

    while (true) {
      // Skip character data up to and including the next '<'.
      char c;
      do {
        c = next();
        if (c==EOF)
          return null;
      } while (c!='<');

      c = peek();
      if (c=='/') {
        pos++;
        endTag = true;
        break;
      } else if (c=='!') {
        pos++;
        if (in.startsWith("--", pos)) {
          pos += 2;
          skipPast("-->", "Unexpected EOF inside comment");
        } else {
          // A declaration such as <!DOCTYPE ...>.  Internal
          // subsets containing '>' are not supported.
          skipPast(">", "Unexpected EOF inside <! declaration");
        }
      } else if (c=='?') {
        pos++;
        skipPast("?>", "Unexpected EOF inside processing instruction");
      } else {
        // Start of an ordinary tag; the name begins here.
        break;
      }
    }

    String tagname = parseIdentifier();
    if (tagname==null)
      throw expectation("Expecting tag name");

    XMLTag tag = new XMLTag(tagname, endTag);

    if (endTag) {
      expect('>');
      return tag;
    }

    // Attribute list of a start tag or empty-element tag.
    while (true) {
      skipWhitespace();
      char c = peek();
      if (c==EOF)
        throw expectation("Unexpected EOF inside tag "+tagname);
      if (c=='>') {
        pos++;
        return tag;
      }
      if (c=='/') {
        pos++;
        expect('>');
        tag.setEmpty(true);
        return tag;
      }
      String key = parseIdentifier();
      if (key==null)
        throw expectation("Expecting attribute name");
      expect('=');
      String value = parseValue();
      tag.put(key, unescape(value));
    }
  }


  /**
     Builds (but does not throw) an Expectation whose message
     shows the current line number, the text of the current line
     with "@" marking the input cursor, and the given message.
  */
  public Expectation expectation(String msg) {
    // Clamp defensively so a bad cursor cannot mask the real error.
    int at = Math.max(0, Math.min(pos, in.length()));
    int eol = in.indexOf('\n', at);
    if (eol<0)
      eol = in.length();
    int bol = in.lastIndexOf('\n', at-1);
    int line = 1;
    for (int i=0; i<at; i++) {
      if (in.charAt(i)=='\n') line++;
    }
    String first = in.substring(bol+1, at);
    String rest = in.substring(at, eol);
    return new Expectation("In line "+line+"\n'"+first+"@"+rest+" '\n"+msg);
  }


  //// Private Methods ////

  /**
     Consume and return the next character, or return EOF
     (without moving the cursor) at end of input.
  */
  private char next() {
    return (pos>=length) ? EOF : in.charAt(pos++);
  }


  /**
     Return the next character without consuming it, or EOF
     at end of input.
  */
  private char peek() {
    return (pos>=length) ? EOF : in.charAt(pos);
  }


  /**
     Skip whitespace, then consume the next character if it is
     the given one.  Otherwise (including at EOF) throw an
     Expectation, leaving the cursor on the offending character.
  */
  private void expect(char c) {
    skipWhitespace();
    if (peek()!=c)
      throw expectation("Expecting '"+c+"'");
    pos++;
  }


  /**
     Advance the cursor past any whitespace; stops at EOF.
  */
  private void skipWhitespace() {
    while (pos<length && Character.isWhitespace(in.charAt(pos)))
      pos++;
  }


  /**
     Advance the cursor just past the next occurrence of the
     given terminator.  If the terminator does not occur, throw
     an Expectation with the given message, leaving the cursor
     where the skipped construct began.
  */
  private void skipPast(String terminator, String eofMessage) {
    int found = in.indexOf(terminator, pos);
    if (found<0)
      throw expectation(eofMessage);
    pos = found + terminator.length();
  }


  /**
     Skip whitespace, then parse a run of Java identifier
     characters and return it as a String, or null
     if an identifier is not the next token.
  */
  private String parseIdentifier() {
    skipWhitespace();
    int mark = pos;
    while (pos<length && Character.isJavaIdentifierPart(in.charAt(pos)))
      pos++;
    return (mark==pos) ? null : in.substring(mark, pos);
  }


  /**
     Return all characters between the current position
     and the next occurrence of the target, excluding
     the target, and move the cursor past the target.
     Throws an Expectation if the target does not occur.
  */
  private String scanTo(char target) {
    int end = in.indexOf(target, pos);
    if (end<0)
      throw expectation("Unexpected EOF while scanning for'"+target+"'");
    String text = in.substring(pos, end);
    pos = end+1;
    return text;
  }


  /**
     Parse an XML attribute value.  The value may be enclosed in
     double or single quotes; otherwise it extends up to the next
     whitespace, '/', '>' or end of input.  The returned text
     still contains any character entities.
  */
  private String parseValue() {
    skipWhitespace();
    char quote = peek();
    if (quote=='"' || quote=='\'') {
      pos++;
      return scanTo(quote);
    }
    int mark = pos;
    while (pos<length) {
      char c = in.charAt(pos);
      if (c=='/' || c=='>' || Character.isWhitespace(c))
        break;
      pos++;
    }
    return in.substring(mark, pos);
  }


  /**
     Convert the predefined character entities (amp, lt, gt,
     quot, apos) in the given text String to their corresponding
     actual characters and return the result.  Anything else,
     including an unrecognized entity or a '&' with no following
     ';', is copied through unchanged.
  */
  private String unescape(String text) {
    StringBuffer output = new StringBuffer(text.length());
    int i = 0;
    while (i<text.length()) {
      char c = text.charAt(i);
      if (c=='&') {
        int semi = text.indexOf(';', i);
        if (semi>i) {
          int decoded = entityChar(text.substring(i+1, semi));
          if (decoded>=0) {
            // Recognized: emit the character and skip the
            // whole "&name;" sequence.
            output.append((char)decoded);
            i = semi+1;
            continue;
          }
        }
      }
      output.append(c);
      i++;
    }
    return output.toString();
  }


  /**
     Return the character denoted by the given predefined entity
     name, or -1 if the name is not one of the predefined entities.
  */
  private static int entityChar(String name) {
    if ("amp".equals(name)) return '&';
    if ("lt".equals(name)) return '<';
    if ("gt".equals(name)) return '>';
    if ("quot".equals(name)) return '"';
    if ("apos".equals(name)) return '\'';
    return -1;
  }


  //// Inner Classes ////

  /**
     Unchecked exception reporting that the input did not contain
     what the parser expected at the current position.
  */
  public class Expectation extends RuntimeException {

    protected Expectation(String message) {
      super(message);
    }

  }


  //// Main, for testing ////

  /**
     Parses the file named by the single command line argument and
     prints the result (or any failure) on System.err.
  */
  public static void main(String[] args) {
    if (args.length!=1) {
      System.err.println("Usage: java com.perdues.XP <xml-file>");
      return;
    }
    try {
      System.err.println(PString.toString(load(new File(args[0]))));
    } catch(Throwable t) {
      t.printStackTrace();
    }
  }


  //// Private Data ////

  /** Sentinel returned by next() and peek() at end of input. */
  private static final char EOF = 65535; // 2^16-1

  /** Source of the text; read by parse(). */
  private Reader reader;
  /** The complete input text, set by parse(). */
  private String in;
  /** Index in the input of the next character to read. */
  private int pos = 0;
  /** Length of the input text. */
  private int length;

  /** Stack of currently open XMLTag elements; the bottom is the artificial root. */
  private Stack state = new Stack();

}
