/* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/ import java.io.IOException; import java.io.StringReader; import java.net.URL; import org.xmlpull.v1.XmlPullParser; import org.xmlpull.v1.XmlPullParserException; import org.xmlpull.v1.XmlPullParserFactory; /** * Modified example (based on http://www.xml.com/pub/a/2002/08/14/xmlpull.html?page=2) * to better deal with recursive XHTML structure and take advantage * of XmlPull API . * * @author Aleksander Slominski * */ public class XHTMLOutliner { public static void main(String[] args) { if (args.length == 0) { System.err.println("Usage: java XHTMLOutliner url" ); return; } String input = args[0]; // change to true and recompile to check that parser implementing XmlPull is validating final boolean CHECK_VALIDATION = true; try { XmlPullParserFactory factory = XmlPullParserFactory.newInstance(); factory.setNamespaceAware(true); //this is always suppoted!!! //setting to true may fail if parseimplis not validating if(CHECK_VALIDATION) factory.setValidating(true); XmlPullParser parser = factory.newPullParser(); System.out.println("parser="+parser); if(!CHECK_VALIDATION) { URL u = new URL(input); parser.setInput(u.openStream(), null); } else { // example of invalid XHTML based parser.setInput(new StringReader( "\n" +"simple document" +"

This

invalid

example" +"

a simple paragraph

" )); } int event = parser.next(); while ( (event = parser.next()) != XmlPullParser.END_DOCUMENT) { if (event == XmlPullParser.START_TAG) { if (isHeader(parser.getName())) { printHeaderText(parser); } } } } catch (XmlPullParserException e) { //System.out.println(e); e.printStackTrace(); } catch (IOException e) { System.out.println("IOException while parsing " + input); e.printStackTrace(); } } /** * Print header content (all TEXT events). */ private static void printHeaderText(XmlPullParser parser) throws XmlPullParserException, IOException { // using SKIP pattern that prints TEXT content // http://www.extreme.indiana.edu/~aslom/xmlpull/patterns.html#SKIP int level = 1; while( level > 0 ) { int evenType = parser.next(); if (evenType == XmlPullParser.TEXT) { System.out.print(parser.getText()); } else if (evenType == XmlPullParser.END_TAG) { --level; } else if (evenType == XmlPullParser.START_TAG) { ++level; } } } /** * Determine if this is an XHTML heading element or not * @param String name: tag name * @return boolean true if this is h1, h2, h3, h4, h5, or h6; false * otherwise */ private static boolean isHeader(String name) { if (name.equals("h1")) return true; if (name.equals("h2")) return true; if (name.equals("h3")) return true; if (name.equals("h4")) return true; if (name.equals("h5")) return true; if (name.equals("h6")) return true; return false; } }