Sunday, September 11, 2011

XML Sequence Delimiter

An XML sequence delimiter can be useful for processing XML streams without the boilerplate code that comes with well-known XML parsers. A simple use case is previewing XML transformations on the fly, e.g. producing partial output of the transformation while the user is still typing the script. This one uses a simple pattern-based filter, applied repeatedly to the remaining input, to achieve it. The delimiter is assumed to be a comma (,). Note that the same approach can be combined with a Scanner to improve stream capturing and matching. Worst-case complexity is O(n), where n is the number of XML chunks, and space complexity is O(m+n).

package utilities;

import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Breaks up a block of XML into its leading elements, in order, and joins
 * them into a single delimited string.
 *
 * <p>Two element shapes are recognised at the front of the input: an empty
 * element such as {@code <A/>} and a non-empty element such as
 * {@code <B>text</B>}. Whitespace around each element is discarded. Anything
 * that matches neither shape is echoed back, trimmed, as one final chunk.
 *
 * <p>Limitations of the pattern-based approach: tag names must consist of
 * word characters only (attributes are not recognised), and a non-empty
 * element ends at the first closing tag found, whatever its name, so nested
 * elements are not kept together.
 */
public class BreakXMLIntoGroups {

    /** Matches one empty element (e.g. {@code <A/>}) plus surrounding whitespace. */
    private static final Pattern EMPTY_ELEMENT_PATTERN =
            Pattern.compile("\\s*\n*<\\w+/>\\s*\n*");

    /** Matches one non-empty element up to the first closing tag, plus surrounding whitespace. */
    private static final Pattern NON_EMPTY_ELEMENT_PATTERN =
            Pattern.compile("\\s*\n*<\\w+>.*?</\\w+>\\s*\n*", Pattern.DOTALL);

    /**
     * Delimiter placed between chunks. When it is a comma (ignoring
     * surrounding whitespace) the chunks are joined with {@code ", "};
     * any other value is used verbatim.
     */
    String delimiter = ",";

    /**
     * Splits the input into trimmed chunks, one per recognised element.
     *
     * @param nonXSLElements the XML text to split; may be {@code null} or empty
     * @return the chunks in input order; empty when the input is {@code null} or empty
     */
    private List<String> getList(String nonXSLElements) {
        List<String> chunks = new LinkedList<String>();
        if (nonXSLElements == null) {
            return chunks;
        }

        final int end = nonXSLElements.length();
        Matcher emptyElement = EMPTY_ELEMENT_PATTERN.matcher(nonXSLElements);
        Matcher nonEmptyElement = NON_EMPTY_ELEMENT_PATTERN.matcher(nonXSLElements);

        // Iterate instead of recursing so that long inputs cannot overflow the stack.
        int position = 0;
        while (position < end) {
            Matcher hit;
            if (emptyElement.region(position, end).lookingAt()) {
                hit = emptyElement;
            } else if (nonEmptyElement.region(position, end).lookingAt()) {
                hit = nonEmptyElement;
            } else {
                // Not a recognised element (e.g. a dangling closing tag): echo the rest back.
                chunks.add(nonXSLElements.substring(position).trim());
                break;
            }
            chunks.add(hit.group().trim());
            // Both patterns require at least "<x/>", so a hit always advances the position.
            position = hit.end();
        }
        return chunks;
    }

    /**
     * Splits the given XML text into elements and joins them with the
     * configured {@link #delimiter}.
     *
     * <p>Each call is independent: no output from a previous call is carried over.
     *
     * @param nonXSLElements the XML text to split; may be {@code null} or empty
     * @return the delimited elements, or an empty string when there is nothing to split
     */
    public String getGroup(String nonXSLElements) {
        List<String> chunks = getList(nonXSLElements);

        // A comma delimiter is rendered as ", " (comma followed by a space).
        String separator = ",".equals(delimiter.trim()) ? ", " : delimiter;

        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (String chunk : chunks) {
            if (!first) {
                joined.append(separator);
            }
            joined.append(chunk);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Demonstrates the splitter on a small sample and prints the result.
     *
     * @param a unused command-line arguments
     */
    public static void main(String[] a) {

        StringBuilder str = new StringBuilder();
        str.append("<A/>");
        str.append("<A/><B>Test1");
        str.append("</B>");
        str.append("<C>Test2</C>");
        str.append("<A/>");
        str.append("<D>");
        str.append("Test3</D>");
        str.append("<E>Test4</E>");
        str.append("<E>Test4</E>");

        BreakXMLIntoGroups b = new BreakXMLIntoGroups();
        System.out.println(b.getGroup(str.toString()));
    }
}


Output:

<A/>, <A/>, <B>Test1</B>, <C>Test2</C>, <A/>, <D>Test3</D>, <E>Test4</E>, <E>Test4</E>

0 comments:

Post a Comment