zhuzhengyu
Technical User
I wrote a small tool, an HTML parser. I spent several hours coding it, so it must be clumsy. I hope for more advice.<br><br>The tool is written in Java and can be used on NT (jdk1.2).<br> <br>The tool includes:<br><br>In the directory ".\\check\\":<br><br>1. A config file "tag.des" that describes the pairs of tags (they must be pairs, such as <style></style>)
<br><br>2. Three Java classes:<br>(1) position.java (
x,y axes, i.e. the row and column numbers, but I have not used the column position)<br>package check;<br><br>public class position {<br><br> public int row=0;<br> public int column=0;<br><br> public position(int r,int c) {<br> row=r;<br> column=c;<br> }<br>}<br><br>(2) tag.java (
record the position)<br>package check;<br><br>public class tag {<br><br> private int pushed=0;<br> private int poped=0;<br> private position posPush=null;<br> private position posPop=null;<br> private String tagName="";<br><br> public tag(String tag) {<br> tagName=tag;<br> }<br><br> public String getTag() {<br> return tagName;<br> }<br><br> public int getPushed() {<br> return pushed;<br> }<br><br> public int getPophed() {<br> return poped;<br> }<br><br> public int getCountd() {<br> return (pushed-poped);<br> }<br><br> public position getPushPos() {<br> return posPush;<br> }<br><br> public position getPopPos() {<br> return posPop;<br> }<br><br> public void push(position p) {<br> pushed++;<br> posPush=p;<br> }<br><br> public void pop(position p) {<br> poped++;<br> posPop=p;<br> }<br><br> public boolean isHead(String stoken) {<br> stoken=stoken.toLowerCase();<br> if((stoken.startsWith("<"+tagName)==true)&&(stoken.endsWith(">"
==true)){<br> return true;<br> }<br> else<br> return false;<br> }<br><br> public boolean isTail(String stoken) {<br> if((stoken.startsWith("</"
==true)&&(stoken.endsWith(">"
==true)&&(stoken.toLowerCase().indexOf(tagName)>0)){<br> return true;<br> }<br> else<br> return false;<br> }<br><br>}<br><br>(3) html.java (has 3 steps):<br> a). use parseFile() to read in the file to be checked<br> b). use writeFile() to write the rearranged file out as "out.html"<br> c). use check() to check "out.html" and report the position where an error occurs<br><br>package check;<br>import java.awt.*;<br>import java.io.*;<br>import java.util.*;<br><br>public class html extends Vector{<br><br> private Vector htmlTag=null;<br> private Stack stack=null;<br> private String string="",headStr="",tailStr="",between="";<br><br> private int tab=0,lastTab=0;<br><br> public html() {<br> String file=".\\check\\tag.des";<br> try{<br> String[] tempStr=new String[100];<br> int count=0;<br><br> BufferedReader in=new BufferedReader(new FileReader(file));<br> String line="";<br> while((line=in.readLine())!=null){<br> line=line.substring(0,line.indexOf(";"
);<br> tempStr[count]=line;<br> count++;<br> }<br><br> htmlTag=new Vector(count+1);<br> for(int i=0;i<count;i++)<br> htmlTag.addElement(new tag(tempStr<i>));<br><br> stack=new Stack();<br><br> }<br> catch(Exception error){<br> System.err.print("error"
;<br> }<br> }<br><br> private boolean isHead(String str){<br> for (int i=0;i<htmlTag.size();i++){<br> tag temp=(tag)htmlTag.elementAt(i);<br> if (temp.isHead(str)==true){<br> return true;<br> }<br> }<br> return false;<br> }<br><br> private boolean isTail(String str){<br> for (int i=0;i<htmlTag.size();i++){<br> tag temp=(tag)htmlTag.elementAt(i);<br> if (temp.isTail(str)==true){<br> return true;<br> }<br> }<br> return false;<br> }<br><br> public void parseFile(String file){<br> try{<br> BufferedReader in=new BufferedReader(new FileReader(file));<br> String line="";<br> while((line=in.readLine())!=null){<br> line=replace(line,"\t"," "
;<br> String tempHead="",tempTail="";<br> int pos=0;<br> while((pos=line.indexOf(" "
)>0){<br> tempHead=line.substring(0,pos);<br> tempTail=line.substring(pos+1).trim();<br> line=tempHead+"\r"+tempTail;<br> }<br> line=replace(line,"\r"," "
;<br> if(line!=""
string+=line;<br> }<br><br> String token="";<br> String tempStr="";<br> while((token=nextToken())!=""
{<br> tempStr=headStr;<br> if(isHead(token)==true){<br> tempStr+="\n";<br> if(lastTab==1)tab++;<br> for(int i=0;i<tab;i++){<br> tempStr+=" ";<br> }<br> lastTab=1;<br> tempStr+=token;<br> }<br> else if(isTail(token)==true){<br> if(lastTab==0)tab--;<br> if(tab<0)tab=0;<br> tempStr+="\n";<br> for(int i=0;i<tab;i++){<br> tempStr+=" ";<br> }<br> lastTab=0;<br> tempStr+=token;<br> }<br> else{<br> tempStr+="\n";<br> for(int i=0;i<tab;i++){<br> tempStr+=" ";<br> }<br> tempStr+=token;<br> }<br> headStr=tempStr;<br> }<br> string=headStr;<br> System.out.print(string);<br> }<br> catch(Exception error){<br> System.err.print("error"
;<br> }<br> }<br><br> public void writeFile(){<br> try{<br> BufferedWriter out=new BufferedWriter(new FileWriter("out.html"
);<br> out.write(string);<br> out.flush();<br> }<br> catch(Exception error){<br> }<br> }<br><br> public void check(){<br><br> try{<br> BufferedWriter out=new BufferedWriter(new FileWriter("out.err"
);<br> String err="";<br><br> String token="";<br> int count=0;<br> while((token=nextToken())!=""
{<br> count++;<br> position pos=new position(count,1);<br> if(isHead(token)){<br> tag temp=new tag(token);<br> temp.push(pos);<br> stack.push(temp);<br> }<br> else if(isTail(token)){<br> tag temp=(tag)stack.pop();<br> if(token.indexOf(temp.getTag())<=0)<br> err+="head:"+temp.getTag()+"!=tail:"+token+" "+" position
ush("+temp.getPushPos().row+","+temp.getPushPos().column+"
\n";<br> }<br> }<br> if(stack.isEmpty()==false){<br> for(int i=0;i<stack.size();i++){<br> tag popTemp=(tag)stack.pop();<br> err+="head has no matched tail--"+popTemp.getTag()+"--\n";<br> }<br> }<br><br> out.write(err);<br> out.flush();<br> }<br> catch(Exception error){<br> }<br><br> }<br><br> private String nextToken(){<br><br> int begin=string.indexOf("<"
;<br> if(begin<0)return "";<br> int end=string.indexOf(">"
;<br> if(end<0)return "";<br><br> String strToken="<"+ string.substring(begin,end+1).substring(1).trim();<br><br> between=string.substring(0,begin);<br><br> between=replace(between,"\t"," "
;<br> String tempHead="",tempTail="";<br> int pos=0;<br> while((pos=between.indexOf(" "
)>0){<br> tempHead=between.substring(0,pos);<br> tempTail=between.substring(pos+1).trim();<br> between=tempHead+"\r"+tempTail;<br> }<br> between=replace(between,"\r"," "
;<br><br> headStr+=between;<br> string=string.substring(end+1);<br> tailStr=string;<br> <br> return strToken;<br><br> }<br><br> private String replace(String string,String strOld,String strNew){<br> String newString="";<br> int i;<br> while(true){<br> i=string.indexOf(strOld);<br> if(i<=0)break;<br> newString=newString+string.substring(0,i)+strNew;<br> string=string.substring(i+strOld.length());<br> }<br> newString=newString+string;<br><br> return newString;<br> }<br><br>}<br><br>3.in the working directory ".":<br>the test java class test.java:<br>import check.*;<br><br>public class test {<br><br> public test() {<br> }<br><br> public static void main(String[] args) {<br> html test=new html();<br> test.parseFile("in.html"
;<br> test.writeFile();<br> test.check();<br> }<br>}<br><br>Defects:<br>1. I don't know how to let the user select the input and output files in Java.<br>2. This procedure must first rearrange the input file and then check the rearranged file.<br>3. The code is not elegant; it contains a lot of garbage.<br><br>But the idea follows what I posted on the forum several days ago.<br><br>