I could use some help please. I wrote this sub that is suppose to replace illegal characters with a space character. The input file is an xml document that was generated by a Mac. It seems to work if there is a single illegal character, but fails to find the second of two when there are two back to back illegal characters. Does anyone have experience on how best to clean a file of illegals?
Thank you for your help.
Private Sub purgeIllegalCharacters(ByVal filepath As String)
'create a streamreader for the document that needs to be cleaned
Dim sr As StreamReader = New StreamReader(filepath)
'start at the first character
sr.BaseStream.Position = 0
'load all of the data into a single string
Dim strData As String = sr.ReadToEnd
'load the string into a stringbuilder
Dim strBldr As New System.Text.StringBuilder(strData)
'number of characters in the stringbuilder (document)
Dim intLength As Int32
intLength = strBldr.Length()
'loop through each character
Dim intIllegals As Int16 = 0
Dim i As Int32
Dim intAscCode As Int16
Dim char1 As Char
Dim char2 As Char
For i = 0 To intLength - 1
'get the ascii code of the current character
intAscCode = Asc(strBldr.Chars(i))
'all nonprintables are ascii code 31 or less
If intAscCode < 32 Then
'keep a running total of the illegal character encountered
intIllegals = intIllegals + 1
char1 = Chr(intAscCode)
'put a space in it's place
char2 = Chr(32)
strBldr.Replace(char1, char2)
End If
Next i
If intIllegals > 0 Then
MsgBox(intIllegals.ToString & " illegal character(s) were found and cleaned.")
Else
MsgBox("No illegal characters were found.")
End If
sr.Close()
'create a new file of the same name, this will overwrite the old one
Dim f As FileInfo = New FileInfo(filepath)
'create a stream writer for the new file
Dim strmWriter As StreamWriter = f.CreateText
'write the stringbuilder with the clean data to the new file
strmWriter.Write(strBldr.ToString)
'close the streamwriter, this will also flush it
strmWriter.Close()
End Sub
Thank you for your help.
Private Sub purgeIllegalCharacters(ByVal filepath As String)
'create a streamreader for the document that needs to be cleaned
Dim sr As StreamReader = New StreamReader(filepath)
'start at the first character
sr.BaseStream.Position = 0
'load all of the data into a single string
Dim strData As String = sr.ReadToEnd
'load the string into a stringbuilder
Dim strBldr As New System.Text.StringBuilder(strData)
'number of characters in the stringbuilder (document)
Dim intLength As Int32
intLength = strBldr.Length()
'loop through each character
Dim intIllegals As Int16 = 0
Dim i As Int32
Dim intAscCode As Int16
Dim char1 As Char
Dim char2 As Char
For i = 0 To intLength - 1
'get the ascii code of the current character
intAscCode = Asc(strBldr.Chars(i))
'all nonprintables are ascii code 31 or less
If intAscCode < 32 Then
'keep a running total of the illegal character encountered
intIllegals = intIllegals + 1
char1 = Chr(intAscCode)
'put a space in it's place
char2 = Chr(32)
strBldr.Replace(char1, char2)
End If
Next i
If intIllegals > 0 Then
MsgBox(intIllegals.ToString & " illegal character(s) were found and cleaned.")
Else
MsgBox("No illegal characters were found.")
End If
sr.Close()
'create a new file of the same name, this will overwrite the old one
Dim f As FileInfo = New FileInfo(filepath)
'create a stream writer for the new file
Dim strmWriter As StreamWriter = f.CreateText
'write the stringbuilder with the clean data to the new file
strmWriter.Write(strBldr.ToString)
'close the streamwriter, this will also flush it
strmWriter.Close()
End Sub