Tek-Tips is the largest IT community on the Internet today!

Members share and learn making Tek-Tips Forums the best source of peer-reviewed technical information on the Internet!

  • Congratulations wOOdy-Soft on being selected by the Tek-Tips community for having the most helpful posts in the forums last week. Way to Go!

Convert HTML To RTF 1

Status
Not open for further replies.

jonnyknowsbest

Technical User
Feb 3, 2004
163
GB
How do i go about converting HTML into RTF. It needs to be able to support different font colors and styles

Regards
 
The easiest way I can think of is by using the Word Document model and opening the HTML file then saving the file in RTF format.


____________________________________________________________

Need help finding an answer?

Try the Search Facility or read FAQ222-2244 on how to get better results.

 
does this mean the user has to have Word installed?

If so then this is not an option
 
It probably does (unless just including the Word DLL's with your setup package will work but I've never done that so I'm not sure - maybe someone else knows?).

Alternatively, if you have a server that does have word installed on it, you could create a web service that did the work and returned the RTF to you.


____________________________________________________________

Need help finding an answer?

Try the Search Facility or read FAQ222-2244 on how to get better results.

 
There is another option - it's not the easiest to work with but I've gotten it to work with some success.

I was actually trying to write an Extended RichText control that supported HTML code (limited) - here is the conversion code I used - may take some posts just to break up the large segments.

Code:
'For storing the colors
Private colCol As New Collection

Public Sub SetHtml(ByVal Value As String)
    _html = Value
    ClearcolCol()
    Dim rf As String = ConvertHtml(_html)
    rf = newHeader() & rf
    Me.Rtf = rf
End Set

Private Sub ClearcolCol()
    While colCol.Count <> 0
        colCol.Remove(colCol.Count)
    End While
End Sub

Private Function newHeader() As String
    Dim thisColor As Integer
    Dim DocHead As String
    DocHead = "{\rtf1\ansi\ansicpg1252\deff0\deflang1033"
    DocHead &= "{\colortbl ;"
    For Each thisColor In rtfColors
        Dim setColor As Color = Color.FromArgb(thisColor)
        DocHead &= "\red" & setColor.R
        DocHead &= "\green" & setColor.G
        DocHead &= "\blue" & setColor.B & ";"
    Next
    DocHead &= "}"
    Return DocHead
End Function

Private Function ConvertHtml(ByVal htm As String) As String
    htm = htm.Replace("\", Chr(2) & Chr(2))
    htm = htm.Replace("{", Chr(2) & Chr(3))
    htm = htm.Replace("}", Chr(2) & Chr(4))
    Dim t As String
    t = Regex.Replace(htm, "<b>", "\b ", RegexOptions.IgnoreCase)
    t = Regex.Replace(t, "</b>", "\b0 ", RegexOptions.IgnoreCase)
    t = Regex.Replace(t, "<i>", "\i ", RegexOptions.IgnoreCase)
    t = Regex.Replace(t, "</i>", "\i0 ", RegexOptions.IgnoreCase)
    t = Regex.Replace(t, "<u>", "\ul ", RegexOptions.IgnoreCase)
    t = Regex.Replace(t, "</u>", "\ulnone ", RegexOptions.IgnoreCase)
    t = Regex.Replace(t, "<br>", vbCrLf, RegexOptions.IgnoreCase)
    t = Regex.Replace(t, "<(s|strike)>", "\strike ", RegexOptions.IgnoreCase)
    t = Regex.Replace(t, "<(/s|/strike)>", "\strike0 ", RegexOptions.IgnoreCase)

    t = CheckForFont(t)


    'matched elements - now remove all the rest
    t = System.Text.RegularExpressions.Regex.Replace(t, "<[^>]*>", "")

    t = Regex.Replace(t, vbCrLf, "\par" & vbCrLf)
    t = t.Replace(Chr(2), "\")
    t = t.Replace(Chr(3), "{")
    t = t.Replace(Chr(4), "}")
    Return t
End Function
 
Here's the rest:

Code:
Private rtfColors As New cList

Private Function CheckForFont(ByVal str As String) As String
    Dim rg As New Regex("(<font[^>]*>|<\/font>)")
    Dim mc As MatchCollection = rg.Matches(str)
    If Not mc Is Nothing Then
        Dim i As Integer = 0
        While i < mc.Count
            If mc(i).Value.StartsWith("<font") Then
                Dim cf As String = GetCF(mc(i).Value)
                Dim rr As New Regex("<font[^>]*>")
                colCol.Add(cf)
                str = rr.Replace(str, cf, 1)
            ElseIf mc(i).Value = "</font>" Then
                Dim rr2 As New Regex("<\/font>")
                If colCol.Count > 0 Then colCol.Remove(colCol.Count)
                Dim ecf As String
                If colCol.Count = 0 Then
                    ecf = "\cf0 "
                Else
                    ecf = colCol.Item(colCol.Count)
                End If
                str = rr2.Replace(str, ecf, 1)
            End If
            i += 1
        End While
    End If
    Return str
End Function

Private Function GetCF(ByVal str As String) As String
    Dim rg As New Regex("color=[^>|\s]*")
    If rg.Match(str).Success Then
        Dim sM As String = rg.Match(str).Value
        'remove color=
        sM = sM.Substring(6)
        sM = sM.Trim(Chr(34))
        If sM.Length > 0 Then
            'we have a color
            Dim clr As Color = ConvertColor(sM)
            Dim iclr As Integer = AddColor(clr)
            Return "\cf" & iclr + 1 & " "
        End If
    End If
    Return "\cf0 "
End Function

Private Function AddColor(ByVal clr As Color) As Integer
    Dim inDict As Integer
    Dim colorIndex As Integer
    inDict = clr.ToArgb
    If inDict <> -1 Then
        rtfColors.add(inDict)
        colorIndex = rtfColors.exists(inDict)
    End If
    Return colorIndex
End Function


Private Class cList
    Inherits CollectionBase
    Sub New()
    End Sub
    Sub add(ByVal item As Integer)
        If exists(item) = -1 Then
            list.Add(item)
            ' Console.WriteLine("New color: " & item)
        End If
    End Sub
    Public Property item(ByVal index As Integer) As Integer
        Get
            Return list(index)
        End Get
        Set(ByVal Value As Integer)
            list(index) = Value
            ' Console.WriteLine("Setting color")
        End Set
    End Property
    Function exists(ByVal lookup As Integer) As Integer
        Dim current As Integer
        If list.Count <> 0 Then
            For current = 0 To list.Count - 1
                Dim compare As Color = Color.FromArgb(lookup)
                Dim source As Color = Color.FromArgb(list(current))
                ' This is very strange, the samme RGB color can have diffrent
                ' ARGB values ???? Maybe its reporting 'A' diffrently, oh well
                ' lets just work around that little feature
                If compare.R = source.R And _
                       compare.G = source.G And _
                       compare.B = source.B Then
                    Return current
                End If
            Next
        End If
        Return -1
    End Function
End Class
 
And the ConvertColor method:

Code:
Private Function ConvertColor(ByVal htmlcolor As String) As Color
    Select Case htmlcolor.ToLower
        Case "aliceblue"
        Case "antiquewhite"
        Case "aqua"
        Case "aquamarine"
        Case "azure"
        Case "beige"
        Case "bisque"
        Case "black"
        Case "blanchedalmond"
        Case "blue"
        Case "blueviolet"
        Case "brown"
        Case "burlywood"
        Case "cadetblue"
        Case "chartreuse"
        Case "chocolate"
        Case "coral"
        Case "cornflowerblue"
        Case "cornsilk"
        Case "crimson"
        Case "cyan"
        Case "darkblue"
        Case "darkcyan"
        Case "darkgoldenrod"
        Case "darkgray"
        Case "darkgreen"
        Case "darkkhaki"
        Case "darkmagenta"
        Case "darkolivegreen"
        Case "darkorange"
        Case "darkorchid"
        Case "darkred"
        Case "darksalmon"
        Case "darkseagreen"
        Case "darkslateblue"
        Case "darkslategray"
        Case "darkturquoise"
        Case "darkviolet"
        Case "deeppink"
        Case "deepskyblue"
        Case "dimgray"
        Case "dodgerblue"
        Case "feldspar"
        Case "firebrick"
        Case "floralwhite"
        Case "forestgreen"
        Case "fuchsia"
        Case "gainsboro"
        Case "ghostwhite"
        Case "gold"
        Case "goldenrod"
        Case "gray"
        Case "green"
        Case "greenyellow"
        Case "honeydew"
        Case "hotpink"
        Case "indianred"
        Case "indigo"
        Case "ivory"
        Case "khaki"
        Case "lavender"
        Case "lavenderblush"
        Case "lawngreen"
        Case "lemonchiffon"
        Case "lightblue"
        Case "lightcoral"
        Case "lightcyan"
        Case "lightgoldenrodyellow"
        Case "lightgrey"
        Case "lightgreen"
        Case "lightpink"
        Case "lightsalmon"
        Case "lightseagreen"
        Case "lightskyblue"
        Case "lightslateblue"
        Case "lightslategray"
        Case "lightsteelblue"
        Case "lightyellow"
        Case "lime"
        Case "limegreen"
        Case "linen"
        Case "magenta"
        Case "maroon"
        Case "mediumaquamarine"
        Case "mediumblue"
        Case "mediumorchid"
        Case "mediumpurple"
        Case "mediumseagreen"
        Case "mediumslateblue"
        Case "mediumspringgreen"
        Case "mediumturquoise"
        Case "mediumvioletred"
        Case "midnightblue"
        Case "mintcream"
        Case "mistyrose"
        Case "moccasin"
        Case "navajowhite"
        Case "navy"
        Case "oldlace"
        Case "olive"
        Case "olivedrab"
        Case "orange"
        Case "orangered"
        Case "orchid"
        Case "palegoldenrod"
        Case "palegreen"
        Case "paleturquoise"
        Case "palevioletred"
        Case "papayawhip"
        Case "peachpuff"
        Case "peru"
        Case "pink"
        Case "plum"
        Case "powderblue"
        Case "purple"
        Case "red"
        Case "rosybrown"
        Case "royalblue"
        Case "saddlebrown"
        Case "salmon"
        Case "sandybrown"
        Case "seagreen"
        Case "seashell"
        Case "sienna"
        Case "silver"
        Case "skyblue"
        Case "slateblue"
        Case "slategray"
        Case "snow"
        Case "springgreen"
        Case "steelblue"
        Case "tan"
        Case "teal"
        Case "thistle"
        Case "tomato"
        Case "turquoise"
        Case "violet"
        Case "violetred"
        Case "wheat"
        Case "white"
        Case "whitesmoke"
        Case "yellow"
        Case "yellowgreen"
        Case Else
            Try
                Dim hexChars As String = "#0123456789ABCDEF"
                For i As Integer = 0 To htmlcolor.Length - 1
                    If hexChars.IndexOf(htmlcolor.Substring(i, 1).ToUpper) = -1 Then
                        Return Color.Black
                    End If
                Next
                'valid hex characters
                htmlcolor = htmlcolor.ToUpper
                If htmlcolor.Length > 7 Or htmlcolor.Length <= 6 Then
                    If htmlcolor.Length = 3 Then htmlcolor = htmlcolor.Substring(0, 1) & htmlcolor.Substring(0, 1) & htmlcolor.Substring(1, 1) & htmlcolor.Substring(1, 1) & htmlcolor.Substring(2, 1) & htmlcolor.Substring(2, 1)
                    If htmlcolor.Length < 6 Then
                        While htmlcolor.Length <> 6
                            htmlcolor &= "0"
                        End While
                    End If
                    If htmlcolor.Length = 6 Then htmlcolor = "#" & htmlcolor
                    If htmlcolor.Length > 7 Then htmlcolor = htmlcolor.Substring(0, 7)
                    If htmlcolor.Length = 7 And Not htmlcolor.StartsWith("#") Then htmlcolor = "#" & htmlcolor.Substring(1)
                End If
            Catch ex As Exception
                Return Color.Black
            End Try
    End Select
    Try
        Dim tr As System.Drawing.ColorTranslator
        Return tr.FromHtml(htmlcolor)
    Catch ex As Exception
        Return Color.Black
    End Try
End Function
 
Hopefully that will help you out.

rtfColors actually stores the colors going into the RichTextBox - colColors just stores the current color so that it can switch back in case of nested colors.

Basically all that is supported by that is the color attribute of the font tag, and the following tags: b, i, u, br, s, strike.
 
Status
Not open for further replies.

Part and Inventory Search

Sponsor

Back
Top