Tek-Tips is the largest IT community on the Internet today!

Members share and learn making Tek-Tips Forums the best source of peer-reviewed technical information on the Internet!

  • Congratulations derfloh on being selected by the Tek-Tips community for having the most helpful posts in the forums last week. Way to Go!

Downloading lots of files

Status
Not open for further replies.

click

MIS
Joined
Jul 19, 2000
Messages
115
Location
US
I have a small problem. I have to download from a corporate webpage about 800 pdf files. They all show as links on the same page. Instead of right clicking on each one to "Save target As..." Is there a way that with VB have the program read the web page, extract all the links, or go link by link, and then download each file to a predetermined folder? I think I can cook the harvesting links part, but as to how to download each one, I don't have a clue. Could someone point me to the right direction? Thank you. [sadeyes]Click.
 

For returning the links from the web page, search this site for webbrowser, last six months. For downloading and saving the files in question search this site for XML ADO Stream using all words any date.

Good Luck

 
Here's how I do it... It is ugly but effective... I'm not going to format everything for how you want it (because I don't have time), but all the necessary parts are included... the basic flow is that the first form reads the webpage, parses out the files, then sets a couple text boxes in the second form, the second form starts a download when it senses the change in the second text field.... hope this helps in this format...

Code:
Public Function MakeDownloadList(ByVal downloaddir$, ByVal sRemoteUrl$) As String()
    'Reads a web page directory and parses the file names out into an array
    'return that array, directories are ignored... only files are saved
    Dim sDirStruct$
    Dim i As Integer, j As Integer
    Dim sLines As Variant
    Dim sLine As Variant
    Dim pattern$
    i = 0
    j = 15
    'Create temp directory to work in, delete old one if it's still around
    EraseDirectory (downloaddir)
    CreateDirectory (downloaddir)

    'Create file list by reading web folder (DIRECTORY BROWSING MUST BE ON!)
    Inet1.url = sRemoteUrl
    sDirStruct = Inet1.OpenURL(, icString)
    Dim fileList() As String
    ReDim fileList(j)
        
    pattern = &quot;*.*</A&quot;
    'Dim sFilename$
    sLines = Split(sDirStruct, &quot;>&quot;)
    For Each sLine In sLines
        If (sLine Like pattern) Then
            If (i >= j) Then
                'File count starts at j, if that is exceeded it is extended by five at a time
                j = j + 5
            ReDim Preserve fileList(j)
            End If
            sLine = Replace(sLine, &quot;</A&quot;, &quot;&quot;)
            fileList(i) = sLine
            i = i + 1
        End If
    Next
    If (i > 0) Then
        i = i - 1
    Else
        i = 0
    End If
    
    'Redimension the array to exactly fit the data so that For each can be run on it later
    ReDim Preserve fileList(i)
    MakeDownloadList = fileList
End Function  'MakeDownloadList


Public Sub GetPackage(ByVal sTo$, ByVal sFrom$)
    'Reads web folder and gets list of files, then downloads each using statusForm downloader
    Dim fileList() As String
    Dim sFile As Variant
    
    fileList = MakeDownloadList(sTo, sFrom)
    CreateDirectory (sTo)
    
    For Each sFile In fileList
        'statusForm senses when the text in Text2 is changed and responds by downloading
        'the file in text1 to the location specified in text2
        statusForm.Text1 = sFrom & sFile
        statusForm.Text2 = sTo & &quot;\&quot;
    Next
    'if it's been loaded we need to unload it
    Unload statusForm
End Sub  'GetPackage


----------------------- the status form ------------------
---- with 2 text boxes and an Inet control... you can ----

Option Explicit
Private Sub cancel_Click()
    'Do the way the form is called over and over to download the whole directory
    'I set a flag on the main form to 2 to indicate a cancellation, once this is set
    'No more downloads will happen
    Controller.Text1.Text = &quot;2&quot;
    Controller.DownloadCancelled = True
    
    Unload Me
End Sub

' Private Sub Inet1_StateChanged
' Sub handles the stateChanged event
' Parameters: State As Integer
' Return Value: n/a
Private Sub Inet1_StateChanged(ByVal State As Integer)
    'MsgBox (&quot;State change&quot;)
    ' State 12 means that there is data in the buffer to collect
    If State = 12 Then
        Dim bolDone As Boolean: bolDone = False
        Dim varData As Variant ' Data variable
        Dim byteTempArray() As Byte
        Dim intFile As Integer      ' FreeFile variable
        Dim lngFileSize As Long     ' Filesize of remote file
        Dim dblIncrease As Double   ' Increase value for progress bar
        
        intFile = FreeFile()      ' Set intFile to an unused file.
        
        ' Get the file size
        lngFileSize = Inet1.GetHeader(&quot;Content-length&quot;)
        ' Determine the increase value
        dblIncrease = FormatNumber((1024 / lngFileSize) * 100, 4)
        ' Open a file for binary access
        Open Text2.Text & &quot;\&quot; & GetFileName(Text1.Text) For Binary Access Write As #intFile
        ' Get the first chunk of data
        varData = Inet1.GetChunk(1024, icByteArray)
        DoEvents
        
        Do While Not bolDone
            byteTempArray = varData
            ' Write the content of the byte array to the opened file.
            Put #intFile, , byteTempArray
            
            If Not ProgressBar1.Value >= 100 Then
                If Not ProgressBar1.Value + dblIncrease > 100 Then
                    ProgressBar1.Value = ProgressBar1.Value + dblIncrease
                Else
                    ProgressBar1.Value = 100
                End If
            End If
            
            ' Get next chunk of data
            varData = Inet1.GetChunk(1024, icByteArray)
            DoEvents
            
            ' If we are not receiving any more, stop looping
            If Len(varData) = 0 Then
                bolDone = True
            End If
        Loop
        ' Close the file
        Close #intFile
    End If
    ' Print out the state to a label
    Label1.Caption = GetCurrentState(State)
End Sub

' --------------------------------------------------------
' Private function GetCurrentState
' Function describes a certain state with words
' Parameters: intState As Integer
' Return Value: State description As string
Private Function GetCurrentState(intState As Integer) As String
    Select Case intState
        Case 0 ' icNone
            GetCurrentState = &quot;No state to report&quot;
        Case 1 ' icHostResolvingHost
            GetCurrentState = &quot;The control is looking up the IP address of the specified host computer&quot;
        Case 2 ' icHostResolved
            GetCurrentState = &quot;The control successfully found the IP address of the specified host computer&quot;
        Case 3 ' icConnecting
            GetCurrentState = &quot;The control is connectiong to the host computer&quot;
        Case 4 ' icConnected
            GetCurrentState = &quot;The control successfully connected to the host computer&quot;
        Case 5 ' icRequesting
            GetCurrentState = &quot;The control is sending a request to the host computer&quot;
        Case 6 ' icRequestSent
            GetCurrentState = &quot;The control successfully sent the request&quot;
        Case 7 ' icReceivingResponse
            GetCurrentState = &quot;The control is receiving a response from the host computer&quot;
        Case 8 ' icResponseReceived
            'GetCurrentState = &quot;The control successfully received a response from the host computer&quot;
            GetCurrentState = &quot;Downloading &quot; & Text1.Text
        Case 9 ' icDisconnecting
            GetCurrentState = &quot;The control is disconnection from the host computer&quot;
        Case 10 ' icDisconnected
            GetCurrentState = &quot;The control successfully disconnected from the host computer&quot;
        Case 11 ' icError
            GetCurrentState = &quot;An error occured in communicating with the host computer&quot;
        Case 12 ' icResponseCompleted
            GetCurrentState = &quot;The request has completed and all data has been received&quot;
            'Without this If clause, even after a cancellation the file would be completed causing the flag to revert
            If Controller.Text1.Text <> &quot;2&quot; Then
                Controller.Text1.Text = 0
            End If
            Unload Me
        Case Else
            GetCurrentState = &quot;Unknown state: &quot; & intState
    End Select
End Function

' --------------------------------------------------------
' Private function GetFileName
' Function resolves the filename from a full url
' Parameters: strUrl As string
' Return Value: filename As string
Private Function GetFileName(strUrl As String) As String
    GetFileName = Mid(strUrl, InStrRev(strUrl, &quot;/&quot;) + 1)
End Function
Public Sub Download()
' Init the progressbar
    ProgressBar1.Value = 0
    ' Set the status label to &quot;&quot;
    Label1.Caption = &quot;&quot;
    ' The Execute method takes four optional parameters,
    ' of which we will use two, url and operation.
    ' The url parameter is a string value with any valid
    ' url and the operation can be GET, HEAD, POST, PUT.
    ' We will use GET in this example, which gives us the
    ' data of the specified url. Information about the
    ' others can be found if you follow one of the
    ' specified external links in the end of the article.
    Inet1.Execute Text1.Text, &quot;GET&quot;
    Do While Inet1.StillExecuting
        DoEvents
    Loop
End Sub


Private Sub Text2_Change()
    If Controller.Text1.Text <> &quot;2&quot; Then
        Me.Show
        Download
    End If
End Sub

You'll probably just want to ignore the cancel code, it's such a hack at the moment I haven't had a chance to clean it up.

-Rob
 
Thank you, this gives me a big head start. I much rather program and debug a couple of hours, than click 2400 times. ;-)Click
 
Status
Not open for further replies.

Part and Inventory Search

Sponsor

Back
Top