robsuttonjr
MIS
Below is a procedure which if you pass it a DHL tracking number it will use internet explorer behind the scenes to go to dhl's website and pass back the package details into a cursor. This demonstrates 'scraping' data without using any third party addons. I am specifically after table '14' in the html page. However using this method you can figure out how to scrape any html table. I spent a portion of a day searching the internet. Like I tell everyone, you can do anything in FOX!
LPARAMETERS lcTrackingNumber
CREATE CURSOR Tdhltracking (TrackingNumber C(50), STATUS C(50),ScanDate C(20), ScanTime C(20), City C(50), State C(50), Location C(50), DateTM C(50))
loIE = CREATEOBJECT("InternetExplorer.Application")
loIE.VISIBLE = 0 && NOT VISIBLE, DEFAULT BUT ...
loIE.NAVIGATE(" && TRACK THE PACKAGE
DO WHILE loIE.Busy && WAIT UNTIL OBJECT IS DONE CONNECTING AND PASSING STRING
*NOTHING
ENDDO
DO WHILE loIE.DOCUMENT.ReadyState <> "complete" && WAIT UNTIL DOCUMENT IS FULLY LOADED
*NOTHING
ENDDO
*************************************************************************************
** SCAN THROUGH THE WEB DOCUMENT AND INSERT WHAT IS WANTED INTO TEMP CURSOR
varTables = loIE.DOCUMENT.ALL.tags("TABLE")
vartable = varTables.ITEM(14) && STATUS TABLE
FOR lnRowCount = 1 TO (vartable.ROWS.LENGTH-2) && MAY WANT LAST ROW EVENTUALLY (WOULD BE -1 TO GET IT)
*?vartable.ROWS.LENGTH
varRow = vartable.ROWS.ITEM(lnRowCount)
varcell = varRow.cells(0) && DATETIME
lcDatetm = UPPER(varcell.innerText)
varcell = varRow.cells(1) && STATUS
lcStatus = UPPER(varcell.innerText)
varcell = varRow.cells(2) && LOCATION
lcLocation = UPPER(varcell.innerText)
INSERT INTO Tdhltracking (TrackingNumber, STATUS,ScanDate, ScanTime, City, State, Location,DateTM) VALUES(lcTrackingNumber, lcStatus,[],[],[],[], lcLocation, lcDatetm)
ENDFOR
*************************************************************************************
*************************************************************************************
** MASSAGE THE DATA
SELECT Tdhltracking
GO TOP
SCAN
IF !EMPTY(LEFT(DateTM,10))
lcDate = ALLTRIM(LEFT(DateTM,10))
ENDIF
lcTime = ALLTRIM(RIGHT(ALLTRIM(DateTM),8))
IF EMPTY(lcDate)
REPLACE ScanDate WITH lcLastDate
ELSE
REPLACE ScanDate WITH lcDate
ENDIF
REPLACE ScanTime WITH lcTime
REPLACE STATUS WITH STRTRAN(ALLTRIM(UPPER(STATUS)),[.],[])
REPLACE City WITH LEFT(ALLTRIM(Location),LEN(ALLTRIM(Location))-4)
REPLACE State WITH RIGHT(ALLTRIM(Location),2)
lcLastDate = lcDate
ENDSCAN
**
*************************************************************************************
*************************************************************************************
** USED FOR TESTING AND DEV
*RS* FOR lnTableCount = 0 TO (varTables.Length-1)
*RS* vartable = vartables.item(lnTableCount)
*RS*
*RS* FOR lnRowCount = 0 TO (vartable.Rows.length-1)
*RS* varRow = varTable.Rows.Item(lnRowCount)
*RS* FOR lnCellCount = 0 TO (varRow.cells.Length-1)
*RS* varcell = varRow.cells(lnCellCount)
*RS* _cliptext = _cliptext + CHR(13)
*RS*
*RS* ENDFOR
*RS* ENDFOR
*RS* ENDFOR
**
*************************************************************************************
*************************************************************************************
**
loIE.QUIT() && CLEANUP (if quit is not used you will get an orphan iexeplorer found in taskman 'processes'
**
*************************************************************************************
Regards,
Rob
LPARAMETERS lcTrackingNumber
CREATE CURSOR Tdhltracking (TrackingNumber C(50), STATUS C(50),ScanDate C(20), ScanTime C(20), City C(50), State C(50), Location C(50), DateTM C(50))
loIE = CREATEOBJECT("InternetExplorer.Application")
loIE.VISIBLE = 0 && NOT VISIBLE, DEFAULT BUT ...
loIE.NAVIGATE(" && TRACK THE PACKAGE
DO WHILE loIE.Busy && WAIT UNTIL OBJECT IS DONE CONNECTING AND PASSING STRING
*NOTHING
ENDDO
DO WHILE loIE.DOCUMENT.ReadyState <> "complete" && WAIT UNTIL DOCUMENT IS FULLY LOADED
*NOTHING
ENDDO
*************************************************************************************
** SCAN THROUGH THE WEB DOCUMENT AND INSERT WHAT IS WANTED INTO TEMP CURSOR
varTables = loIE.DOCUMENT.ALL.tags("TABLE")
vartable = varTables.ITEM(14) && STATUS TABLE
FOR lnRowCount = 1 TO (vartable.ROWS.LENGTH-2) && MAY WANT LAST ROW EVENTUALLY (WOULD BE -1 TO GET IT)
*?vartable.ROWS.LENGTH
varRow = vartable.ROWS.ITEM(lnRowCount)
varcell = varRow.cells(0) && DATETIME
lcDatetm = UPPER(varcell.innerText)
varcell = varRow.cells(1) && STATUS
lcStatus = UPPER(varcell.innerText)
varcell = varRow.cells(2) && LOCATION
lcLocation = UPPER(varcell.innerText)
INSERT INTO Tdhltracking (TrackingNumber, STATUS,ScanDate, ScanTime, City, State, Location,DateTM) VALUES(lcTrackingNumber, lcStatus,[],[],[],[], lcLocation, lcDatetm)
ENDFOR
*************************************************************************************
*************************************************************************************
** MASSAGE THE DATA
SELECT Tdhltracking
GO TOP
SCAN
IF !EMPTY(LEFT(DateTM,10))
lcDate = ALLTRIM(LEFT(DateTM,10))
ENDIF
lcTime = ALLTRIM(RIGHT(ALLTRIM(DateTM),8))
IF EMPTY(lcDate)
REPLACE ScanDate WITH lcLastDate
ELSE
REPLACE ScanDate WITH lcDate
ENDIF
REPLACE ScanTime WITH lcTime
REPLACE STATUS WITH STRTRAN(ALLTRIM(UPPER(STATUS)),[.],[])
REPLACE City WITH LEFT(ALLTRIM(Location),LEN(ALLTRIM(Location))-4)
REPLACE State WITH RIGHT(ALLTRIM(Location),2)
lcLastDate = lcDate
ENDSCAN
**
*************************************************************************************
*************************************************************************************
** USED FOR TESTING AND DEV
*RS* FOR lnTableCount = 0 TO (varTables.Length-1)
*RS* vartable = vartables.item(lnTableCount)
*RS*
*RS* FOR lnRowCount = 0 TO (vartable.Rows.length-1)
*RS* varRow = varTable.Rows.Item(lnRowCount)
*RS* FOR lnCellCount = 0 TO (varRow.cells.Length-1)
*RS* varcell = varRow.cells(lnCellCount)
*RS* _cliptext = _cliptext + CHR(13)
*RS*
*RS* ENDFOR
*RS* ENDFOR
*RS* ENDFOR
**
*************************************************************************************
*************************************************************************************
**
loIE.QUIT() && CLEANUP (if quit is not used you will get an orphan iexeplorer found in taskman 'processes'
**
*************************************************************************************
Regards,
Rob