craigsboyd
IS-IT--Management
In my applications I like to provide the user a way to search for names in the database via "sounds-like". this is especially helpful when the user doesn't know the exact spelling. There are many algorithms around for doing this (soundex, metaphone, double-metaphone, phonix, q-gram, etc.) In VFP the only native way of doing this is using the SOUNDEX() or DIFFERENCE() functions, but I have found them to be less accurate than I would like. The worst thing about them is there reliance on the first letter of the word or name. So here is a version of the metaphone algorithm translated to VFP (Please note: I hope to port most of the popular sounds-like algorithms to VFP at some point).
Cut-N-Paste the fully working example code below into a prg file and run it from within VFP.
Slighthaze = NULL
craig1442@mchsi.com
Cut-N-Paste the fully working example code below into a prg file and run it from within VFP.
Code:
*********************************
*!* Example of use
*********************************
MESSAGEBOX(["FONE" compared to "PHONE"] + CHR(13)+ CHR(13) ;
+"SOUNDEX RETURNS: " + IIF(SOUNDEX("FONE") = SOUNDEX("PHONE"),"TRUE", "FALSE") + CHR(13)+ CHR(13) ;
+"METAPHONE RETURNS: " + IIF(Metaphone("FONE") = Metaphone("PHONE"),"TRUE", "FALSE"),0,"EXAMPLE 1 OF 2")
MESSAGEBOX(["KREG" compared to "CRAIG"] + CHR(13)+ CHR(13) ;
+"SOUNDEX RETURNS: " + IIF(SOUNDEX("KREG") = SOUNDEX("CRAIG"),"TRUE", "FALSE") + CHR(13)+ CHR(13) ;
+"METAPHONE RETURNS: " + IIF(Metaphone("KREG") = Metaphone("CRAIG"),"TRUE", "FALSE"),0,"EXAMPLE 2 OF 2")
DO WHILE .T.
IF MESSAGEBOX("Would you like to try a comparison of your own?",36,"GIVE IT A TRY") = 6
cFirstWord = ALLTRIM(INPUTBOX("Enter a name or word:", "FIRST WORD TO COMPARE"))
IF EMPTY(cFirstWord)
MESSAGEBOX("You must enter both words.", 16, "UNABLE TO RUN COMPARISON")
LOOP
ENDIF
cSecondWord = ALLTRIM(INPUTBOX("Enter another name or word:", "SECOND WORD TO COMPARE"))
IF EMPTY(cSecondWord)
MESSAGEBOX("You must enter both words.", 16, "UNABLE TO RUN COMPARISON")
LOOP
ENDIF
MESSAGEBOX(["]+cFirstWord+[" compared to "]+cSecondWord+["] + CHR(13)+ CHR(13) ;
+"SOUNDEX RETURNS: " + IIF(SOUNDEX(cFirstWord) = SOUNDEX(cSecondWord),"TRUE", "FALSE") + CHR(13)+ CHR(13) ;
+"METAPHONE RETURNS: " + IIF(Metaphone(cFirstWord) = Metaphone(cSecondWord),"TRUE", "FALSE"),0,"COMPARISON RESULTS")
ELSE
EXIT
ENDIF
ENDDO
*********************************
*!* Original C version by Michael Kuhn <rhlab!mkuhn@uunet.uu.net>
*!* [URL unfurl="true"]http://aspell.sourceforge.net/metaphone/metaphone-kuhn.txt[/URL]
*!* Metaphone algorithm translated from C to Delphi by Tom White <wcs@lnellex.com>
*!* Translated to Visual Basic by Dave White 9/10/01
*!* Translated to Visual Foxpro by Craig Boyd (Slighthaze) 10-21-2003 craig1442@mchsi.com
*********************************
FUNCTION Metaphone (lcWord)
*********************************
#DEFINE VOWELS "AEIOU"
#DEFINE FRONTV "EIY"
#DEFINE VARSON "CSPTG"
#DEFINE DBL "."
#DEFINE EXCPPAIR "AGKPW"
#DEFINE NXTLTR "ENNNR"
#DEFINE ALPHACHR "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
LOCAL lcB, lcC, lcInp, lcOutp, lnIi, lnJj
LOCAL lcCurltr, lcPrevltr, lcNextltr, lcNextltr2, lcNextltr3
LOCAL lnVowelafter, llVowelbefore, llFrontvafter, llSilent, llHard
IF PCOUNT()=0
RETURN ""
ELSE
IF TYPE("lcWord") != "C"
RETURN ""
ENDIF
ENDIF
lcInp = UPPER(lcWord)
lcInp = CHRTRAN(lcInp,CHRTRAN(lcInp,ALPHACHR,""),"") &&Remove all non-alpha characters
lcOutp = ""
IF LEN(lcInp) = 0
RETURN ""
ENDIF
*!*--Check rules at beginning of word
IF LEN(lcInp) > 1
lcB = SUBSTR(lcInp, 1, 1)
lcC = SUBSTR(lcInp, 2, 1)
lnIi = ATC(lcB, EXCPPAIR)
lnJj = ATC(lcC, NXTLTR)
IF lnIi = lnJj AND lnIi > 0
lcInp = SUBSTR(lcInp, 2, LEN(lcInp) - 1)
ENDIF
ENDIF
IF SUBSTR(lcInp, 1, 1) = "X"
STUFF(lcInp, 1, 1) = "S"
ENDIF
IF SUBSTR(lcInp, 1, 2) = "WH"
lcInp = "W" + SUBSTR(lcInp, 3)
ENDIF
IF RIGHT(lcInp, 1) = "S"
lcInp = LEFT(lcInp, LEN(lcInp) - 1)
ENDIF
lnIi = 0
DO WHILE (lnIi > LEN(lcInp))
lnIi = lnIi + 1
*!*--Main LOOP!
llSilent = .F.
llHard = .F.
lcCurltr = SUBSTR(lcInp, lnIi, 1)
llVowelbefore = .F.
lcPrevltr = " "
IF lnIi > 1
lcPrevltr = SUBSTR(lcInp, lnIi - 1, 1)
IF InStrC(lcPrevltr, VOWELS) > 0
llVowelbefore = .T.
ENDIF
ENDIF
IF ((lnIi = 1) AND (InStrC(lcCurltr, VOWELS) > 0))
lcOutp = lcOutp + lcCurltr
LOOP
ENDIF
llVowelafter = .F.
llFrontvafter = .F.
lcNextltr = " "
IF lnIi < LEN(lcInp)
lcNextltr = SUBSTR(lcInp, lnIi + 1, 1)
IF InStrC(lcNextltr, VOWELS) > 0
llVowelafter = .T.
ENDIF
IF InStrC(lcNextltr, FRONTV) > 0
llFrontvafter = .T.
ENDIF
ENDIF
*!*--Skip double letters EXCEPT ones in variable double
IF InStrC(lcCurltr, DBL) = 0
IF lcCurltr = lcNextltr
LOOP
ENDIF
ENDIF
lcNextltr2 = " "
IF LEN(lcInp) - lnIi > 1
lcNextltr2 = SUBSTR(lcInp, lnIi + 2, 1)
ENDIF
lcNextltr3 = " "
IF (LEN(lcInp) - lnIi) > 2
lcNextltr3 = SUBSTR(lcInp, lnIi + 3, 1)
ENDIF
DO CASE
CASE lcCurltr = "B"
llSilent = .F.
IF (lnIi = LEN(lcInp)) AND (lcPrevltr = "M")
llSilent = .T.
ENDIF
IF NOT (llSilent)
lcOutp = lcOutp + lcCurltr
ENDIF
CASE lcCurltr = "C"
IF NOT ((lnIi > 2) AND (lcPrevltr = "S") AND llFrontvafter)
IF ((lnIi > 1) AND (lcNextltr = "I") AND (lcNextltr2 = "A"))
lcOutp = lcOutp + "X"
ELSE
IF llFrontvafter
lcOutp = lcOutp + "S"
ELSE
IF ((lnIi > 2) AND (lcPrevltr = "S") AND (lcNextltr = "H"))
lcOutp = lcOutp + "K"
ELSE
IF lcNextltr = "H"
IF ((lnIi = 1) AND (InStrC(lcNextltr2, VOWELS) = 0))
lcOutp = lcOutp + "K"
ELSE
lcOutp = lcOutp + "X"
ENDIF
ELSE
IF lcPrevltr = "C"
lcOutp = lcOutp + "C"
ELSE
lcOutp = lcOutp + "K"
ENDIF
ENDIF
ENDIF
ENDIF
ENDIF
ENDIF
CASE lcCurltr = "D"
IF ((lcNextltr = "G") AND (InStrC(lcNextltr2, FRONTV) > 0))
lcOutp = lcOutp + "J"
ELSE
lcOutp = lcOutp + "T"
ENDIF
CASE lcCurltr = "G"
llSilent = .F.
IF ((lnIi < LEN(lcInp)) AND (lcNextltr = "H") AND (InStrC(lcNextltr2, VOWELS) = 0))
llSilent = .T.
ENDIF
DO CASE
CASE lcCurltr = ((lnIi = LEN(lcInp) - 4) AND (lcNextltr = "N") AND (lcNextltr2 = "E") AND (lcNextltr3 = "D"))
llSilent = .T.
CASE lcCurltr = ((lnIi = LEN(lcInp) - 2) AND (lcNextltr = "N"))
llSilent = .T.
ENDCASE
IF (lcPrevltr = "D") AND llFrontvafter
llSilent = .T.
ENDIF
IF lcPrevltr = "G"
llHard = .T.
ENDIF
IF NOT (llSilent)
IF llFrontvafter AND (NOT (llHard))
lcOutp = lcOutp + "J"
ELSE
lcOutp = lcOutp + "K"
ENDIF
ENDIF
CASE lcCurltr = "H"
llSilent = .F.
IF InStrC(lcPrevltr, VARSON) > 0
llSilent = .T.
ENDIF
IF llVowelbefore AND (NOT (llVowelafter))
llSilent = .T.
ENDIF
IF NOT llSilent
lcOutp = lcOutp + lcCurltr
ENDIF
CASE lcCurltr = "F" OR lcCurltr = "J" OR lcCurltr = "L" OR lcCurltr = "M" OR lcCurltr = "N" OR lcCurltr = "R"
lcOutp = lcOutp + lcCurltr
CASE lcCurltr = "K" IF lcPrevltr <> "C"
lcOutp = lcOutp + lcCurltr
CASE lcCurltr = "P"
IF lcNextltr = "H"
lcOutp = lcOutp + "F"
ELSE
lcOutp = lcOutp + "P"
ENDIF
CASE lcCurltr = "Q"
lcOutp = lcOutp + "K"
CASE lcCurltr = "S"
IF ((lnIi > 2) AND (lcNextltr = "I") AND ((lcNextltr2 = "O") OR (lcNextltr2 = "A")))
lcOutp = lcOutp + "X"
ENDIF
IF (lcNextltr = "H")
lcOutp = lcOutp + "X"
ELSE
lcOutp = lcOutp + "S"
ENDIF
CASE lcCurltr = "T"
IF ((lnIi > 0) AND (lcNextltr = "I") AND ((lcNextltr2 = "O") OR (lcNextltr2 = "A")))
lcOutp = lcOutp + "X"
ENDIF
DO CASE
CASE lcNextltr = "H"
IF ((lnIi > 1) OR (InStrC(lcNextltr2, VOWELS) > 0))
lcOutp = lcOutp + "0"
ELSE
lcOutp = lcOutp + "T"
ENDIF
CASE NOT ((lnIi < LEN(lcInp) - 3) AND (lcNextltr = "C") AND (lcNextltr2 = "H"))
lcOutp = lcOutp + "T"
ENDCASE
CASE lcCurltr = "V"
lcOutp = lcOutp + "F"
CASE lcCurltr = "W" OR lcCurltr = "Y"
IF (lnIi < LEN(lcInp) - 1) AND llVowelafter
lcOutp = lcOutp + lcCurltr
ENDIF
CASE lcCurltr = "X"
lcOutp = lcOutp + "KS"
CASE lcCurltr = "Z"
lcOutp = lcOutp + "S"
ENDCASE
ENDDO
RETURN lcOutp
ENDFUNC
*********************************
FUNCTION InStrC (lcSearchIn, lcSoughtCharacters)
*********************************
*!*--- Returns the position of the first character in lcSearchIn that is contained
*!*--- in the string lcSoughtCharacters. Returns 0 if none found.
LOCAL i, lnReturn
lnReturn = 0
lcSoughtCharacters = UPPER(lcSoughtCharacters)
FOR i = 1 TO LEN(lcSearchIn)
IF ATC(SUBSTR(lcSearchIn, i, 1), lcSoughtCharacters) > 0
lnReturn = i
EXIT
ENDIF
ENDFOR
RETURN lnReturn
ENDFUNC
Slighthaze = NULL
craig1442@mchsi.com
"Whom computers would destroy, they must first drive mad." - Anon