虽然来晚了,但这是几年前我为澳大利亚地址编写的Excel VBA脚本,可以轻松对其进行修改以支持其他国家。我还为对应的C#代码建立了GitHub存储库。该工作簿托管在我的网站上,您可以在这里下载:http://jeremythompson.net/rocks/ParseAddress.xlsm
策略
对于任何邮政编码为数字或可以与RegEx匹配的国家/地区,我的策略都非常有效:
首先,我们检测被认为是第一行的名字和姓氏。通过取消选中复选框(如下所示,称为“名称为第一行”),可以轻松跳过名称并从地址开始。
接下来,可以放心地假定由街道和门牌号组成的地址出现在郊区之前,而 St、Pde、Ave、Av、Rd、Cres、Loop 等街道后缀就是两者之间的分隔符。
检测郊区与州甚至国家/地区之间的关系可能会欺骗最复杂的解析器,因为可能会发生冲突。为了克服这个问题,我使用了PostCode查找,它是基于以下事实:剥离了街道和公寓/单元号以及PoBox,Ph,Fax,Mobile等后,仅会保留PostCode号。这很容易与regEx匹配,然后查找郊区和国家/地区。
您的国家邮政局服务将免费提供带有郊区和州的邮政编码列表,您可以将其存储在excel工作表,db表,text / json / xml文件等中。
- 最后,由于某些邮政编码具有多个郊区,因此我们检查地址中出现了哪个郊区。
例
VBA代码
免责声明:我知道这段代码并不完美,甚至写得不算好,但它很容易转换为任何编程语言,并可以在任何类型的应用程序中运行。真正的答案是上面的策略本身;请根据您所在的国家/地区及其规则,把下面的代码当作示例来调整:
'Require every variable in this module to be declared before it is used.
Option Explicit
'Index of the row (in the cleaned-up row array built by ParseAddress) that is
'read as the person's name when the "chkFirst" check box is ticked.
Private Const TopRow As Integer = 0
'Parses the multi-line contact block stored in the "Address" named range of
'the active sheet and writes the parts it recognises to the named ranges
'FirstName, Surname, Street, Mobile, Phone, Email, PostCode, Suburb, State
'and PhExt.
'
'Steps, in order:
'  1. split the text into trimmed, non-empty rows;
'  2. if the "chkFirst" check box is ticked, read row TopRow as "First Surname";
'  3. find the street (or PO box) using the suffixes returned by Street();
'  4. extract the mobile and phone rows (labels from Mb() / Ph()), discard fax
'     rows (labels from Fax()) and extract the e-mail row;
'  5. take the first stand-alone 4-digit number that is left as the post code
'     and look up suburb and state on the "PostCodes" sheet.
'
'Every value is written through StoreParsedField, which asks the user to
'confirm unless the "chkConfirm" check box reports 0.
'
'Runtime errors (missing named range, missing check box, ...) are reported in
'a message box instead of being silently ignored.
Public Sub ParseAddress()
    Dim rawRows() As String
    Dim sigRow() As String      'non-empty, trimmed rows still waiting to be classified
    Dim rowCount As Long
    Dim i As Long
    Dim j As Long
    Dim cutPos As Long
    Dim atPos As Long
    Dim rowTaken As Boolean
    Dim Temp As String
    Dim fieldText As String
    Dim email As String
    Dim postCode As String
    Dim Stat As String
    Dim PhExt As String
    Dim prePhone As String
    Dim lookup As Variant

    On Error GoTo ParseFailed

    Temp = ActiveSheet.Range("Address").Value
    If Len(Temp) = 0 Then Exit Sub

    'Pasted text may carry CR or CRLF line breaks; normalise them to LF so no
    'row keeps a stray carriage return.
    Temp = Replace(Temp, vbCrLf, vbLf)
    Temp = Replace(Temp, vbCr, vbLf)

    'Split into rows, trimming each one and dropping the empty ones.
    rawRows = Split(Temp, vbLf)
    ReDim sigRow(0 To UBound(rawRows))
    For i = LBound(rawRows) To UBound(rawRows)
        If Len(VBA.Trim$(rawRows(i))) > 0 Then
            sigRow(rowCount) = VBA.Trim$(rawRows(i))
            rowCount = rowCount + 1
        End If
    Next i
    If rowCount = 0 Then Exit Sub
    'rowCount is a count, so the last used index is rowCount - 1.
    ReDim Preserve sigRow(0 To rowCount - 1)

    'Name (must be on the first row unless the check box is unticked).
    If ActiveSheet.Shapes("chkFirst").ControlFormat.Value = 1 Then
        cutPos = InStr(1, sigRow(TopRow), " ", vbBinaryCompare)
        If cutPos > 0 Then
            StoreParsedField "FirstName", "First Name: ", VBA.Left$(sigRow(TopRow), cutPos - 1)
            StoreParsedField "Surname", "Surname: ", VBA.Mid$(sigRow(TopRow), cutPos + 1)
        Else
            'Single-word name: there is nothing to split, store it as the surname.
            StoreParsedField "Surname", "Surname: ", sigRow(TopRow)
        End If
        sigRow(TopRow) = ""
    End If

    'Street: the first row that contains a street suffix or a PO box.
    'Scanning starts at the first row so the address is still found when the
    'name row is absent; a consumed name row is empty and is skipped.
    For i = LBound(sigRow) To UBound(sigRow)
        If Len(sigRow(i)) > 0 Then
            cutPos = StreetEndPos(sigRow(i))
            If cutPos > 0 Then
                StoreParsedField "Street", "Street Address: ", VBA.RTrim$(VBA.Left$(sigRow(i), cutPos))
                'Keep whatever follows the street (usually the suburb) for later steps.
                sigRow(i) = VBA.Trim$(VBA.Mid$(sigRow(i), cutPos + 1))
                Exit For
            End If
        End If
    Next i

    'Mobile: the first row that starts with one of the Mb() labels.
    For i = LBound(sigRow) To UBound(sigRow)
        rowTaken = False
        For j = 0 To 3
            If NumberAfterLabel(sigRow(i), Mb(j), fieldText) Then
                StoreParsedField "Mobile", "Mobile: ", fieldText
                sigRow(i) = ""
                rowTaken = True
                Exit For
            End If
        Next j
        If rowTaken Then Exit For
    Next i

    'Phone: the first row that starts with one of the Ph() labels.
    For i = LBound(sigRow) To UBound(sigRow)
        rowTaken = False
        For j = 0 To 1
            If NumberAfterLabel(sigRow(i), Ph(j), fieldText) Then
                StoreParsedField "Phone", "Phone: ", fieldText
                sigRow(i) = ""
                rowTaken = True
                Exit For
            End If
        Next j
        If rowTaken Then Exit For
    Next i

    'Fax: nothing is stored, but the rows are blanked so that their digits
    'cannot be mistaken for the post code further down.
    For i = LBound(sigRow) To UBound(sigRow)
        For j = 0 To 1
            If NumberAfterLabel(sigRow(i), Fax(j), fieldText) Then
                sigRow(i) = ""
                Exit For
            End If
        Next j
    Next i

    'Email: the first row with an "@" that is followed by a ".".
    'The two InStr results are tested separately: combining the raw positions
    'with And is a bitwise operation and can yield 0 for two non-zero positions.
    For i = LBound(sigRow) To UBound(sigRow)
        atPos = InStr(1, sigRow(i), "@", vbBinaryCompare)
        If atPos > 0 Then
            If InStr(atPos, sigRow(i), ".", vbBinaryCompare) > 0 Then
                'Strip the common "Email:" style labels, then store in lower case.
                email = Replace(VBA.UCase$(sigRow(i)), "EMAIL:", "")
                email = Replace(email, "E-MAIL:", "")
                email = Replace(email, "E:", "")
                email = Replace(VBA.Trim$(email), "E ", "")
                email = VBA.LCase$(email)
                StoreParsedField "Email", "Email: ", email
                sigRow(i) = ""
                Exit For
            End If
        End If
    Next i

    'What is left should be suburb, state, country and the post code.
    Temp = VBA.Trim$(Join(sigRow, vbCrLf))

    'In Australia post codes are 4 digits. Require exactly four digits with no
    'digit on either side, so "12.3", "-200" or part of a longer number are
    'not accepted.
    For i = 1 To Len(Temp) - 3
        postCode = VBA.Mid$(Temp, i, 4)
        If postCode Like "####" Then
            If i > 1 Then fieldText = VBA.Mid$(Temp, i - 1, 1) Else fieldText = ""
            If Not (fieldText Like "#") And Not (VBA.Mid$(Temp, i + 4, 1) Like "#") Then
                StoreParsedField "PostCode", "Post Code: ", postCode

                'Look up suburb and state. Columns A and B of the PostCodes sheet
                'hold post code and suburb; the state is read from the column
                'immediately to the right (C), hence the resize to 3 columns.
                'One bulk read replaces thousands of single-cell reads.
                lookup = Sheets("PostCodes").Range("A2:B16670").Resize(ColumnSize:=3).Value
                For j = 1 To UBound(lookup, 1)
                    If Not IsError(lookup(j, 1)) And Not IsError(lookup(j, 2)) Then
                        If lookup(j, 1) = postCode Then
                            'A post code can cover several suburbs: use the one that
                            'actually appears in the address text.
                            If Len(lookup(j, 2)) > 0 Then
                                If InStr(1, Temp, lookup(j, 2), vbTextCompare) > 0 Then
                                    ActiveSheet.Range("Suburb") = lookup(j, 2)
                                    Stat = lookup(j, 3)
                                    ActiveSheet.Range("State") = Stat

                                    'Knowing the state gives the telephone area code.
                                    PhExt = PhExtension(VBA.UCase$(Stat))
                                    ActiveSheet.Range("PhExt") = PhExt

                                    'Remove the area code from the stored phone number, but
                                    'only as a leading "03 " / "(03)" prefix, and never when
                                    'the code is empty.
                                    If Len(PhExt) > 0 Then
                                        prePhone = ActiveSheet.Range("Phone").Value
                                        If VBA.Left$(prePhone, Len(PhExt) + 2) = "(" & PhExt & ")" Then
                                            prePhone = VBA.LTrim$(VBA.Mid$(prePhone, Len(PhExt) + 3))
                                        ElseIf VBA.Left$(prePhone, Len(PhExt) + 1) = PhExt & " " Then
                                            prePhone = VBA.Mid$(prePhone, Len(PhExt) + 2)
                                        End If
                                        ActiveSheet.Range("Phone") = prePhone
                                    End If
                                    Exit For
                                End If
                            End If
                        End If
                    End If
                Next j
                'Only the first post-code candidate is considered.
                Exit For
            End If
        End If
    Next i
    Exit Sub

ParseFailed:
    MsgBox "ParseAddress could not complete: " & Err.Description, vbExclamation, "Parse Address"
End Sub

'Writes fieldValue to the named range targetName on the active sheet. When the
'"chkConfirm" check box reports anything other than 0 the user is first asked
'to confirm (caption & fieldValue) and the value is only written on Yes.
'NOTE(review): a Forms check box may report xlOff (-4146) rather than 0 when
'unticked - confirm against the workbook; the comparison is kept as it was.
Private Sub StoreParsedField(ByVal targetName As String, ByVal caption As String, ByVal fieldValue As String)
    If ActiveSheet.Shapes("chkConfirm").ControlFormat.Value <> 0 Then
        If MsgBox(caption & fieldValue, vbQuestion + vbYesNo, "Confirm Details") <> vbYes Then Exit Sub
    End If
    ActiveSheet.Range(targetName) = fieldValue
End Sub

'Returns the position of the last character of the street part of rowText
'(street suffix, or PO box including its number), or 0 when the row contains
'no known suffix.
Private Function StreetEndPos(ByVal rowText As String) As Long
    Const LAST_SUFFIX As Integer = 19          'highest index handled by Street()
    Const LAST_LEGACY_SUFFIX As Integer = 8    'suffixes used for prefix matching

    Dim upperRow As String
    Dim suffix As String
    Dim matchedSuffix As String
    Dim hitPos As Long
    Dim endPos As Long
    Dim j As Integer

    upperRow = VBA.UCase$(rowText)

    'Pass 1: any known suffix, but only as a whole word, so " ST" does not
    'match inside "STREET" or "STANLEY".
    For j = 0 To LAST_SUFFIX
        suffix = Street(j)
        If Len(suffix) > 0 Then
            hitPos = InStr(1, upperRow, suffix, vbBinaryCompare)
            Do While hitPos > 0 And endPos = 0
                If VBA.Mid$(upperRow, hitPos + Len(suffix), 1) Like "[A-Z]" Then
                    hitPos = InStr(hitPos + 1, upperRow, suffix, vbBinaryCompare)
                Else
                    endPos = hitPos + Len(suffix) - 1
                    matchedSuffix = suffix
                End If
            Loop
        End If
        If endPos > 0 Then Exit For
    Next j

    'Pass 2: prefix match on the short suffixes, extended to the end of the
    'word, so unlisted spellings that begin with a known suffix still count.
    If endPos = 0 Then
        For j = 0 To LAST_LEGACY_SUFFIX
            suffix = Street(j)
            If Len(suffix) > 0 Then
                hitPos = InStr(1, upperRow, suffix, vbBinaryCompare)
                If hitPos > 0 Then
                    endPos = hitPos + Len(suffix) - 1
                    Do While VBA.Mid$(upperRow, endPos + 1, 1) Like "[A-Z]"
                        endPos = endPos + 1
                    Loop
                    matchedSuffix = suffix
                    Exit For
                End If
            End If
        Next j
    End If

    'PO box: include the box number (spaces, then digits) whatever its length.
    If endPos > 0 And VBA.Right$(matchedSuffix, 3) = "BOX" Then
        Do While VBA.Mid$(upperRow, endPos + 1, 1) = " "
            endPos = endPos + 1
        Loop
        Do While VBA.Mid$(upperRow, endPos + 1, 1) Like "#"
            endPos = endPos + 1
        Loop
    End If

    StreetEndPos = endPos
End Function

'Tests whether rowText starts with label (case-insensitive) followed by a
'telephone-style value. The rest of the label word ("MOB" -> "Mobile") and
'any separators are skipped. Returns True, with the value in valueOut, only
'when the value starts with a digit, "+" or "(", so that ordinary words that
'merely begin with a label are left alone.
Private Function NumberAfterLabel(ByVal rowText As String, ByVal label As String, ByRef valueOut As String) As Boolean
    Dim pos As Long

    If Len(label) = 0 Then Exit Function
    If VBA.UCase$(VBA.Left$(rowText, Len(label))) <> label Then Exit Function

    pos = Len(label) + 1
    'Rest of the label word.
    Do While VBA.UCase$(VBA.Mid$(rowText, pos, 1)) Like "[A-Z]"
        pos = pos + 1
    Loop
    'Separators between label and number.
    Do While pos <= Len(rowText) And InStr(1, " :.-" & vbTab, VBA.Mid$(rowText, pos, 1), vbBinaryCompare) > 0
        pos = pos + 1
    Loop

    valueOut = VBA.Trim$(VBA.Mid$(rowText, pos))
    NumberAfterLabel = (valueOut Like "[0-9+(]*")
End Function
'Returns the Australian geographic telephone area code for a state or
'territory abbreviation, or "" when the abbreviation is not recognised.
'  State - abbreviation such as "NSW" or "vic" (case and surrounding spaces
'          are ignored).
'Area codes: 02 = NSW/ACT, 03 = VIC/TAS, 07 = QLD, 08 = SA/WA/NT.
'(04 is the mobile prefix, and 05/06 are not geographic area codes.)
Private Function PhExtension(ByVal State As String) As String
    Select Case VBA.UCase$(VBA.Trim$(State))
        Case "NSW", "ACT"
            PhExtension = "02"
        Case "VIC", "TAS"
            PhExtension = "03"
        Case "QLD"
            PhExtension = "07"
        Case "SA", "WA", "NT"
            PhExtension = "08"
        Case Else
            PhExtension = ""
    End Select
End Function
'Returns the Num-th upper-case label that can introduce a phone number row
'(0 = "PH", 1 = "PHONE"); any other index yields "".
Private Function Ph(ByVal Num As Integer) As String
    Dim labels As Variant

    'VBA.Array is always zero-based, so the index maps straight onto Num.
    labels = VBA.Array("PH", "PHONE")
    If Num >= 0 And Num <= UBound(labels) Then Ph = labels(Num)
End Function
'Returns the Num-th upper-case label that can introduce a mobile number row
'(0 = "MB", 1 = "MOB", 2 = "CELL", 3 = "MOBILE"); any other index yields "".
Private Function Mb(ByVal Num As Integer) As String
    Dim labels As Variant

    'VBA.Array is always zero-based, so the index maps straight onto Num.
    labels = VBA.Array("MB", "MOB", "CELL", "MOBILE")
    If Num >= 0 And Num <= UBound(labels) Then Mb = labels(Num)
End Function
'Returns the Num-th upper-case label that can introduce a fax number row
'(0 = "FAX", 1 = "FACSIMILE"); any other index yields "".
Private Function Fax(ByVal Num As Integer) As String
    Dim labels As Variant

    'VBA.Array is always zero-based, so the index maps straight onto Num.
    labels = VBA.Array("FAX", "FACSIMILE")
    If Num >= 0 And Num <= UBound(labels) Then Fax = labels(Num)
End Function
'Returns the Num-th state/territory abbreviation from the fixed list
'NSW, QLD, VIC, NT, WA, SA, TAS (indices 0 to 6); any other index yields "".
Private Function State(ByVal Num As Integer) As String
    Dim abbreviations As Variant

    'VBA.Array is always zero-based, so the index maps straight onto Num.
    abbreviations = VBA.Array("NSW", "QLD", "VIC", "NT", "WA", "SA", "TAS")
    If Num >= 0 And Num <= UBound(abbreviations) Then State = abbreviations(Num)
End Function
'Returns the Num-th upper-case street suffix / PO box marker used to find the
'street part of an address row (indices 0 to 19); any other index yields "".
'Street-type suffixes start with a space so they only match after another
'word; the PO box markers do not. Indices are kept stable for existing
'callers, which is why "PO BOX" appears at both 6 and 18.
Private Function Street(ByVal Num As Integer) As String
    Select Case Num
        Case 0: Street = " ST"
        Case 1: Street = " RD"
        Case 2: Street = " AVE"
        Case 3: Street = " AV"
        Case 4: Street = " CRES"
        Case 5: Street = " LOOP"
        Case 6: Street = "PO BOX"
        Case 7: Street = " STREET"
        Case 8: Street = " ROAD"
        Case 9: Street = " AVENUE"
        'Was misspelled " CRESENT", which can never match "Crescent".
        Case 10: Street = " CRESCENT"
        Case 11: Street = " PARADE"
        Case 12: Street = " PDE"
        Case 13: Street = " LANE"
        Case 14: Street = " COURT"
        Case 15: Street = " BLVD"
        Case 16: Street = "P.O. BOX"
        Case 17: Street = "P.O BOX"
        Case 18: Street = "PO BOX"
        Case 19: Street = "POBOX"
        Case Else: Street = ""
    End Select
End Function