与其他情况下返回 BeautifulSoup.Tag 不同,当使用 text= 作为搜索条件时,BeautifulSoup 的搜索操作会返回 BeautifulSoup.NavigableString 对象(或此类对象的列表)。
检查该对象的 __dict__ 属性,即可查看可供使用的属性。
在这些属性中,由于 BS4 的变化,应优先使用 parent 而不是 previous。
from BeautifulSoup import BeautifulSoup
from pprint import pprint
import re
# Sample markup: four <h2> headings, of which only the first contains "cool".
html_text = """
<h2>this is cool #12345678901</h2>
<h2>this is nothing</h2>
<h2>this is interesting #126666678901</h2>
<h2>this is blah #124445678901</h2>
"""
soup = BeautifulSoup(html_text)
pattern = re.compile(r'cool')

# Dump the attributes carried by the object that a text-only search returns.
text_hit = soup.find(text=pattern)
pprint(text_hit.__dict__)

# Show the first <h2> found by tag name next to the result of the same search
# narrowed with text=pattern, then that result's parent.
# Single-argument print(...) emits the same output as the Python 2 statement.
first_h2 = soup.find('h2')
print(first_h2)
h2_text_hit = soup.find('h2', text=pattern)
print(h2_text_hit)
hit_parent = h2_text_hit.parent
print(hit_parent)

# Compare the text= result against the tag, against the tag's text, and
# finally compare the result's parent against the tag.
print(h2_text_hit == first_h2)
print(h2_text_hit == first_h2.text)
print(hit_parent == first_h2)