Beautifulsoup4 导入模组
from bs4 import BeautifulSoup
import requests as req
Beautifulsoup4 美化 HTML 代码
# Page to download.
url = "https://www.xyblog.cc/"
# Fetch the page's HTML. requests applies no timeout by default, so pass one
# explicitly to avoid hanging forever on an unresponsive server.
r = req.get(url, timeout=10)
# Hand the HTML to BeautifulSoup, using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, features="html.parser")
# Print the parsed document as a nicely indented string.
print(soup.prettify())
Beautifulsoup4 获取 title 标签
# Page to download.
url = "https://www.xyblog.cc/"
# Fetch the page's HTML. requests applies no timeout by default, so pass one
# explicitly to avoid hanging forever on an unresponsive server.
r = req.get(url, timeout=10)
# Hand the HTML to BeautifulSoup, using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, features="html.parser")
# Print the <title> tag itself (prints None if the page has no <title>).
print(soup.title)
Beautifulsoup4 获取 title 标签内部文字
# Page to download.
url = "https://www.xyblog.cc/"
# Fetch the page's HTML. requests applies no timeout by default, so pass one
# explicitly to avoid hanging forever on an unresponsive server.
r = req.get(url, timeout=10)
# Hand the HTML to BeautifulSoup, using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, features="html.parser")
# Print the text inside the <title> tag. soup.title is None when the page
# has no <title>, so guard the attribute access instead of crashing with
# AttributeError; an empty string is printed in that case.
title_tag = soup.title
print(title_tag.text if title_tag is not None else "")
Beautifulsoup4 获取网页第一个超链接
# Page to download.
url = "https://www.xyblog.cc/"
# Fetch the page's HTML. requests applies no timeout by default, so pass one
# explicitly to avoid hanging forever on an unresponsive server.
r = req.get(url, timeout=10)
# Hand the HTML to BeautifulSoup, using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, features="html.parser")
# Print the first <a> tag in the page (prints None if there is no link).
print(soup.a)
Beautifulsoup4 获取网页第一个超链接的属性
# Page to download.
url = "https://www.xyblog.cc/"
# Fetch the page's HTML. requests applies no timeout by default, so pass one
# explicitly to avoid hanging forever on an unresponsive server.
r = req.get(url, timeout=10)
# Hand the HTML to BeautifulSoup, using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, features="html.parser")
# Print the attribute dictionary of the first <a> tag. soup.a is None when
# the page contains no link, so guard the access instead of crashing with
# AttributeError; an empty dict is printed in that case.
first_link = soup.a
print(first_link.attrs if first_link is not None else {})
Beautifulsoup4 获取所有的元素
# Page to download.
url = "https://www.xyblog.cc/"
# Fetch the page's HTML. requests applies no timeout by default, so pass one
# explicitly to avoid hanging forever on an unresponsive server.
r = req.get(url, timeout=10)
# Hand the HTML to BeautifulSoup, using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, features="html.parser")
# find_all() with no arguments returns every tag in the document; print each.
for ele in soup.find_all():
    print(ele)
Beautifulsoup4 只获取所有的超链接
# Page to download.
url = "https://www.xyblog.cc/"
# Fetch the page's HTML. requests applies no timeout by default, so pass one
# explicitly to avoid hanging forever on an unresponsive server.
r = req.get(url, timeout=10)
# Hand the HTML to BeautifulSoup, using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, features="html.parser")
# Restrict the search to <a> tags so only hyperlinks are printed.
for ele in soup.find_all("a"):
    print(ele)
Beautifulsoup4 使用 id 获取元素
# Page to download.
url = "https://www.xyblog.cc/"
# Fetch the page's HTML. requests applies no timeout by default, so pass one
# explicitly to avoid hanging forever on an unresponsive server.
r = req.get(url, timeout=10)
# Hand the HTML to BeautifulSoup, using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, features="html.parser")
# Select elements by id using the CSS selector "#main_header"; print each.
for ele in soup.select("#main_header"):
    print(ele)
Beautifulsoup4 使用 class 获取元素
# Page to download.
url = "https://www.xyblog.cc/"
# Fetch the page's HTML. requests applies no timeout by default, so pass one
# explicitly to avoid hanging forever on an unresponsive server.
r = req.get(url, timeout=10)
# Hand the HTML to BeautifulSoup, using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, features="html.parser")
# Select elements by class using the CSS selector ".mt-2"; print each.
for ele in soup.select(".mt-2"):
    print(ele)
版权属于:江筱雨
本文链接:https://www.yuisblog.com/archives/106/
本站未注明转载的文章均为原创,并采用
CC BY-NC-SA 4.0 授权协议,转载请注明来源,谢谢!