# I/O模型
# 使用 SELECT 实现 HTTP 请求
import socket
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
# Shared selector: every HttpClient registers its socket here together with
# the callback the event loop should invoke when the socket becomes ready.
selector = DefaultSelector()
# URLs still being fetched; a client removes its URL once the server has
# closed the connection and the response has been printed.
urls = ["https://www.baidu.com"]
# Set to True by a client when ``urls`` becomes empty, which ends the event loop.
stop = False
class HttpClient:
    """Fetch one URL with a non-blocking socket driven by selector callbacks.

    The client relies on three module-level names: ``selector`` (where the
    socket and the next callback are registered), ``urls`` (the list of
    URLs still in flight) and ``stop`` (the flag that ends the event loop).

    Only plain HTTP is spoken: no TLS handshake is performed, and the port
    defaults to 80 whatever the URL scheme is, unless the URL names a port
    explicitly.
    """

    @staticmethod
    def _split_url(url):
        """Return ``(host_header, connect_host, connect_port, target)`` for *url*."""
        parts = urlparse(url)
        target = parts.path or "/"
        if parts.query:
            # The query string is part of the request target; dropping it
            # would request a different resource.
            target = "{}?{}".format(target, parts.query)
        # ``netloc`` may carry ":port" (correct for the Host header), while
        # connect() needs the bare host name and a numeric port.
        connect_host = parts.hostname or parts.netloc
        connect_port = parts.port or 80
        return parts.netloc, connect_host, connect_port, target

    @staticmethod
    def _extract_body(raw):
        """Return the text after the first blank line of *raw*, or "" if there is none."""
        # partition() splits only on the first separator, so a body that
        # itself contains "\r\n\r\n" is kept whole, and a response without a
        # separator yields an empty body instead of raising IndexError.
        body = raw.partition(b"\r\n\r\n")[2]
        # Undecodable bytes are replaced rather than aborting the callback.
        return body.decode("utf8", errors="replace")

    def _finish(self):
        """Close the socket and mark this client's URL as done."""
        global stop
        self.client.close()
        if self.orig_url in urls:
            urls.remove(self.orig_url)
        if not urls:
            # Nothing left in flight: let the event loop terminate.
            stop = True

    # Callback: send the request
    def handle_send(self, key):
        """Send the GET request once the socket is writable, then wait for data.

        Args:
            key: the ``SelectorKey`` of the ready socket.

        Raises:
            OSError: if sending fails; the socket is closed and the URL is
                marked as done before the error propagates.
        """
        selector.unregister(key.fd)
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(
            self.path, self.host
        )
        try:
            self.client.send(request.encode("utf8"))
        except OSError:
            self._finish()
            raise
        # Keep watching the socket, now for readability, and route the
        # event to handle_read.
        selector.register(self.client.fileno(), EVENT_READ, self.handle_read)

    # Callback: receive the response
    def handle_read(self, key):
        """Collect response bytes; on end of stream print the body and clean up.

        Args:
            key: the ``SelectorKey`` of the ready socket.
        """
        chunk = self.client.recv(1024)
        if chunk:
            self.data += chunk
            return
        # An empty read means the peer closed the connection.
        selector.unregister(key.fd)
        try:
            print(self._extract_body(self.data))
        finally:
            # Always release the socket and update ``urls``/``stop``, even if
            # printing fails, so the event loop can still finish.
            self._finish()

    def get_html(self, url):
        """Start fetching *url*: open a non-blocking connection and register it.

        Args:
            url: the URL to request. It is also the entry removed from
                ``urls`` when the response is complete.

        Raises:
            OSError: if the connection attempt fails immediately (for
                example, the host name cannot be resolved); the socket is
                closed first.
        """
        self.orig_url = url
        self.host, connect_host, connect_port, self.path = self._split_url(url)
        self.data = b""
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Use non-blocking I/O
        self.client.setblocking(False)
        try:
            self.client.connect((connect_host, connect_port))
        except BlockingIOError:
            # Expected for a non-blocking connect: it completes in the
            # background and the socket becomes writable when ready.
            pass
        except OSError:
            self.client.close()
            raise
        # Register with the selector; handle_send runs once the socket is writable.
        selector.register(self.client.fileno(), EVENT_WRITE, self.handle_send)
'''
SELECT + 回调 + 事件循环:不停地请求 Socket 的状态并调用对应的回调函数
这种`SELECT + 回调 + 事件循环`的方式性能比单纯使用 Socket 要高
因为它将一个完整的步骤拆分成了若干个阶段,每个阶段都是独立且不阻塞的,相互之间也没有耦合
但回调过多会造成代码难以维护,难于追踪,共享变量困难,以及难于处理异常的问题
'''
# Event loop
def loop():
    """Dispatch ready sockets to their callbacks until ``stop`` becomes true.

    Each pass waits on the module-level ``selector`` and, for every ready
    key, calls the callback stored in ``key.data`` with the key itself.

    The ``stop`` flag is what lets the loop end. The original notes mark an
    unconditional ``while True`` variant as suitable for Linux only and this
    flag-checked form as working on both Windows and Linux.
    """
    while not stop:
        for ready_key, _mask in selector.select():
            handler = ready_key.data
            handler(ready_key)
if __name__ == "__main__":
    # Drive the fetches from ``urls`` rather than repeating a literal: the
    # event loop only stops once every entry of ``urls`` has been removed,
    # so each entry needs a request of its own. Every URL gets its own
    # client because a client holds per-request state.
    pending = list(urls)
    for pending_url in pending:
        HttpClient().get_html(pending_url)
    # With nothing registered, the loop would have no event to wait for.
    if pending:
        loop()
# 大约 5 分钟