批量检查网站可达性并返回检测结果。
检测 website.txt 文件中的网站列表是否存活,每行一个网站。
pip install requests tqdm beautifulsoup4
脚本:
import concurrent.futures
import requests
import csv
from datetime import datetime
import socket
from tqdm import tqdm
from bs4 import BeautifulSoup
def check_website(url):
    """Probe one website and return a report dict for the check.

    The report carries the original URL as given, an ISO-8601 timestamp,
    the HTTP status code, the elapsed response time in seconds, any error
    text, a success flag (2xx/3xx final status), and the page <title>
    (empty string when missing or on failure).
    """
    report = {
        "url": url,
        "timestamp": datetime.now().isoformat(),
        "status_code": None,
        "response_time": None,
        "error": None,
        "success": False,
        "title": "",
    }
    try:
        # Entries without a scheme default to plain HTTP.
        if not url.startswith(("http://", "https://")):
            url = "http://" + url
        response = requests.get(
            url,
            timeout=4,
            allow_redirects=True,
            # Browser-like User-Agent so naive bot filters let the probe through.
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            },
        )
        report["status_code"] = response.status_code
        report["response_time"] = response.elapsed.total_seconds()
        report["success"] = 200 <= response.status_code < 400
        if report["success"]:
            # On success, pull the page title (whitespace-stripped) if present.
            title_tag = BeautifulSoup(response.content, "html.parser").find("title")
            if title_tag:
                report["title"] = title_tag.get_text(strip=True)
    except (
        requests.exceptions.RequestException,
        socket.gaierror,
        ConnectionError,
    ) as e:
        # Expected network-level failures: record the message, keep success=False.
        report["error"] = str(e)
    except Exception as e:
        # Anything else (e.g. parser issues) is tagged as unexpected.
        report["error"] = f"Unexpected error: {str(e)}"
    return report
def main():
    """Read URLs from website.txt, probe them concurrently, save output.csv."""
    # Read the website list, one URL per line, skipping blank lines.
    # Explicit UTF-8 so non-ASCII entries decode the same on every platform
    # (the default locale encoding, e.g. GBK on Windows, would mis-decode them).
    with open("website.txt", encoding="utf-8") as f:
        urls = [line.strip() for line in f if line.strip()]
    # Thread pool fan-out for the I/O-bound checks, with a tqdm progress bar.
    # NOTE(review): 300 workers is aggressive for most hosts — tune if needed.
    with concurrent.futures.ThreadPoolExecutor(max_workers=300) as executor:
        results = list(
            tqdm(
                executor.map(check_website, urls), total=len(urls), desc="网站检测进度"
            )
        )
    # Write results as CSV. Explicit UTF-8 prevents UnicodeEncodeError on
    # Windows, where the locale default (e.g. GBK) cannot represent many
    # page titles scraped from arbitrary sites.
    with open("output.csv", "w", newline="", encoding="utf-8") as csvfile:
        fieldnames = [
            "title",
            "url",
            "status_code",
            "response_time",
            "error",
            "success",
            "timestamp",
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)
    print("检测完成,结果已保存到 output.csv 文件中。")


if __name__ == "__main__":
    main()