批量检查网站可达性并返回检测结果。
检测 website.txt 文件中的网站列表是否存活,每行一个网站。
pip install requests tqdm beautifulsoup4
脚本:
import concurrent.futures
import requests
import csv
from datetime import datetime
import socket
from tqdm import tqdm
from bs4 import BeautifulSoup
def check_website(url):
    """Probe one website and return a report dict for the check.

    The report carries the original URL as given, an ISO-8601 timestamp,
    the HTTP status code, the elapsed response time in seconds, any error
    text, a success flag (2xx/3xx final status), and the page <title>
    (empty string when missing or on failure).
    """
    report = {
        "url": url,
        "timestamp": datetime.now().isoformat(),
        "status_code": None,
        "response_time": None,
        "error": None,
        "success": False,
        "title": "",
    }
    try:
        # Entries without a scheme default to plain HTTP.
        if not url.startswith(("http://", "https://")):
            url = "http://" + url
        response = requests.get(
            url,
            timeout=4,
            allow_redirects=True,
            # Browser-like User-Agent so naive bot filters let the probe through.
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            },
        )
        report["status_code"] = response.status_code
        report["response_time"] = response.elapsed.total_seconds()
        report["success"] = 200 <= response.status_code < 400
        if report["success"]:
            # On success, pull the page title (whitespace-stripped) if present.
            title_tag = BeautifulSoup(response.content, "html.parser").find("title")
            if title_tag:
                report["title"] = title_tag.get_text(strip=True)
    except (
        requests.exceptions.RequestException,
        socket.gaierror,
        ConnectionError,
    ) as e:
        # Expected network-level failures: record the message, keep success=False.
        report["error"] = str(e)
    except Exception as e:
        # Anything else (e.g. parser issues) is tagged as unexpected.
        report["error"] = f"Unexpected error: {str(e)}"
    return report
def main():
    """Read URLs from website.txt, probe them concurrently, save output.csv."""
    # Read the website list, one URL per line, skipping blank lines.
    # Explicit UTF-8 so non-ASCII entries decode the same on every platform
    # (the default locale encoding, e.g. GBK on Windows, would mis-decode them).
    with open("website.txt", encoding="utf-8") as f:
        urls = [line.strip() for line in f if line.strip()]
    # Thread pool fan-out for the I/O-bound checks, with a tqdm progress bar.
    # NOTE(review): 300 workers is aggressive for most hosts — tune if needed.
    with concurrent.futures.ThreadPoolExecutor(max_workers=300) as executor:
        results = list(
            tqdm(
                executor.map(check_website, urls), total=len(urls), desc="网站检测进度"
            )
        )
    # Write results as CSV. Explicit UTF-8 prevents UnicodeEncodeError on
    # Windows, where the locale default (e.g. GBK) cannot represent many
    # page titles scraped from arbitrary sites.
    with open("output.csv", "w", newline="", encoding="utf-8") as csvfile:
        fieldnames = [
            "title",
            "url",
            "status_code",
            "response_time",
            "error",
            "success",
            "timestamp",
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)
    print("检测完成,结果已保存到 output.csv 文件中。")


if __name__ == "__main__":
    main()