kiterza/数据采集.py

## 数据采集.py
from lxml import etree
# 解析本地文件 etree.parse()
# 解析服务器返回信息 etree.HTML()
import urllib.request  # 调用urllib库
import csv  # 调用csv库，用于写入
import time

# 数据采集：http://www.instu.org/da101/zhanshi.php


# 对程序函数进行批量导入
def n_abcd(str_a=' ') -> str:
    """Convert a numeric answer code into its option letter.

    The codes '1' through '5' map to 'A' through 'E'. Any other value
    (including the default) yields the placeholder "暂无答案", so the
    function always returns a string.
    """
    for code, letter in zip('12345', 'ABCDE'):
        if str_a == code:
            return letter
    return "暂无答案"


def _single_choice_row(tree, j):
    """Build the CSV row for the j-th single-choice question of a parsed page.

    Args:
        tree: the lxml tree returned by ``etree.HTML`` for one listing page.
        j: 1-based position of the ``div_topic`` block inside the page.

    Returns:
        A list of eight fields: question text, the four option texts
        prefixed with "A".."D", a blank fifth-option placeholder, the answer
        letter (via ``n_abcd``) and the last text node of the block
        (presumably the explanation -- TODO confirm against the page).

    Raises:
        IndexError: if the heading or a mandatory option node is missing.
    """
    topic = f"//div[@class='div_topic'][{j}]"
    test_h = tree.xpath(topic + "/h4/text()")
    test_p = tree.xpath(topic + "/p/text()")
    test_fo = tree.xpath(topic + "/p/font/text()")
    # Split only on the first '、' so the question text itself stays intact.
    an = test_h[0].split('、', 1)[1]
    try:
        nn = test_fo[0].split('#')[1]
    except IndexError:
        # No highlighted answer on the page for this question.
        nn = "暂无答案"

    # Each option is described as (text-node index, '#'-field index, tolerant).
    # "tolerant" means a missing node/field yields "" instead of an error.
    # NOTE(review): for answer A, option D reads node 4 (same as option C);
    # node 5 looks more likely -- kept as in the original, confirm on the page.
    answer_a = ((2, 0, False), (3, 2, False), (4, 2, False), (4, 2, True))
    answer_b = ((1, 2, False), (2, 0, False), (3, 2, False), (4, 2, True))
    answer_c = ((1, 2, False), (2, 2, False), (3, 0, False), (4, 2, True))
    answer_d = ((1, 2, False), (2, 2, False), (3, 2, False), (4, 0, False))
    answer_d_shifted = ((2, 1, False), (3, 2, False), (4, 2, False), (5, 0, False))
    no_answer = ((1, 2, False), (2, 2, False), (3, 2, False), (4, 2, False))
    # Known page layouts, keyed by (number of text nodes, answer code).
    layouts = {
        (7, '1'): answer_a, (7, '2'): answer_b, (7, '3'): answer_c,
        (7, '4'): answer_d, (7, "暂无答案"): no_answer,
        (8, '1'): answer_a, (8, '2'): answer_b, (8, '3'): answer_c,
        (8, '4'): answer_d_shifted,
        (9, '2'): answer_b, (9, '3'): answer_c, (9, '4'): answer_d,
        (10, '1'): answer_a,
    }
    anomaly_labels = {7: "异常3", 8: "异常", 9: "异常2", 10: "异常1-10"}

    layout = layouts.get((len(test_p), nn))
    if layout is None:
        # Unknown layout: report it and emit blank options rather than
        # reusing the previous question's options (or raising NameError).
        print(anomaly_labels.get(len(test_p), "异常1"))
        print(j)
        options = ["", "", "", ""]
    else:
        options = []
        for node_index, field_index, tolerant in layout:
            try:
                options.append(test_p[node_index].split('#')[field_index])
            except IndexError:
                if not tolerant:
                    raise
                options.append("")

    ank = test_p[-1] if test_p else ""
    labelled = [letter + str(text) for letter, text in zip("ABCD", options)]
    return [an] + labelled + [" ", n_abcd(nn), ank]


def rec1_30() -> None:
    """Scrape pages 1-30 (single-choice questions) and append them to the CSV.

    Every page is expected to carry ten questions; each becomes one row in
    ``./test/数据采集.csv``. Progress is printed after each page. Network and
    parsing errors propagate to the caller.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/100.0.4896.75 Safari/537.36 "
    }
    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
        writer = csv.writer(a)
        for i in range(1, 31):
            url = "http://www.instu.org/da101/zhanshi.php?page=" + str(i)
            request = urllib.request.Request(url=url, headers=headers)
            # Close the HTTP response as soon as the body has been read.
            with urllib.request.urlopen(request) as url_response:
                read = url_response.read().decode("utf-8")
            tree = etree.HTML(read)
            for j in range(1, 11):
                writer.writerow(_single_choice_row(tree, j))
            print("\r", end="")
            print("开始单选数据获取：进度: {}/30，".format(i), end=" ")
        print("获取完成")


def _single_choice_row(tree, j):
    """Build the CSV row for the j-th single-choice question of a parsed page.

    Args:
        tree: the lxml tree returned by ``etree.HTML`` for one listing page.
        j: 1-based position of the ``div_topic`` block inside the page.

    Returns:
        A list of eight fields: question text, the four option texts
        prefixed with "A".."D", a blank fifth-option placeholder, the answer
        letter (via ``n_abcd``) and the last text node of the block
        (presumably the explanation -- TODO confirm against the page).

    Raises:
        IndexError: if the heading or a mandatory option node is missing.
    """
    topic = f"//div[@class='div_topic'][{j}]"
    test_h = tree.xpath(topic + "/h4/text()")
    test_p = tree.xpath(topic + "/p/text()")
    test_fo = tree.xpath(topic + "/p/font/text()")
    # Split only on the first '、' so the question text itself stays intact.
    an = test_h[0].split('、', 1)[1]
    try:
        nn = test_fo[0].split('#')[1]
    except IndexError:
        # No highlighted answer on the page for this question.
        nn = "暂无答案"

    # Each option is described as (text-node index, '#'-field index, tolerant).
    # "tolerant" means a missing node/field yields "" instead of an error.
    # NOTE(review): for answer A, option D reads node 4 (same as option C);
    # node 5 looks more likely -- kept as in the original, confirm on the page.
    answer_a = ((2, 0, False), (3, 2, False), (4, 2, False), (4, 2, True))
    answer_b = ((1, 2, False), (2, 0, False), (3, 2, False), (4, 2, True))
    answer_c = ((1, 2, False), (2, 2, False), (3, 0, False), (4, 2, True))
    answer_d = ((1, 2, False), (2, 2, False), (3, 2, False), (4, 0, False))
    answer_d_shifted = ((2, 1, False), (3, 2, False), (4, 2, False), (5, 0, False))
    no_answer = ((1, 2, False), (2, 2, False), (3, 2, False), (4, 2, False))
    # Known page layouts, keyed by (number of text nodes, answer code).
    layouts = {
        (7, '1'): answer_a, (7, '2'): answer_b, (7, '3'): answer_c,
        (7, '4'): answer_d, (7, "暂无答案"): no_answer,
        (8, '1'): answer_a, (8, '2'): answer_b, (8, '3'): answer_c,
        (8, '4'): answer_d_shifted,
        (9, '2'): answer_b, (9, '3'): answer_c, (9, '4'): answer_d,
        (10, '1'): answer_a,
    }
    anomaly_labels = {7: "异常3", 8: "异常", 9: "异常2", 10: "异常1-10"}

    layout = layouts.get((len(test_p), nn))
    if layout is None:
        # Unknown layout: report it and emit blank options rather than
        # reusing the previous question's options (or raising NameError).
        print(anomaly_labels.get(len(test_p), "异常1"))
        print(j)
        options = ["", "", "", ""]
    else:
        options = []
        for node_index, field_index, tolerant in layout:
            try:
                options.append(test_p[node_index].split('#')[field_index])
            except IndexError:
                if not tolerant:
                    raise
                options.append("")

    ank = test_p[-1] if test_p else ""
    labelled = [letter + str(text) for letter, text in zip("ABCD", options)]
    return [an] + labelled + [" ", n_abcd(nn), ank]


def _multi_choice_row(tree, j):
    """Build the CSV row for the j-th multiple-choice question of a parsed page.

    Args:
        tree: the lxml tree returned by ``etree.HTML`` for one listing page.
        j: 1-based position of the ``div_topic`` block inside the page.

    Returns:
        A list of eight fields: question text, option texts prefixed with
        "A".."D", the "E" option (or " " when answer code 5 is not among
        the highlighted answers), the list of answer letters, and the last
        text node of the block. The answer list is passed to the CSV writer
        as a Python list, so it is stored in its ``str()`` form.

    Raises:
        IndexError: if the heading, an answer marker or a mandatory option
            node is missing.
    """
    topic = f"//div[@class='div_topic'][{j}]"
    test_h = tree.xpath(topic + "/h4/text()")
    test_p = tree.xpath(topic + "/p/text()")
    test_fo = tree.xpath(topic + "/p/font/text()")
    # Split only on the first '、' so the question text itself stays intact.
    an = test_h[0].split('、', 1)[1]
    chosen = [marker.split('#')[1] for marker in test_fo]

    has_e = '5' in chosen
    # When answer 1 is highlighted the option nodes start one position later.
    base = 2 if '1' in chosen else 1
    labelled = []
    for offset, letter in enumerate("ABCDE" if has_e else "ABCD"):
        selected = str(offset + 1) in chosen
        # Highlighted options carry their text in field 0, the others in field 2.
        field_index = 0 if selected else 2
        try:
            text = test_p[base + offset].split('#')[field_index]
        except IndexError:
            # Only unselected C/D of a four-option question may be absent.
            if has_e or selected or letter not in ("C", "D"):
                raise
            text = " "
        labelled.append(letter + str(text))
    if not has_e:
        labelled.append(" ")

    answers = [n_abcd(code) for code in chosen]
    return [an] + labelled + [answers, test_p[-1]]


def rec31() -> None:
    """Scrape page 31 (mixed page) and append its questions to the CSV.

    Questions 1-7 are handled as single-choice and questions 8-10 as
    multiple-choice. Each becomes one row in ``./test/数据采集.csv`` and
    progress is printed after every question. Network and parsing errors
    propagate to the caller.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
    }
    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
        writer = csv.writer(a)
        url = "http://www.instu.org/da101/zhanshi.php?page=31"
        request = urllib.request.Request(url=url, headers=headers)
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(request) as url_response:
            read = url_response.read().decode("utf-8")
        tree = etree.HTML(read)

        for j in range(1, 8):
            writer.writerow(_single_choice_row(tree, j))
            print("\r", end="")
            print("开始混合数据获取：进度: {}/7，".format(j), end=" ")
        print("获取完成")

        for j in range(8, 11):
            writer.writerow(_multi_choice_row(tree, j))
            print("\r", end="")
            print("开始混合数据获取：进度: {}/10，".format(j), end=" ")
        print("获取完成")


def _multi_choice_row(tree, j):
    """Build the CSV row for the j-th multiple-choice question of a parsed page.

    Args:
        tree: the lxml tree returned by ``etree.HTML`` for one listing page.
        j: 1-based position of the ``div_topic`` block inside the page.

    Returns:
        A list of eight fields: question text, option texts prefixed with
        "A".."D", the "E" option (or " " when answer code 5 is not among
        the highlighted answers), the list of answer letters, and the last
        text node of the block. The answer list is passed to the CSV writer
        as a Python list, so it is stored in its ``str()`` form.

    Raises:
        IndexError: if the heading, an answer marker or a mandatory option
            node is missing.
    """
    topic = f"//div[@class='div_topic'][{j}]"
    test_h = tree.xpath(topic + "/h4/text()")
    test_p = tree.xpath(topic + "/p/text()")
    test_fo = tree.xpath(topic + "/p/font/text()")
    # Split only on the first '、' so the question text itself stays intact.
    an = test_h[0].split('、', 1)[1]
    chosen = [marker.split('#')[1] for marker in test_fo]

    has_e = '5' in chosen
    # When answer 1 is highlighted the option nodes start one position later.
    base = 2 if '1' in chosen else 1
    labelled = []
    for offset, letter in enumerate("ABCDE" if has_e else "ABCD"):
        selected = str(offset + 1) in chosen
        # Highlighted options carry their text in field 0, the others in field 2.
        field_index = 0 if selected else 2
        try:
            text = test_p[base + offset].split('#')[field_index]
        except IndexError:
            # Only unselected C/D of a four-option question may be absent.
            if has_e or selected or letter not in ("C", "D"):
                raise
            text = " "
        labelled.append(letter + str(text))
    if not has_e:
        labelled.append(" ")

    answers = [n_abcd(code) for code in chosen]
    return [an] + labelled + [answers, test_p[-1]]


def rec32_47() -> None:
    """Scrape pages 32-47 (multiple-choice questions) and append them to the CSV.

    Every page is expected to carry ten questions; each becomes one row in
    ``./test/数据采集.csv``. Progress is printed after each page. Network and
    parsing errors propagate to the caller.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
    }
    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
        writer = csv.writer(a)
        for page in range(32, 48):
            url = "http://www.instu.org/da101/zhanshi.php?page=" + str(page)
            request = urllib.request.Request(url=url, headers=headers)
            # Close the HTTP response as soon as the body has been read.
            with urllib.request.urlopen(request) as url_response:
                read = url_response.read().decode("utf-8")
            tree = etree.HTML(read)
            for j in range(1, 11):
                writer.writerow(_multi_choice_row(tree, j))
            print("\r", end="")
            print("开始多选数据获取：进度: {}/32-47，".format(page), end=" ")
        print("获取完成")


def _multi_choice_row(tree, j):
    """Build the CSV row for the j-th multiple-choice question of a parsed page.

    Args:
        tree: the lxml tree returned by ``etree.HTML`` for one listing page.
        j: 1-based position of the ``div_topic`` block inside the page.

    Returns:
        A list of eight fields: question text, option texts prefixed with
        "A".."D", the "E" option (or " " when answer code 5 is not among
        the highlighted answers), the list of answer letters, and the last
        text node of the block. The answer list is passed to the CSV writer
        as a Python list, so it is stored in its ``str()`` form.

    Raises:
        IndexError: if the heading, an answer marker or a mandatory option
            node is missing.
    """
    topic = f"//div[@class='div_topic'][{j}]"
    test_h = tree.xpath(topic + "/h4/text()")
    test_p = tree.xpath(topic + "/p/text()")
    test_fo = tree.xpath(topic + "/p/font/text()")
    # Split only on the first '、' so the question text itself stays intact.
    an = test_h[0].split('、', 1)[1]
    chosen = [marker.split('#')[1] for marker in test_fo]

    has_e = '5' in chosen
    # When answer 1 is highlighted the option nodes start one position later.
    base = 2 if '1' in chosen else 1
    labelled = []
    for offset, letter in enumerate("ABCDE" if has_e else "ABCD"):
        selected = str(offset + 1) in chosen
        # Highlighted options carry their text in field 0, the others in field 2.
        field_index = 0 if selected else 2
        try:
            text = test_p[base + offset].split('#')[field_index]
        except IndexError:
            # Only unselected C/D of a four-option question may be absent.
            if has_e or selected or letter not in ("C", "D"):
                raise
            text = " "
        labelled.append(letter + str(text))
    if not has_e:
        labelled.append(" ")

    answers = [n_abcd(code) for code in chosen]
    return [an] + labelled + [answers, test_p[-1]]


def _judgement_row(tree, j):
    """Build the CSV row for the j-th true/false question of a parsed page.

    The answer and the trailing field are taken from the first two text
    nodes of the block, each stripped of everything up to the first '】  '.
    All five option columns are left as a single space.

    Raises:
        IndexError: if the heading or either text node is missing or does
            not contain the expected separator.
    """
    topic = f"//div[@class='div_topic'][{j}]"
    test_h = tree.xpath(topic + "/h4/text()")
    test_p = tree.xpath(topic + "/p/text()")
    # Split only on the first '、' so the question text itself stays intact.
    an = test_h[0].split('、', 1)[1]
    ann = test_p[0].split('】  ', 1)[1]
    ank = test_p[1].split('】  ', 1)[1]
    return [an, " ", " ", " ", " ", " ", ann, ank]


def rec48() -> None:
    """Scrape page 48 (mixed page) and append its questions to the CSV.

    Questions 1-4 are handled as multiple-choice and questions 5-10 as
    true/false. Each becomes one row in ``./test/数据采集.csv``. Network and
    parsing errors propagate to the caller.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/100.0.4896.75 Safari/537.36 "
    }
    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
        writer = csv.writer(a)
        url = "http://www.instu.org/da101/zhanshi.php?page=48"
        request = urllib.request.Request(url=url, headers=headers)
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(request) as url_response:
            read = url_response.read().decode("utf-8")
        tree = etree.HTML(read)

        for j in range(1, 5):
            writer.writerow(_multi_choice_row(tree, j))
        for j in range(5, 11):
            writer.writerow(_judgement_row(tree, j))

        print("多选判断混合数据获取：进度10/10，", end=" ")
        time.sleep(0.5)
        print("获取完成")


def rec49_75() -> None:
    """Scrape the true/false questions on pages 49-75 and append them to the CSV.

    Pages 49-74 are read for 10 questions each and page 75 for 6.  Every row
    holds the question text, five blank option columns, the answer text and
    the topic text, i.e. the same 8 columns as the header written by the
    script entry point.

    Raises:
        urllib.error.URLError: if a page cannot be fetched.
        IndexError: if a question block lacks the expected text nodes or
            separators.
    """
    UA = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/100.0.4896.75 Safari/537.36 "
    }
    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
        writer = csv.writer(a)
        for page in range(49, 76):
            url = "http://www.instu.org/da101/zhanshi.php?page=" + str(page)
            request = urllib.request.Request(url=url, headers=UA)
            # Close the HTTP response as soon as the body has been read.
            with urllib.request.urlopen(request) as url_response:
                read = url_response.read().decode("utf-8")
            tree = etree.HTML(read)
            # Choose the question count up front instead of using for/else:
            # a loop's `else` runs whenever the loop ends without `break`,
            # which would write questions 1-6 of every page a second time
            # and leave page 75 without any rows.
            question_count = 10 if page < 75 else 6
            for j in range(1, question_count + 1):
                topic = "//div[@class='div_topic'][" + str(j) + "]"
                test_h = tree.xpath(topic + "/h4/text()")
                test_p = tree.xpath(topic + "/p/text()")
                # Split on the first '、' only, so a '、' inside the question
                # text itself does not truncate the question.
                an = test_h[0].split('、', 1)[1]
                # Both paragraphs carry a leading "…】  " label; keep the text after it.
                ann = test_p[0].split('】  ', 1)[1]
                ank = test_p[1].split('】  ', 1)[1]
                # True/false questions have no options: columns A-E stay blank.
                writer.writerow([an, " ", " ", " ", " ", " ", ann, ank])
            print("\r", end="")
            print("开始判断对错数据获取：进度: {}/49-75，".format(page), end=" ")
        print("获取完成")


# 导入完成


if __name__ == '__main__':
    # Header columns: question, options A-E, correct answer, topic.
    header = ['题干', '选项A', '选项B', '选项C', '选项D', "选项E", '正确答案', '考点']
    # NOTE(review): the file is opened in append mode, so every run adds
    # another header line to an existing CSV — confirm this is intended.
    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as csv_file:
        csv.writer(csv_file).writerow(header)

    # Pages 1-30 are single-choice, 31 mixes single- and multiple-choice,
    # 32-47 are multiple-choice, 48 mixes multiple-choice and true/false,
    # and the remaining pages are true/false.
    started_at = time.time()
    print("---------------开始获取---------------")
    rec1_30()    # single-choice pages
    rec31()      # mixed single-/multiple-choice page
    rec32_47()   # multiple-choice pages
    rec48()      # mixed multiple-choice / true-false page
    rec49_75()   # true/false pages
    # Measure before the cosmetic pauses below so they are not counted.
    elapsed = time.time() - started_at
    time.sleep(0.5)
    print("---------------获取完毕---------------\n")
    time.sleep(1)
    print('全部获取完毕，耗时{:.2f}秒'.format(elapsed))
	from lxml import etree
	# 解析本地文件 etree.parse()
	# 解析服务器返回信息 etree.HTML()
	import urllib.request # 调用urllib库
	import csv # 调用csv库，用于写入
	import time

	# 数据采集：http://www.instu.org/da101/zhanshi.php


	# 对程序函数进行批量导入
	def n_abcd(str_a=' ') -> str:
	    """Convert a numeric answer code ('1'-'5') to its option letter ('A'-'E').

	    Args:
	        str_a: answer code as a string.

	    Returns:
	        The matching letter, or "暂无答案" for any other value (including
	        the default ' '), so the function always returns a string.
	    """
	    for number, letter in (('1', 'A'), ('2', 'B'), ('3', 'C'), ('4', 'D'), ('5', 'E')):
	        if str_a == number:
	            return letter
	    return "暂无答案"


	def rec1_30() -> None:
	    """Scrape the single-choice questions on pages 1-30 and append them to the CSV.

	    Each page is read for 10 questions.  A row holds the question text,
	    options A-D (each prefixed with its letter), a blank option E, the
	    answer letter from ``n_abcd`` and the last ``<p>`` text node.

	    Where each option's text sits depends on the number of ``<p>`` text
	    nodes and on the answer code taken from the ``<font>`` text; the
	    supported combinations are listed in ``layouts``.  For any other
	    combination a diagnostic label and the question index are printed and
	    the options are written blank, so a previous question's options are
	    never reused.

	    Raises:
	        urllib.error.URLError: if a page cannot be fetched.
	        IndexError: if a question block lacks the expected text nodes or
	            '#'-separated fields.
	    """
	    # One entry per option A, B, C, D:
	    # (index into the <p> text nodes, index of the '#'-separated field,
	    #  whether an IndexError falls back to "").
	    # NOTE(review): in `marked_a`, option D reads node 4, the same node as
	    # option C; node 5 may have been intended — confirm against the page.
	    marked_a = ((2, 0, False), (3, 2, False), (4, 2, False), (4, 2, True))
	    marked_b = ((1, 2, False), (2, 0, False), (3, 2, False), (4, 2, True))
	    marked_c = ((1, 2, False), (2, 2, False), (3, 0, False), (4, 2, True))
	    marked_d = ((1, 2, False), (2, 2, False), (3, 2, False), (4, 0, False))
	    # NOTE(review): option A uses field 1 here, unlike every other layout — confirm.
	    marked_d_shifted = ((2, 1, False), (3, 2, False), (4, 2, False), (5, 0, False))
	    unmarked = ((1, 2, False), (2, 2, False), (3, 2, False), (4, 2, False))
	    # len(test_p) -> answer code -> layout
	    layouts = {
	        7: {'1': marked_a, '2': marked_b, '3': marked_c, '4': marked_d, "暂无答案": unmarked},
	        8: {'1': marked_a, '2': marked_b, '3': marked_c, '4': marked_d_shifted},
	        9: {'2': marked_b, '3': marked_c, '4': marked_d},
	        10: {'1': marked_a},
	    }
	    # Diagnostic printed when the answer code is unsupported for a node count.
	    anomaly_labels = {7: "异常3", 8: "异常", 9: "异常2", 10: "异常1-10"}

	    UA = {
	        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
	                      "Chrome/100.0.4896.75 Safari/537.36 "
	    }
	    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
	        writer = csv.writer(a)
	        for i in range(1, 31):
	            url = "http://www.instu.org/da101/zhanshi.php?page=" + str(i)
	            request = urllib.request.Request(url=url, headers=UA)
	            # Close the HTTP response as soon as the body has been read.
	            with urllib.request.urlopen(request) as url_response:
	                read = url_response.read().decode("utf-8")
	            tree = etree.HTML(read)
	            for j in range(1, 11):
	                topic = "//div[@class='div_topic'][" + str(j) + "]"
	                test_h = tree.xpath(topic + "/h4/text()")
	                test_p = tree.xpath(topic + "/p/text()")
	                test_fo = tree.xpath(topic + "/p/font/text()")
	                # Split on the first '、' only, so a '、' inside the question
	                # text itself does not truncate the question.
	                an = test_h[0].split('、', 1)[1]
	                try:
	                    nn = test_fo[0].split('#')[1]
	                except IndexError:
	                    # No <font> text (or no '#' field): no answer is marked.
	                    nn = "暂无答案"
	                data_len = len(test_p)
	                layout = layouts.get(data_len, {}).get(nn)
	                if layout is None:
	                    # Unknown node count falls back to the generic label.
	                    print(anomaly_labels.get(data_len, "异常1"))
	                    print(j)
	                    options = ["", "", "", ""]
	                else:
	                    options = []
	                    for node, field, lenient in layout:
	                        try:
	                            options.append(test_p[node].split('#')[field])
	                        except IndexError:
	                            if not lenient:
	                                raise
	                            options.append("")
	                ank = test_p[len(test_p) - 1]
	                writer.writerow([
	                    an,
	                    "A" + options[0],
	                    "B" + options[1],
	                    "C" + options[2],
	                    "D" + options[3],
	                    " ",
	                    n_abcd(nn),
	                    ank,
	                ])
	            print("\r", end="")
	            print("开始单选数据获取：进度: {}/30，".format(i), end=" ")
	        print("获取完成")


	def rec31() -> None:
	    """Scrape page 31 (single- and multiple-choice mixed) and append it to the CSV.

	    Questions 1-7 are handled as single-choice and questions 8-10 as
	    multiple-choice.  A row holds the question text, the lettered options,
	    option E (or a blank), the answer and the last ``<p>`` text node.  For
	    single-choice rows the answer is one letter from ``n_abcd``; for
	    multiple-choice rows it is the list of letters, written as-is.

	    For a single-choice question whose node count / answer code combination
	    is not listed in ``layouts``, a diagnostic label and the question index
	    are printed and the options are written blank.

	    NOTE(review): the nesting of the progress prints was reconstructed from
	    statement order — confirm against the intended console output.

	    Raises:
	        urllib.error.URLError: if the page cannot be fetched.
	        IndexError: if a question block lacks the expected text nodes or
	            '#'-separated fields.
	    """
	    # One entry per option A, B, C, D:
	    # (index into the <p> text nodes, index of the '#'-separated field,
	    #  whether an IndexError falls back to "").
	    # NOTE(review): in `marked_a`, option D reads node 4, the same node as
	    # option C; node 5 may have been intended — confirm against the page.
	    marked_a = ((2, 0, False), (3, 2, False), (4, 2, False), (4, 2, True))
	    marked_b = ((1, 2, False), (2, 0, False), (3, 2, False), (4, 2, True))
	    marked_c = ((1, 2, False), (2, 2, False), (3, 0, False), (4, 2, True))
	    marked_d = ((1, 2, False), (2, 2, False), (3, 2, False), (4, 0, False))
	    # NOTE(review): option A uses field 1 here, unlike every other layout — confirm.
	    marked_d_shifted = ((2, 1, False), (3, 2, False), (4, 2, False), (5, 0, False))
	    unmarked = ((1, 2, False), (2, 2, False), (3, 2, False), (4, 2, False))
	    # len(test_p) -> answer code -> layout
	    layouts = {
	        7: {'1': marked_a, '2': marked_b, '3': marked_c, '4': marked_d, "暂无答案": unmarked},
	        8: {'1': marked_a, '2': marked_b, '3': marked_c, '4': marked_d_shifted},
	        9: {'2': marked_b, '3': marked_c, '4': marked_d},
	        10: {'1': marked_a},
	    }
	    # Diagnostic printed when the answer code is unsupported for a node count.
	    anomaly_labels = {7: "异常3", 8: "异常", 9: "异常2", 10: "异常1-10"}

	    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
	        writer = csv.writer(a)
	        url = "http://www.instu.org/da101/zhanshi.php?page=31"
	        UA = {
	            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
	        }
	        request = urllib.request.Request(url=url, headers=UA)
	        # Close the HTTP response as soon as the body has been read.
	        with urllib.request.urlopen(request) as url_response:
	            read = url_response.read().decode("utf-8")
	        tree = etree.HTML(read)

	        # Questions 1-7: single-choice.
	        for j in range(1, 8):
	            topic = "//div[@class='div_topic'][" + str(j) + "]"
	            test_h = tree.xpath(topic + "/h4/text()")
	            test_p = tree.xpath(topic + "/p/text()")
	            test_fo = tree.xpath(topic + "/p/font/text()")
	            # Split on the first '、' only, so a '、' inside the question
	            # text itself does not truncate the question.
	            an = test_h[0].split('、', 1)[1]
	            try:
	                nn = test_fo[0].split('#')[1]
	            except IndexError:
	                # No <font> text (or no '#' field): no answer is marked.
	                nn = "暂无答案"
	            data_len = len(test_p)
	            layout = layouts.get(data_len, {}).get(nn)
	            if layout is None:
	                # Unknown node count falls back to the generic label.
	                print(anomaly_labels.get(data_len, "异常1"))
	                print(j)
	                options = ["", "", "", ""]
	            else:
	                options = []
	                for node, field, lenient in layout:
	                    try:
	                        options.append(test_p[node].split('#')[field])
	                    except IndexError:
	                        if not lenient:
	                            raise
	                        options.append("")
	            ank = test_p[len(test_p) - 1]
	            writer.writerow([
	                an,
	                "A" + options[0],
	                "B" + options[1],
	                "C" + options[2],
	                "D" + options[3],
	                " ",
	                n_abcd(nn),
	                ank,
	            ])
	            print("\r", end="")
	            print("开始混合数据获取：进度: {}/7，".format(j), end=" ")
	        print("获取完成")

	        # Questions 8-10: multiple-choice.
	        for j in range(8, 11):
	            topic = "//div[@class='div_topic'][" + str(j) + "]"
	            test_h = tree.xpath(topic + "/h4/text()")
	            test_p = tree.xpath(topic + "/p/text()")
	            test_fo = tree.xpath(topic + "/p/font/text()")
	            an = test_h[0].split('、', 1)[1]
	            # Answer codes, one per <font> text node.
	            list_fo = [marked.split('#')[1] for marked in test_fo]
	            # When option A is among the answers, every option is read one
	            # text node later.
	            shift = 1 if '1' in list_fo else 0
	            # Option E is only read when '5' is among the answers.
	            has_e = '5' in list_fo
	            options = []
	            for number in range(1, 6 if has_e else 5):
	                position = number + shift
	                if str(number) in list_fo:
	                    # Options that are answers are read from field 0.
	                    options.append(test_p[position].split('#')[0])
	                    continue
	                try:
	                    options.append(test_p[position].split('#')[2])
	                except IndexError:
	                    # Only options C and D of a four-option question may be
	                    # missing; they are written as a blank.
	                    if has_e or number < 3:
	                        raise
	                    options.append(" ")
	            an5 = "E" + options[4] if has_e else " "
	            # Turn the numeric answer codes into letters.
	            list_a = [n_abcd(code) for code in list_fo]
	            ank = test_p[len(test_p) - 1]
	            writer.writerow([
	                an,
	                "A" + options[0],
	                "B" + options[1],
	                "C" + options[2],
	                "D" + options[3],
	                an5,
	                list_a,
	                ank,
	            ])
	            print("\r", end="")
	            print("开始混合数据获取：进度: {}/10，".format(j), end=" ")
	        print("获取完成")


	def rec32_47() -> None:
	    """Scrape the multiple-choice questions on pages 32-47 and append them to the CSV.

	    Each page is read for 10 questions.  A row holds the question text,
	    options A-D (each prefixed with its letter), option E (or a blank when
	    '5' is not among the answer codes), the list of answer letters written
	    as-is, and the last ``<p>`` text node.

	    Raises:
	        urllib.error.URLError: if a page cannot be fetched.
	        IndexError: if a question block lacks the expected text nodes or
	            '#'-separated fields.
	    """
	    UA = {
	        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
	    }
	    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
	        writer = csv.writer(a)
	        for page in range(32, 48):
	            url = "http://www.instu.org/da101/zhanshi.php?page=" + str(page)
	            request = urllib.request.Request(url=url, headers=UA)
	            # Close the HTTP response as soon as the body has been read.
	            with urllib.request.urlopen(request) as url_response:
	                read = url_response.read().decode("utf-8")
	            tree = etree.HTML(read)
	            for j in range(1, 11):
	                topic = "//div[@class='div_topic'][" + str(j) + "]"
	                test_h = tree.xpath(topic + "/h4/text()")
	                test_p = tree.xpath(topic + "/p/text()")
	                test_fo = tree.xpath(topic + "/p/font/text()")
	                # Split on the first '、' only, so a '、' inside the question
	                # text itself does not truncate the question.
	                an = test_h[0].split('、', 1)[1]
	                # Answer codes, one per <font> text node.
	                list_fo = [marked.split('#')[1] for marked in test_fo]
	                # When option A is among the answers, every option is read
	                # one text node later.
	                shift = 1 if '1' in list_fo else 0
	                # Option E is only read when '5' is among the answers.
	                has_e = '5' in list_fo
	                options = []
	                for number in range(1, 6 if has_e else 5):
	                    position = number + shift
	                    if str(number) in list_fo:
	                        # Options that are answers are read from field 0.
	                        options.append(test_p[position].split('#')[0])
	                        continue
	                    try:
	                        options.append(test_p[position].split('#')[2])
	                    except IndexError:
	                        # Only options C and D of a four-option question may
	                        # be missing; they are written as a blank.
	                        if has_e or number < 3:
	                            raise
	                        options.append(" ")
	                an5 = "E" + options[4] if has_e else " "
	                # Turn the numeric answer codes into letters.
	                list_a = [n_abcd(code) for code in list_fo]
	                ank = test_p[len(test_p) - 1]
	                writer.writerow([
	                    an,
	                    "A" + options[0],
	                    "B" + options[1],
	                    "C" + options[2],
	                    "D" + options[3],
	                    an5,
	                    list_a,
	                    ank,
	                ])
	            print("\r", end="")
	            print("开始多选数据获取：进度: {}/32-47，".format(page), end=" ")
	        print("获取完成")


	def rec48() -> None:
	    """Scrape page 48 (multiple-choice and true/false mixed) and append it to the CSV.

	    Questions 1-4 are handled as multiple-choice: the row holds the
	    question text, options A-D, option E (or a blank), the list of answer
	    letters written as-is, and the last ``<p>`` text node.  Questions 5-10
	    are handled as true/false: the option columns stay blank and the last
	    two columns hold the text after the "…】 " label of the first and
	    second ``<p>`` text nodes.

	    Raises:
	        urllib.error.URLError: if the page cannot be fetched.
	        IndexError: if a question block lacks the expected text nodes or
	            separators.
	    """
	    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
	        writer = csv.writer(a)
	        url = "http://www.instu.org/da101/zhanshi.php?page=48"
	        UA = {
	            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
	                          "Chrome/100.0.4896.75 Safari/537.36 "
	        }
	        request = urllib.request.Request(url=url, headers=UA)
	        # Close the HTTP response as soon as the body has been read.
	        with urllib.request.urlopen(request) as url_response:
	            read = url_response.read().decode("utf-8")
	        tree = etree.HTML(read)

	        # Questions 1-4: multiple-choice.
	        for j in range(1, 5):
	            topic = "//div[@class='div_topic'][" + str(j) + "]"
	            test_h = tree.xpath(topic + "/h4/text()")
	            test_p = tree.xpath(topic + "/p/text()")
	            test_fo = tree.xpath(topic + "/p/font/text()")
	            # Split on the first '、' only, so a '、' inside the question
	            # text itself does not truncate the question.
	            an = test_h[0].split('、', 1)[1]
	            # Answer codes, one per <font> text node.
	            list_fo = [marked.split('#')[1] for marked in test_fo]
	            # When option A is among the answers, every option is read one
	            # text node later.
	            shift = 1 if '1' in list_fo else 0
	            # Option E is only read when '5' is among the answers.
	            has_e = '5' in list_fo
	            options = []
	            for number in range(1, 6 if has_e else 5):
	                position = number + shift
	                if str(number) in list_fo:
	                    # Options that are answers are read from field 0.
	                    options.append(test_p[position].split('#')[0])
	                    continue
	                try:
	                    options.append(test_p[position].split('#')[2])
	                except IndexError:
	                    # Only options C and D of a four-option question may be
	                    # missing; they are written as a blank.
	                    if has_e or number < 3:
	                        raise
	                    options.append(" ")
	            an5 = "E" + options[4] if has_e else " "
	            # Turn the numeric answer codes into letters.
	            list_a = [n_abcd(code) for code in list_fo]
	            ank = test_p[len(test_p) - 1]
	            writer.writerow([
	                an,
	                "A" + options[0],
	                "B" + options[1],
	                "C" + options[2],
	                "D" + options[3],
	                an5,
	                list_a,
	                ank,
	            ])

	        # Questions 5-10: true/false.
	        for j in range(5, 11):
	            topic = "//div[@class='div_topic'][" + str(j) + "]"
	            test_h = tree.xpath(topic + "/h4/text()")
	            test_p = tree.xpath(topic + "/p/text()")
	            an = test_h[0].split('、', 1)[1]
	            ann = test_p[0].split('】 ', 1)[1]
	            ank = test_p[1].split('】 ', 1)[1]
	            writer.writerow([an, " ", " ", " ", " ", " ", ann, ank])
	        print("多选判断混合数据获取：进度10/10，", end=" ")
	        time.sleep(0.5)
	        print("获取完成")


	def rec49_75() -> None:
	    """Scrape the true/false questions on pages 49-75 and append them to the CSV.

	    Pages 49-74 are read for 10 questions each and page 75 for 6.  Every
	    row holds the question text, five blank option columns, and the text
	    after the "…】 " label of the first and second ``<p>`` text nodes.

	    Raises:
	        urllib.error.URLError: if a page cannot be fetched.
	        IndexError: if a question block lacks the expected text nodes or
	            separators.
	    """
	    UA = {
	        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
	                      "Chrome/100.0.4896.75 Safari/537.36 "
	    }
	    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
	        writer = csv.writer(a)
	        for page in range(49, 76):
	            url = "http://www.instu.org/da101/zhanshi.php?page=" + str(page)
	            request = urllib.request.Request(url=url, headers=UA)
	            # Close the HTTP response as soon as the body has been read.
	            with urllib.request.urlopen(request) as url_response:
	                read = url_response.read().decode("utf-8")
	            tree = etree.HTML(read)
	            # A per-page question count keeps the two cases mutually
	            # exclusive, so no question is written twice.
	            question_count = 10 if page < 75 else 6
	            for j in range(1, question_count + 1):
	                topic = "//div[@class='div_topic'][" + str(j) + "]"
	                test_h = tree.xpath(topic + "/h4/text()")
	                test_p = tree.xpath(topic + "/p/text()")
	                # Split on the first '、' only, so a '、' inside the question
	                # text itself does not truncate the question.
	                an = test_h[0].split('、', 1)[1]
	                ann = test_p[0].split('】 ', 1)[1]
	                ank = test_p[1].split('】 ', 1)[1]
	                # True/false questions have no options: columns A-E stay blank.
	                writer.writerow([an, " ", " ", " ", " ", " ", ann, ank])
	            print("\r", end="")
	            print("开始判断对错数据获取：进度: {}/49-75，".format(page), end=" ")
	        print("获取完成")


	# 导入完成


	if __name__ == '__main__':
	    # Header columns: question, options A-E, correct answer, topic.
	    header = ['题干', '选项A', '选项B', '选项C', '选项D', "选项E", '正确答案', '考点']
	    # NOTE(review): the file is opened in append mode, so every run adds
	    # another header line to an existing CSV — confirm this is intended.
	    with open('./test/数据采集.csv', 'a', newline="", encoding='utf-8') as a:
	        writer = csv.writer(a)
	        writer.writerow(header)
	    # Pages 1-30 are single-choice, 31 mixes single- and multiple-choice,
	    # 32-47 are multiple-choice, 48 mixes multiple-choice and true/false,
	    # and the remaining pages are true/false.
	    rec_open = time.time()
	    print("---------------开始获取---------------")
	    rec1_30()  # single-choice pages
	    rec31()  # mixed single-/multiple-choice page
	    rec32_47()  # multiple-choice pages
	    rec48()  # mixed multiple-choice / true-false page
	    rec49_75()  # true/false pages
	    rec_end = time.time()
	    time.sleep(0.5)
	    print("---------------获取完毕---------------\n")
	    time.sleep(1)
	    # The pauses above are cosmetic and are not part of the reported time.
	    atime = rec_end - rec_open
	    print('全部获取完毕，耗时{:.2f}秒'.format(atime))