# Custom output path, e.g. a file on the desktop.
file_path = "C:\\Users\\你的电脑用户名\\Desktop\\文件名.txt"

# NOTE(review): h1_elements and p_elements are not defined in this excerpt;
# presumably they come from the BeautifulSoup lookups shown in getPage.

# Create (or overwrite) the text file. The encoding is given explicitly so
# the Chinese text is written as UTF-8 regardless of the platform default.
with open(file_path, "w", encoding="utf-8") as file:
    # Write the chapter titles.
    for h1 in h1_elements:
        file.write("本文标题: " + h1.text + "\n")

    # Write the body text with the unwanted site boilerplate removed.
    for p in p_elements:
        cleaned_text = p.text.replace("请收藏本站:。笔趣阁手机版:", "").replace("『点此报错』", "").replace("『加入书签』", "")
        file.write("本文正文: " + cleaned_text + "\n")

# Report success once the file has been written and closed.
print(f"内容已写入 {file_path} 文件")


import requests
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen

# Create a requests session object (not referenced by the functions visible
# in this listing, which fetch pages through urllib instead).
session = requests.session()

# Base URL for requests (left empty in this listing).
host = ""

# URL of the page to request (left empty in this listing).
page = ""

def requestUrl(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Request headers: identify as a desktop Chrome browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
    }
    # Build the request object.
    req = Request(url, headers=headers)
    # Open the URL; the context manager closes the response even when
    # reading or decoding fails.
    with urlopen(req) as response:
        # Read the HTML and decode it as UTF-8.
        return response.read().decode('utf-8')

# Everything from here on depends on the markup of the page being scraped;
# adjust the tag and class selectors to match the actual site.
def getPage(page, file_path="C:\\Users\\xingchen\\Desktop\\novel_content.txt"):
    """Download one page and save its titles and body text to a text file.

    Args:
        page: URL of the page to fetch (passed to ``requestUrl``).
        file_path: Destination text file. Defaults to the path that was
            previously hard-coded; the file is overwritten on every call.

    Returns:
        None. The result is written to *file_path* and a confirmation
        message is printed.
    """
    # Fetch the page's HTML and load it into a soup structure.
    html = requestUrl(page)
    soup = BeautifulSoup(html, "html.parser")
    # Titles: <h1 class="wap_none">
    h1_elements = soup.find_all('h1', attrs={'class': 'wap_none'})
    # Body text: <div class="Readarea ReadAjax_content">
    p_elements = soup.find_all('div', attrs={'class': 'Readarea ReadAjax_content'})

    # Site boilerplate that is removed from the body text.
    unwanted = ("请收藏本站:。笔趣阁手机版:", "『点此报错』", "『加入书签』")

    # Create or overwrite the text file.
    with open(file_path, "w", encoding="utf-8") as file:
        # Write the titles.
        for h1 in h1_elements:
            file.write("本文标题: " + h1.text + "\n")

        # Write the body text without the unwanted fragments.
        for p in p_elements:
            cleaned_text = p.text
            for fragment in unwanted:
                cleaned_text = cleaned_text.replace(fragment, "")
            file.write("本文正文: " + cleaned_text + "\n")

    print(f"内容已写入 {file_path} 文件")




# Add a line break after every sentence-ending mark. Both the ASCII and the
# fullwidth forms of "!" and "?" are handled, since Chinese text normally
# uses the fullwidth ones (U+FF01, U+FF1F).
# NOTE(review): cleaned_text and file are not defined in this fragment; it
# is presumably meant to sit inside getPage's body-text loop.
cleaned_text_with_breaks = cleaned_text
for _mark in ("。", "!", "?", "\uff01", "\uff1f"):
    cleaned_text_with_breaks = cleaned_text_with_breaks.replace(_mark, _mark + "\n")
file.write("本文正文: " + cleaned_text_with_breaks + "\n\n")


import requests
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen

# Create a requests session object (not referenced by the functions visible
# in this listing, which fetch pages through urllib instead).
session = requests.session()

# Base URL for requests (left empty in this listing).
host = ""

# URL of the page to request (left empty in this listing).
page = ""

def requestUrl(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Request headers: identify as a desktop Chrome browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
    }
    # Build the request object.
    req = Request(url, headers=headers)
    # Open the URL; the context manager closes the response even when
    # reading or decoding fails.
    with urlopen(req) as response:
        # Read the HTML and decode it as UTF-8.
        return response.read().decode('utf-8')

# Everything from here on depends on the markup of the page being scraped;
# adjust the tag and class selectors to match the actual site.
def getPage(page, file_path="C:\\Users\\xingchen\\Desktop\\novel_content.txt"):
    """Download one page and save its titles and body text to a text file.

    The body text is cleaned of site boilerplate and a line break is added
    after each sentence-ending mark.

    Args:
        page: URL of the page to fetch (passed to ``requestUrl``).
        file_path: Destination text file. Defaults to the path that was
            previously hard-coded; the file is overwritten on every call.

    Returns:
        None. The result is written to *file_path* and a confirmation
        message is printed.
    """
    # Fetch the page's HTML and load it into a soup structure.
    html = requestUrl(page)
    soup = BeautifulSoup(html, "html.parser")
    # Titles: <h1 class="wap_none">
    h1_elements = soup.find_all('h1', attrs={'class': 'wap_none'})
    # Body text: <div class="Readarea ReadAjax_content">
    p_elements = soup.find_all('div', attrs={'class': 'Readarea ReadAjax_content'})

    # Site boilerplate that is removed from the body text.
    unwanted = ("请收藏本站:。笔趣阁手机版:", "『点此报错』", "『加入书签』")
    # Sentence-ending marks that get a line break appended. The fullwidth
    # forms of "!" and "?" (U+FF01, U+FF1F) are included because Chinese
    # text normally uses them rather than the ASCII ones.
    sentence_enders = ("。", "!", "?", "\uff01", "\uff1f")

    # Create or overwrite the text file.
    with open(file_path, "w", encoding="utf-8") as file:
        # Write the titles, each followed by a blank line.
        for h1 in h1_elements:
            file.write(h1.text + "\n\n")

        # Write the body text without the unwanted fragments.
        for p in p_elements:
            cleaned_text = p.text
            for fragment in unwanted:
                cleaned_text = cleaned_text.replace(fragment, "")
            # Add the line breaks.
            cleaned_text_with_breaks = cleaned_text
            for mark in sentence_enders:
                cleaned_text_with_breaks = cleaned_text_with_breaks.replace(mark, mark + "\n")
            file.write(cleaned_text_with_breaks + "\n\n")

    print(f"内容已写入 {file_path} 文件")





在当前工作目录(通常即项目文件夹)下新建 小说名_作者名.txt 文件,并把所有章节写入其中;此版本没有自定义存放路径的功能。

import os
import requests
from bs4 import BeautifulSoup

# Target site URL (left empty in this listing). Chapter URLs are built as
# host + chapter file name, so it is used as a prefix.
host = ""

# Send an HTTP request and return the page content.
def requestUrl(url, timeout=10):
    """Download *url* with a desktop-browser User-Agent and return its text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely on an unresponsive
            host.

    Returns:
        The response body as decoded by ``requests`` (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text

# Parse a page's HTML with BeautifulSoup.
def getSoup(url):
    """Fetch *url* through ``requestUrl`` and parse it with ``html.parser``."""
    return BeautifulSoup(requestUrl(url), "html.parser")

# Fetch the text of one chapter.
def getPage(page):
    """Return the title and body text of the chapter at URL *page*.

    Args:
        page: URL of the chapter page.

    Returns:
        The title, a newline, and the chapter text with everything from the
        last "请" onward removed. If the title or content element is
        missing, the literal string "Title not found" or
        "Content not found" is returned instead.
    """
    soup = getSoup(page)

    # Chapter title: <h1 class="wap_none">
    h1 = soup.find("h1", attrs={'class': 'wap_none'})
    if h1 is None:
        return "Title not found"

    # Chapter content: the element with id="chaptercontent"
    content = soup.find(id="chaptercontent")
    if content is None:
        return "Content not found"

    text = content.getText("\n")
    # Cut at the last "请" (presumably the start of the site's trailing
    # notice; verify against the page). rfind returns -1 when the character
    # is absent; slicing with -1 would drop the final character, so the
    # text is left untouched in that case.
    cut = text.rfind("请")
    if cut != -1:
        text = text[:cut]
    return h1.text + "\n" + text

# Look up the novel's author.
def getAuthor(soup):
    """Return the author named in the page's ``og:novel:author`` meta tag.

    Args:
        soup: Parsed page exposing ``find``.

    Returns:
        The tag's ``content`` value with surrounding whitespace removed, or
        "Unknown Author" when the tag is missing or has no usable content.
    """
    author_meta = soup.find("meta", {"property": "og:novel:author"})
    if author_meta is None:
        return "Unknown Author"
    # .get avoids a KeyError when the tag has no content attribute.
    content = author_meta.get("content")
    if isinstance(content, str) and content.strip():
        return content.strip()
    return "Unknown Author"

# Crawler entry point.
def spider():
    """Download every chapter linked from ``host`` into one text file.

    The file is named "<title>_<author>.txt", is created in the current
    working directory, and is opened in append mode, so running the crawler
    again adds to an existing file.
    """
    soup = getSoup(host)

    # Novel information. get_text() also handles an <h1> that contains
    # nested tags, for which .string would be None.
    fileName = soup.find(attrs={'class': 'info'}).h1.get_text(strip=True)
    author = getAuthor(soup)

    # Build the save path, replacing characters Windows forbids in file names.
    safe_name = "".join(
        "_" if ch in '\\/:*?"<>|' else ch for ch in f"{fileName}_{author}"
    )
    save_path = os.path.join(os.getcwd(), f"{safe_name}.txt")

    # Write the novel to the file, one chapter per link.
    with open(save_path, "a", encoding='utf-8') as file:
        # href=True skips anchors without a link target, which would
        # otherwise raise KeyError below.
        for a in soup.find(attrs={'class': 'listmain'}).find_all("a", href=True):
            # Keep only the last path segment of the link and append it to host.
            index = a["href"].rfind("/") + 1
            file.write(getPage(host + a["href"][index:]))

# Run the crawler when this file is executed as a script.
if __name__ == "__main__":
    spider()


import os
import requests
from bs4 import BeautifulSoup

# Target site URL (left empty in this listing). Chapter URLs are built as
# host + chapter file name, so it is used as a prefix.
host = ""

# Send an HTTP request and return the page content.
def requestUrl(url, timeout=10):
    """Download *url* with a desktop-browser User-Agent and return its text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely on an unresponsive
            host.

    Returns:
        The response body as decoded by ``requests`` (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text

# Parse a page's HTML with BeautifulSoup.
def getSoup(url):
    """Fetch *url* through ``requestUrl`` and parse it with ``html.parser``."""
    return BeautifulSoup(requestUrl(url), "html.parser")

# Fetch the text of one chapter.
def getPage(page):
    """Return the title and body text of the chapter at URL *page*.

    Args:
        page: URL of the chapter page.

    Returns:
        The title, a newline, and the chapter text with everything from the
        last "请" onward removed. If the title or content element is
        missing, the literal string "Title not found" or
        "Content not found" is returned instead.
    """
    soup = getSoup(page)

    # Chapter title: <h1 class="wap_none">
    h1 = soup.find("h1", attrs={'class': 'wap_none'})
    if h1 is None:
        return "Title not found"

    # Chapter content: the element with id="chaptercontent"
    content = soup.find(id="chaptercontent")
    if content is None:
        return "Content not found"

    text = content.getText("\n")
    # Cut at the last "请" (presumably the start of the site's trailing
    # notice; verify against the page). rfind returns -1 when the character
    # is absent; slicing with -1 would drop the final character, so the
    # text is left untouched in that case.
    cut = text.rfind("请")
    if cut != -1:
        text = text[:cut]
    return h1.text + "\n" + text

# Look up the novel's author.
def getAuthor(soup):
    """Return the author named in the page's ``og:novel:author`` meta tag.

    Args:
        soup: Parsed page exposing ``find``.

    Returns:
        The tag's ``content`` value with surrounding whitespace removed, or
        "Unknown Author" when the tag is missing or has no usable content.
    """
    author_meta = soup.find("meta", {"property": "og:novel:author"})
    if author_meta is None:
        return "Unknown Author"
    # .get avoids a KeyError when the tag has no content attribute.
    content = author_meta.get("content")
    if isinstance(content, str) and content.strip():
        return content.strip()
    return "Unknown Author"

# Crawler entry point; accepts an optional custom save path.
def spider(custom_save_path=None):
    """Download every chapter linked from ``host`` into one text file.

    Args:
        custom_save_path: Destination file. When None, the file is named
            "<title>_<author>.txt" and created in the current working
            directory.

    The file is opened in append mode, so running the crawler again adds to
    an existing file.
    """
    soup = getSoup(host)
    # get_text() also handles an <h1> that contains nested tags, for which
    # .string would be None.
    fileName = soup.find(attrs={'class': 'info'}).h1.get_text(strip=True)
    author = getAuthor(soup)

    if custom_save_path is None:
        # No custom path: save to the current working directory, replacing
        # characters Windows forbids in file names.
        safe_name = "".join(
            "_" if ch in '\\/:*?"<>|' else ch for ch in f"{fileName}_{author}"
        )
        custom_save_path = os.path.join(os.getcwd(), f"{safe_name}.txt")

    with open(custom_save_path, "a", encoding='utf-8') as file:
        # href=True skips anchors without a link target, which would
        # otherwise raise KeyError below.
        for a in soup.find(attrs={'class': 'listmain'}).find_all("a", href=True):
            # Keep only the last path segment of the link and append it to host.
            index = a["href"].rfind("/") + 1
            file.write(getPage(host + a["href"][index:]))

# Look up the novel title and author, used below to name the output file.
soup = getSoup(host)
fileName = soup.find(attrs={'class': 'info'}).h1.string
author = getAuthor(soup)

# Example: save into the Downloads directory on drive D (absolute path),
# with the file named "<title>_<author>.txt".
custom_save_path = os.path.join("D:\\Downloads\\", "{}_{}.txt".format(fileName, author))

# Call spider with the custom path. The original comment announced this
# call, but the statement itself was missing.
spider(custom_save_path)



import os
import requests
from bs4 import BeautifulSoup

# Target site URL (left empty in this listing). Chapter URLs are built as
# host + chapter file name, so it is used as a prefix.
host = ""

# Send an HTTP request and return the page content.
def requestUrl(url, timeout=10):
    """Download *url* with a desktop-browser User-Agent and return its text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely on an unresponsive
            host.

    Returns:
        The response body as decoded by ``requests`` (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text

# Parse a page's HTML with BeautifulSoup.
def getSoup(url):
    """Fetch *url* through ``requestUrl`` and parse it with ``html.parser``."""
    return BeautifulSoup(requestUrl(url), "html.parser")

# Fetch the text of one chapter.
def getPage(page):
    """Return the title and body text of the chapter at URL *page*.

    Args:
        page: URL of the chapter page.

    Returns:
        The title, a newline, and the chapter text with everything from the
        last "请" onward removed. If the title or content element is
        missing, the literal string "Title not found" or
        "Content not found" is returned instead.
    """
    soup = getSoup(page)

    # Chapter title: <h1 class="wap_none">
    h1 = soup.find("h1", attrs={'class': 'wap_none'})
    if h1 is None:
        return "Title not found"

    # Chapter content: the element with id="chaptercontent"
    content = soup.find(id="chaptercontent")
    if content is None:
        return "Content not found"

    text = content.getText("\n")
    # Cut at the last "请" (presumably the start of the site's trailing
    # notice; verify against the page). rfind returns -1 when the character
    # is absent; slicing with -1 would drop the final character, so the
    # text is left untouched in that case.
    cut = text.rfind("请")
    if cut != -1:
        text = text[:cut]
    return h1.text + "\n" + text

# Look up the novel's author.
def getAuthor(soup):
    """Return the author named in the page's ``og:novel:author`` meta tag.

    Args:
        soup: Parsed page exposing ``find``.

    Returns:
        The tag's ``content`` value with surrounding whitespace removed, or
        "Unknown Author" when the tag is missing or has no usable content.
    """
    author_meta = soup.find("meta", {"property": "og:novel:author"})
    if author_meta is None:
        return "Unknown Author"
    # .get avoids a KeyError when the tag has no content attribute.
    content = author_meta.get("content")
    if isinstance(content, str) and content.strip():
        return content.strip()
    return "Unknown Author"

# Crawler entry point; accepts an optional custom save path and chapter range.
def spider(custom_save_path=None, start_chapter=None, end_chapter=None):
    """Download chapters linked from ``host`` into one text file.

    Args:
        custom_save_path: Destination file. When None, the file is named
            "<title>_<author>.txt" and created in the current working
            directory.
        start_chapter: 1-based position of the first chapter to download;
            None starts at the first link.
        end_chapter: 1-based position of the last chapter to download
            (inclusive); None continues to the last link.

    The file is opened in append mode, so running the crawler again adds to
    an existing file.
    """
    soup = getSoup(host)
    # get_text() also handles an <h1> that contains nested tags, for which
    # .string would be None.
    fileName = soup.find(attrs={'class': 'info'}).h1.get_text(strip=True)
    author = getAuthor(soup)

    if custom_save_path is None:
        # No custom path: save to the current working directory, replacing
        # characters Windows forbids in file names.
        safe_name = "".join(
            "_" if ch in '\\/:*?"<>|' else ch for ch in f"{fileName}_{author}"
        )
        custom_save_path = os.path.join(os.getcwd(), f"{safe_name}.txt")

    with open(custom_save_path, "a", encoding='utf-8') as file:
        # href=True skips anchors without a link target, which would
        # otherwise raise KeyError below.
        chapters = soup.find(attrs={'class': 'listmain'}).find_all("a", href=True)

        # Convert the 1-based inclusive range to a 0-based half-open slice:
        # chapters 1..100 are indices 0..99, i.e. [0:100]. Each bound is
        # applied independently, and the start is clamped at 0 so that a
        # value below 1 cannot wrap around to the end of the list.
        first = 0 if start_chapter is None else max(start_chapter - 1, 0)
        chapters = chapters[first:end_chapter]

        for a in chapters:
            # Keep only the last path segment of the link and append it to host.
            index = a["href"].rfind("/") + 1
            file.write(getPage(host + a["href"][index:]))

# Look up the novel title and author, used below to name the output file.
soup = getSoup(host)
author = getAuthor(soup)
fileName = soup.find(attrs={'class': 'info'}).h1.string

# Example: save into the Downloads directory on drive D (absolute path),
# with the file named "<title>_<author>.txt".
custom_save_path = os.path.join("D:\\Downloads\\", f"{fileName}_{author}.txt")

# Call spider with the custom path and a chapter range (here 1 to 100).
spider(custom_save_path, start_chapter=1, end_chapter=100)




如果要爬取其他笔趣阁镜像站或其他网站的内容,请根据目标页面的实际标签(如 class、id)修改源代码;以上源代码仅适用于本文所针对的这个站点。
