生成1千条评价,且每条评价放在一个单独的文件夹和文档中
python
import copy
import json
import os
import random
import re
import shutil
import sys
from datetime import datetime, timedelta
from pathlib import Path
# Default configuration. load_config() writes it to review_config.json when
# that file does not exist and uses it to fill in keys missing from the
# user's file.
DEFAULT_CONFIG = {
    # Word lists keyed by the placeholder name used inside templates.
    "vocabulary": {
        "prefix": [
            "",
            "这款",
            "新买的",
            "入手",
            "强烈推荐",
            "终于买到",
            "给家里添了",
        ],
        "product": [
            "海尔康养沙发",
            "电动真皮沙发",
            "多功能沙发",
            "头层牛皮沙发",
        ],
        "material": [
            "头层牛皮",
            "进口牛皮",
            "优质真皮",
            "防刮牛皮",
            "小牛皮",
        ],
        "feature": [
            "三座独立电动",
            "无线遥控启动",
            "秒变大床",
            "静音电机",
            "多角度调节",
            "一键平躺",
        ],
        "charging": [
            "Type-C快充",
            "USB充电",
            "双接口充电",
            "隐藏式充电",
            "边充边用",
            "充电方便",
        ],
        "comfort": [
            "高回弹海绵",
            "久坐不塌",
            "支撑力强",
            "坐感舒适",
            "不闷热",
            "弹性十足",
        ],
        "design": [
            "零距离贴墙",
            "靠墙无缝隙",
            "省空间设计",
            "严丝合缝",
            "墙面零距离",
            "角落完美贴合",
        ],
        "praise": [
            "物超所值",
            "全家满意",
            "朋友都问",
            "质量可靠",
            "大品牌放心",
            "用着省心",
        ],
    },
    # Review skeletons; each {name} is replaced by a random word.
    "templates": [
        "{prefix}{product}{material},{feature}",
        "{product}的{feature}和{charging},{design}",
        "用了{prefix}{product},{comfort},{design}",
        "{product}{charging}设计,{praise}",
        "三座都能{调节方式}{feature},{comfort}",
    ],
    # Placeholders that are substituted before the vocabulary ones.
    "special_words": {
        "调节方式": ["平躺", "调节", "按摩"],
    },
    # Ready-made reviews used when no new unique review can be generated.
    "fallbacks": [
        "海尔沙发质量很好,舒适耐用",
        "设计合理,做工精细,很满意",
        "大品牌值得信赖,推荐购买",
    ],
    # Directory that receives one sub-folder per review.
    "output_folder": "海尔沙发评价",
    # Number of reviews to generate.
    "num_reviews": 1000,
    # Folder dates start this many days before today.
    "date_range": 1000,
}
def load_config():
    """Load the generator configuration from ``review_config.json``.

    If the file does not exist, the defaults are written to it so the user
    has something to edit, and a copy of the defaults is returned. If the
    file exists, it is parsed and any top-level key (or sub-key of a
    dictionary section) missing from it is filled in from ``DEFAULT_CONFIG``.
    Entries whose type differs from the default's type are replaced by the
    default, with a warning.

    Returns:
        dict: A configuration that shares no mutable objects with
        ``DEFAULT_CONFIG``, so callers may modify it freely.
    """
    config_path = "review_config.json"

    # First run: create the file from the defaults.
    if not os.path.exists(config_path):
        try:
            with open(config_path, "w", encoding="utf-8") as f:
                json.dump(DEFAULT_CONFIG, f, ensure_ascii=False, indent=2)
        except OSError as e:
            # An unwritable directory should not stop generation.
            print(f"无法创建配置文件: {e}")
        return copy.deepcopy(DEFAULT_CONFIG)

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"配置文件错误: {e}")
        return copy.deepcopy(DEFAULT_CONFIG)

    if not isinstance(config, dict):
        print("配置文件错误: 顶层必须是JSON对象")
        return copy.deepcopy(DEFAULT_CONFIG)

    # Merge: user values win, defaults fill the gaps. Deep copies keep the
    # module-level defaults from being aliased into the returned config.
    for key, default in DEFAULT_CONFIG.items():
        if key not in config:
            config[key] = copy.deepcopy(default)
        elif not isinstance(config[key], type(default)):
            print(f"配置项 {key} 类型错误,已使用默认值")
            config[key] = copy.deepcopy(default)
        elif isinstance(default, dict):
            for subkey, subdefault in default.items():
                if subkey not in config[key]:
                    config[key][subkey] = copy.deepcopy(subdefault)
    return config
def generate_review(config):
    """Build a function that produces review texts without repeating them.

    Args:
        config: Mapping with the keys ``"templates"`` (list of template
            strings), ``"special_words"`` and ``"vocabulary"`` (placeholder
            name -> list of words) and ``"fallbacks"`` (list of ready-made
            reviews).

    Returns:
        A zero-argument function. Each call returns a review of 10 to 50
        characters that this function has not returned before. If no such
        review is found within 500 attempts, a random entry of
        ``config["fallbacks"]`` is returned instead; fallbacks may repeat.
    """
    max_attempts = 500
    # Upper bound on re-scans, so a vocabulary entry that expands to its own
    # placeholder cannot loop forever.
    max_expansion_passes = 10
    placeholder_re = re.compile(r'\{(\w+)\}')
    repeated_punct_re = re.compile(r'[,。]{2,}')
    used_phrases = set()

    def _fill(match):
        """Return a random word for the matched placeholder, or ''."""
        options = config["vocabulary"].get(match.group(1))
        # Unknown placeholders and empty word lists both expand to nothing.
        return random.choice(options) if options else ""

    def _generate():
        """Expand one randomly chosen template into a candidate review."""
        templates = config["templates"]
        if not templates:
            # Too short to pass the length filter, so the caller falls back.
            return ""
        template = random.choice(templates)

        # Special words first: one choice replaces every occurrence.
        for key, options in config["special_words"].items():
            token = f"{{{key}}}"
            if options and token in template:
                template = template.replace(token, random.choice(options))

        # Vocabulary placeholders; re-scan because a chosen word may itself
        # contain placeholders.
        for _ in range(max_expansion_passes):
            template, replaced = placeholder_re.subn(_fill, template)
            if not replaced:
                break
        else:
            # Still expanding after the last pass: drop what is left.
            template = placeholder_re.sub("", template)

        # Collapse runs of punctuation left behind by empty substitutions.
        template = repeated_punct_re.sub(lambda m: m.group(0)[0], template)
        return template.strip()

    def generate():
        """Return the next unused review, or a fallback after too many tries."""
        for _ in range(max_attempts):
            review = _generate()
            if 10 <= len(review) <= 50 and review not in used_phrases:
                used_phrases.add(review)
                return review
        return random.choice(config["fallbacks"])

    return generate
def _is_unsafe_output_dir(path):
    """Return True if wiping *path* would delete the working directory or a parent of it."""
    target = Path(path).resolve()
    cwd = Path.cwd().resolve()
    return target == cwd or target in cwd.parents


def _write_reviews(main_dir, num_reviews, date_range, generate):
    """Recreate *main_dir* and write one review file per dated sub-folder."""
    # Start from an empty output directory.
    if os.path.exists(main_dir):
        shutil.rmtree(main_dir)
    os.makedirs(main_dir)

    start_date = datetime.now() - timedelta(days=date_range)
    print(f"\n开始生成 {num_reviews} 条评价...")
    for i in range(1, num_reviews + 1):
        # One folder per consecutive day, which keeps the names unique.
        # NOTE(review): when num_reviews exceeds date_range the dates run
        # past today - confirm whether that is intended.
        date_str = (start_date + timedelta(days=i)).strftime("%Y%m%d")
        folder_path = os.path.join(main_dir, f"评价_{date_str}")
        os.makedirs(folder_path)

        with open(os.path.join(folder_path, "评价.txt"), "w", encoding="utf-8") as f:
            f.write(generate())

        if i % 100 == 0:
            print(f"已生成 {i}/{num_reviews} 条评价")


def main():
    """Run the generator: load the config, write the reviews, report progress.

    The output folder named in the configuration is deleted and recreated.
    Generation is refused when that folder resolves to the current working
    directory or one of its parents, because deleting it would also remove
    the script and its configuration file.
    """
    print("=" * 50)
    print("海尔沙发评价生成器")
    print("=" * 50)

    config = load_config()
    print("✅ 已加载配置文件: review_config.json")
    print(f"词库数量: {sum(len(v) for v in config['vocabulary'].values())}个词汇")
    print(f"模板数量: {len(config['templates'])}个模板")

    generate = generate_review(config)
    main_dir = config["output_folder"]

    if _is_unsafe_output_dir(main_dir):
        print(f"\n❌ 输出文件夹 {main_dir!r} 指向当前目录或其上级目录,为避免误删文件已取消生成")
    else:
        num_reviews = config["num_reviews"]
        _write_reviews(main_dir, num_reviews, config["date_range"], generate)
        print(f"\n✅ 全部完成!{num_reviews}条评价已保存至: {os.path.abspath(main_dir)}")
        print("\n提示: 要自定义评价内容,请修改同目录下的 review_config.json 文件")
    print("=" * 50)

    # Keep the console window open when running as a packaged executable.
    if getattr(sys, 'frozen', False):
        input("\n按Enter键退出...")


if __name__ == "__main__":
    main()
json
{
"vocabulary": {
"prefix": [
"",
"这款",
"新买的",
"入手",
"强烈推荐",
"终于买到",
"给家里添了"
],
"product": [
"海尔康养沙发",
"电动真皮沙发",
"多功能沙发",
"头层牛皮沙发"
],
"material": [
"头层牛皮",
"进口牛皮",
"优质真皮",
"防刮牛皮",
"小牛皮"
],
"feature": [
"三座独立电动",
"无线遥控启动",
"秒变大床",
"静音电机",
"多角度调节",
"一键平躺"
],
"charging": [
"Type-C快充",
"USB充电",
"双接口充电",
"隐藏式充电",
"边充边用",
"充电方便"
],
"comfort": [
"高回弹海绵",
"久坐不塌",
"支撑力强",
"坐感舒适",
"不闷热",
"弹性十足"
],
"design": [
"零距离贴墙",
"靠墙无缝隙",
"省空间设计",
"严丝合缝",
"墙面零距离",
"角落完美贴合"
],
"praise": [
"物超所值",
"全家满意",
"朋友都问",
"质量可靠",
"大品牌放心",
"用着省心"
]
},
"templates": [
"{prefix}{product}{material},{feature}",
"{product}的{feature}和{charging},{design}",
"用了{prefix}{product},{comfort},{design}",
"{product}{charging}设计,{praise}",
"三座都能{调节方式}{feature},{comfort}"
],
"special_words": {
"调节方式": [
"平躺",
"调节",
"按摩"
]
},
"fallbacks": [
"海尔沙发质量很好,舒适耐用",
"设计合理,做工精细,很满意",
"大品牌值得信赖,推荐购买"
],
"output_folder": "海尔沙发评价",
"num_reviews": 1000,
"date_range": 1000
}
使用说明:
配置文件 (review_config.json):
程序首次运行时会自动创建此文件
您可以修改此文件来自定义评价内容
文件结构如下:
{
"vocabulary": {
"分类名称": ["词1", "词2", ...]
},
"templates": [
"评价模板{带占位符}",
"另一个模板{带不同占位符}"
],
"special_words": {
"特殊词名称": ["选项1", "选项2"]
},
"fallbacks": ["备用评价1", "备用评价2"],
"output_folder": "输出文件夹名称",
"num_reviews": 生成评价数量,
"date_range": 日期范围天数
}
自定义评价:
- 在 vocabulary 中添加/修改词库
- 在 templates 中添加/修改评价模板
- 使用 {占位符} 格式插入词库内容
- 特殊词用 {特殊词名称} 格式,在 special_words 中配置
示例模板:
"templates": [
"{prefix}{product}{material},{feature}",
"{product}的{feature}和{charging},{design}",
"三座都能{调节方式}{feature},{comfort}"
]
打包成.exe文件:
安装PyInstaller:
pip install pyinstaller
打包命令:
pyinstaller --onefile --console --name "海尔评价生成器" sofa_review_generator.py
打包完成后:
在 dist 文件夹中找到 海尔评价生成器.exe
将 review_config.json 放在同一目录下
评论