生成1千条评价,且每条评价一个单独的文件夹和文档

六月 19, 2025 / Ming / 2阅读 / 0评论/ 分类: 默认分类

python

import os
import random
import json
from datetime import datetime, timedelta
import shutil
import sys
import re
from pathlib import Path

# Built-in defaults. They are written to review_config.json when that file
# does not exist and are used to fill in keys missing from a user config.
DEFAULT_CONFIG = {
    # Word lists, keyed by the placeholder name used inside templates.
    "vocabulary": {
        "prefix": [
            "",
            "这款",
            "新买的",
            "入手",
            "强烈推荐",
            "终于买到",
            "给家里添了",
        ],
        "product": [
            "海尔康养沙发",
            "电动真皮沙发",
            "多功能沙发",
            "头层牛皮沙发",
        ],
        "material": [
            "头层牛皮",
            "进口牛皮",
            "优质真皮",
            "防刮牛皮",
            "小牛皮",
        ],
        "feature": [
            "三座独立电动",
            "无线遥控启动",
            "秒变大床",
            "静音电机",
            "多角度调节",
            "一键平躺",
        ],
        "charging": [
            "Type-C快充",
            "USB充电",
            "双接口充电",
            "隐藏式充电",
            "边充边用",
            "充电方便",
        ],
        "comfort": [
            "高回弹海绵",
            "久坐不塌",
            "支撑力强",
            "坐感舒适",
            "不闷热",
            "弹性十足",
        ],
        "design": [
            "零距离贴墙",
            "靠墙无缝隙",
            "省空间设计",
            "严丝合缝",
            "墙面零距离",
            "角落完美贴合",
        ],
        "praise": [
            "物超所值",
            "全家满意",
            "朋友都问",
            "质量可靠",
            "大品牌放心",
            "用着省心",
        ],
    },
    # Sentence skeletons; each {name} is filled from "vocabulary" or
    # "special_words".
    "templates": [
        "{prefix}{product}{material},{feature}",
        "{product}的{feature}和{charging},{design}",
        "用了{prefix}{product},{comfort},{design}",
        "{product}{charging}设计,{praise}",
        "三座都能{调节方式}{feature},{comfort}",
    ],
    # Placeholders that are substituted before the vocabulary pass.
    "special_words": {
        "调节方式": ["平躺", "调节", "按摩"],
    },
    # Ready-made reviews used when no fresh review can be generated.
    "fallbacks": [
        "海尔沙发质量很好,舒适耐用",
        "设计合理,做工精细,很满意",
        "大品牌值得信赖,推荐购买",
    ],
    "output_folder": "海尔沙发评价",
    "num_reviews": 1000,
    "date_range": 1000,
}

def load_config():
    """Load ``review_config.json``, creating it from the defaults if absent.

    The file is looked up relative to the current working directory. Keys
    missing from the user's file are filled in from ``DEFAULT_CONFIG``; for
    dict-valued sections (e.g. ``vocabulary``) the merge also happens one
    level down, so a missing word category is restored as well.

    Returns:
        dict: The effective configuration. It is always an object the caller
        may mutate freely: the defaults are deep-copied, never returned
        directly.
    """
    import copy  # local import: only this function needs it

    config_path = "review_config.json"
    # Work on a private copy so callers can never mutate DEFAULT_CONFIG.
    defaults = copy.deepcopy(DEFAULT_CONFIG)

    # First run: write the defaults out so the user has a file to edit.
    if not os.path.exists(config_path):
        try:
            with open(config_path, "w", encoding="utf-8") as f:
                json.dump(defaults, f, ensure_ascii=False, indent=2)
        except OSError as e:
            # A read-only directory should not stop generation.
            print(f"配置文件错误: {e}")
        return defaults

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"配置文件错误: {e}")
        return defaults

    if not isinstance(config, dict):
        print("配置文件错误: 顶层必须是JSON对象")
        return defaults

    # Merge: user values win, defaults fill the gaps.
    for key, default_value in defaults.items():
        if key not in config:
            config[key] = default_value
        elif not isinstance(config[key], type(default_value)):
            # A wrongly typed value (e.g. "num_reviews": "1000") would only
            # crash later, so replace it with the default right away.
            print(f"配置文件错误: {key} 类型不正确,已使用默认值")
            config[key] = default_value
        elif isinstance(default_value, dict):
            for subkey, sub_default in default_value.items():
                config[key].setdefault(subkey, sub_default)

    return config

def generate_review(config):
    """Build a function that returns a new, previously unseen review per call.

    Args:
        config: Mapping providing ``templates`` (list of strings with
            ``{name}`` placeholders), ``vocabulary`` (name -> word list),
            ``special_words`` (name -> word list, substituted first) and
            ``fallbacks`` (list of ready-made reviews).

    Returns:
        A zero-argument callable. Each call makes up to 500 attempts to
        build a review of 10 to 50 characters that this callable has not
        returned before. If every attempt fails it returns a random entry
        of ``config["fallbacks"]``, which may repeat.

    Placeholders with no vocabulary entry, or with an empty word list, are
    replaced by an empty string instead of raising.
    """
    max_attempts = 500
    # Upper bound on re-scan passes, so a vocabulary word that contains its
    # own placeholder cannot loop forever.
    max_expansion_passes = 10
    placeholder_re = re.compile(r'\{(\w+)\}')
    repeated_punct_re = re.compile(r'[,。]{2,}')
    used_phrases = set()

    def _pick(options):
        """Return a random option as text, or "" when there is none."""
        return str(random.choice(options)) if options else ""

    def _generate():
        """Expand one randomly chosen template into a candidate review."""
        templates = config.get("templates") or []
        if not templates:
            # Nothing to expand; the caller falls through to the fallbacks.
            return ""
        template = random.choice(templates)

        # Special words first: every occurrence gets the same choice.
        for key, options in (config.get("special_words") or {}).items():
            token = f"{{{key}}}"
            if token in template:
                template = template.replace(token, _pick(options))

        # Vocabulary placeholders: each occurrence gets its own choice.
        # Re-scan after each pass because a chosen word may itself contain
        # placeholders.
        vocabulary = config.get("vocabulary") or {}
        for _ in range(max_expansion_passes):
            placeholders = placeholder_re.findall(template)
            if not placeholders:
                break
            for ph in placeholders:
                template = template.replace(
                    f"{{{ph}}}", _pick(vocabulary.get(ph)), 1
                )
        else:
            # Pass budget exhausted: drop whatever is still unexpanded.
            template = placeholder_re.sub("", template)

        # Collapse runs of punctuation (left by empty substitutions) to the
        # first character of the run.
        template = repeated_punct_re.sub(lambda m: m.group(0)[0], template)
        return template.strip()

    def generate():
        """Return a fresh review, or a fallback if none could be produced."""
        for _ in range(max_attempts):
            review = _generate()
            if 10 <= len(review) <= 50 and review not in used_phrases:
                used_phrases.add(review)
                return review
        return random.choice(config["fallbacks"])

    return generate

def main():
    """Generate the configured reviews, one dated folder and file per review.

    Loads the configuration, recreates the output folder from scratch and,
    for review number ``i`` (1-based), writes ``评价.txt`` into a sub-folder
    named ``评价_YYYYMMDD`` whose date is ``i`` days after the start date.
    The start date is ``date_range`` days before now.

    Any existing output folder is deleted first. To avoid wiping unrelated
    data, the run is aborted when the configured folder resolves to the
    current working directory or one of its ancestors.
    """
    def _pause_if_frozen():
        # Keep the console window open when running as a frozen executable
        # (sys.frozen is set by bundlers such as PyInstaller).
        if getattr(sys, 'frozen', False):
            input("\n按Enter键退出...")

    print("=" * 50)
    print("海尔沙发评价生成器")
    print("=" * 50)

    # Load configuration
    config = load_config()
    print("✅ 已加载配置文件: review_config.json")
    print(f"词库数量: {sum(len(v) for v in config['vocabulary'].values())}个词汇")
    print(f"模板数量: {len(config['templates'])}个模板")

    # Build the review generator
    generate = generate_review(config)

    # Recreate the output directory. The path comes from a user-editable
    # file, so refuse values such as "", "." or ".." that would make rmtree
    # delete the working directory or a parent of it.
    main_dir = config["output_folder"]
    target = Path(main_dir).resolve()
    cwd = Path.cwd().resolve()
    if target == cwd or target in cwd.parents:
        print(f"❌ 输出目录不安全,已中止: {target}")
        _pause_if_frozen()
        return
    if os.path.exists(main_dir):
        shutil.rmtree(main_dir)
    os.makedirs(main_dir)

    # Generate the reviews
    num_reviews = config["num_reviews"]
    date_range = config["date_range"]
    start_date = datetime.now() - timedelta(days=date_range)

    print(f"\n开始生成 {num_reviews} 条评价...")

    for i in range(1, num_reviews + 1):
        # One folder per review, named after consecutive dates
        date_str = (start_date + timedelta(days=i)).strftime("%Y%m%d")
        folder_name = f"评价_{date_str}"
        folder_path = os.path.join(main_dir, folder_name)
        os.makedirs(folder_path)

        # Generate and save the review
        with open(os.path.join(folder_path, "评价.txt"), "w", encoding="utf-8") as f:
            f.write(generate())

        # Progress report every 100 reviews
        if i % 100 == 0:
            print(f"已生成 {i}/{num_reviews} 条评价")

    print(f"\n✅ 全部完成!{num_reviews}条评价已保存至: {os.path.abspath(main_dir)}")
    print("\n提示: 要自定义评价内容,请修改同目录下的 review_config.json 文件")
    print("=" * 50)

    _pause_if_frozen()

# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

json

{
  "vocabulary": {
    "prefix": [
      "",
      "这款",
      "新买的",
      "入手",
      "强烈推荐",
      "终于买到",
      "给家里添了"
    ],
    "product": [
      "海尔康养沙发",
      "电动真皮沙发",
      "多功能沙发",
      "头层牛皮沙发"
    ],
    "material": [
      "头层牛皮",
      "进口牛皮",
      "优质真皮",
      "防刮牛皮",
      "小牛皮"
    ],
    "feature": [
      "三座独立电动",
      "无线遥控启动",
      "秒变大床",
      "静音电机",
      "多角度调节",
      "一键平躺"
    ],
    "charging": [
      "Type-C快充",
      "USB充电",
      "双接口充电",
      "隐藏式充电",
      "边充边用",
      "充电方便"
    ],
    "comfort": [
      "高回弹海绵",
      "久坐不塌",
      "支撑力强",
      "坐感舒适",
      "不闷热",
      "弹性十足"
    ],
    "design": [
      "零距离贴墙",
      "靠墙无缝隙",
      "省空间设计",
      "严丝合缝",
      "墙面零距离",
      "角落完美贴合"
    ],
    "praise": [
      "物超所值",
      "全家满意",
      "朋友都问",
      "质量可靠",
      "大品牌放心",
      "用着省心"
    ]
  },
  "templates": [
    "{prefix}{product}{material},{feature}",
    "{product}的{feature}和{charging},{design}",
    "用了{prefix}{product},{comfort},{design}",
    "{product}{charging}设计,{praise}",
    "三座都能{调节方式}{feature},{comfort}"
  ],
  "special_words": {
    "调节方式": [
      "平躺",
      "调节",
      "按摩"
    ]
  },
  "fallbacks": [
    "海尔沙发质量很好,舒适耐用",
    "设计合理,做工精细,很满意",
    "大品牌值得信赖,推荐购买"
  ],
  "output_folder": "海尔沙发评价",
  "num_reviews": 1000,
  "date_range": 1000
}

使用说明:

  1. 配置文件 (review_config.json)

    • 程序首次运行时会自动创建此文件

    • 您可以修改此文件来自定义评价内容

文件结构如下:

{
  "vocabulary": {
    "分类名称": ["词1", "词2", ...]
  },
  "templates": [
    "评价模板{带占位符}",
    "另一个模板{带不同占位符}"
  ],
  "special_words": {
    "特殊词名称": ["选项1", "选项2"]
  },
  "fallbacks": ["备用评价1", "备用评价2"],
  "output_folder": "输出文件夹名称",
  "num_reviews": 生成评价数量,
  "date_range": 日期范围天数
}
  2. 自定义评价

    • 在 vocabulary 中添加/修改词库

    • 在 templates 中添加/修改评价模板

    • 使用 {占位符} 格式插入词库内容

    • 特殊词用 {特殊词名称} 格式,在 special_words 中配置

示例模板

"templates": [
  "{prefix}{product}{material},{feature}",
  "{product}的{feature}和{charging},{design}",
  "三座都能{调节方式}{feature},{comfort}"
]

打包成.exe文件:

安装PyInstaller:

pip install pyinstaller

打包命令:

pyinstaller --onefile --console --name "海尔评价生成器" sofa_review_generator.py
  1. 打包完成后:

    • 在 dist 文件夹中找到 海尔评价生成器.exe

    • 将 review_config.json 放在同一目录下

#电脑软件(10)

评论