# feishu_fabu/scripts/wp_publish.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
WordPress 发布系统 - 主发布脚本
整合 Word 解析、图片上传、分类匹配、文章发布全流程
"""

import os
import sys
import json
import argparse

# Add the project root to the Python path.
# BASE_DIR is the parent of the directory that contains this script; it is put
# at the front of sys.path before the `modules.*` imports that follow
# (presumably the `modules` package lives in that root — confirm).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, BASE_DIR)

from modules.wp_logger import get_publish_logger, get_debug_logger
from modules.wp_parse_docx import parse_word_file
from modules.wp_image_handler import create_image_handler
from modules.wp_formatter import create_formatter
from modules.wp_api import create_wp_api
from modules.wp_category import create_category_matcher

# Path of the configuration file: `config.py` directly under BASE_DIR.
CONFIG_FILE = os.path.join(BASE_DIR, 'config.py')


def load_config():
    """Build the publishing configuration.

    Starts from the built-in defaults and, when ``CONFIG_FILE`` exists,
    executes that file as Python so that its top-level assignments override
    the defaults of the same name.

    The file runs in a scratch copy of the defaults and is merged back only
    after it has run to completion. A file that raises part-way through
    therefore leaves the defaults untouched, which is what the printed
    warning promises. Dunder names that ``exec`` injects into the namespace
    (such as ``__builtins__``) are not copied into the result.

    Returns:
        dict: Configuration values keyed by setting name.
    """
    # SECURITY(review): credentials are hard-coded below as defaults. They
    # are kept only so existing setups keep working; they should be moved to
    # config.py or the environment, and rotated.
    config = {
        'wp_url': 'https://www.nanlou.net',
        'wp_user': 'shaowu',
        'wp_password': 'zjzz gHYm 8Q3l KbZk y4CF 2DQi',
        'default_category': 7,
        'auto_match_category': True,
        'optimize_images': True,
        'image_max_width': 1200,
        'image_quality': 85,
        'post_status': 'publish'
    }

    if os.path.exists(CONFIG_FILE):
        try:
            with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
                source = f.read()
            # NOTE(review): exec runs arbitrary code, so CONFIG_FILE must be
            # a trusted file. The defaults are visible to it as globals.
            namespace = dict(config)
            exec(compile(source, CONFIG_FILE, 'exec'), namespace)
        except Exception as e:
            print(f"加载配置文件失败：{str(e)}，使用默认配置")
        else:
            # Merge only after a clean run, skipping interpreter-injected
            # dunder entries so the result holds settings only.
            config.update(
                (key, value)
                for key, value in namespace.items()
                if not key.startswith('__')
            )

    return config


def publish_word_document(word_file_path, instruction=None, status=None, category_id=None, tags=None):
    """Publish a Word document to WordPress.

    Runs the whole pipeline: parse the document, upload its images, resolve
    the category, render the HTML body and excerpt, then create the post.

    Exceptions raised inside the pipeline are caught and reported in the
    returned dict. Errors raised while setting up loggers, configuration or
    the helper objects happen before that guard and propagate to the caller.

    Args:
        word_file_path: Path of the Word document to publish.
        instruction: Optional instruction text forwarded to the category
            matcher.
        status: Optional post status; falls back to the ``post_status``
            configuration value, then to ``'publish'``.
        category_id: Optional category ID. When truthy it is used as-is and
            the category matcher is skipped.
        tags: Optional list of tags; sent only when non-empty.

    Returns:
        dict: On success, ``success`` (True), ``post_id``, ``post_url``,
        ``title``, ``category_id`` and ``images_uploaded``. On failure,
        ``success`` (False) and ``error``.
    """
    # Loggers
    pl = get_publish_logger()
    dl = get_debug_logger()

    # Configuration
    config = load_config()

    # Helper objects used by the pipeline
    wp_api = create_wp_api(config['wp_url'], config['wp_user'], config['wp_password'])
    image_handler = create_image_handler(config['wp_url'], config['wp_user'], config['wp_password'])
    formatter = create_formatter()
    category_matcher = create_category_matcher(wp_api)

    # Start of the publish run
    pl.start_publish('Word 文档', os.path.basename(word_file_path))

    try:
        # ========== Step 1: parse the Word document ==========
        dl.log_step("解析 Word 文档", word_file_path)
        parse_result = parse_word_file(word_file_path)

        title = parse_result['title']
        content_parts = parse_result['content']
        images = parse_result['images']
        metadata = parse_result['metadata']

        pl.info(f"📖 解析完成 - 标题：{title}, 段落数：{metadata['paragraph_count']}, 图片数：{metadata['image_count']}")

        # ========== Step 2: upload images ==========
        uploaded_images = []
        if images:
            dl.log_step("上传图片", f"共 {len(images)} 张图片")
            uploaded_images = image_handler.upload_images_batch(images)
            # An entry counts as uploaded when it carries a 'url' key.
            pl.info(f"📤 图片上传完成 - 成功 {sum('url' in img for img in uploaded_images)} 张")

        # ========== Step 3: resolve the category ==========
        if category_id:
            final_category_id = category_id
        else:
            final_category_id = category_matcher.match(
                instruction=instruction,
                title=title,
                content=' '.join(content_parts),
                auto_match=config.get('auto_match_category', True)
            )
            if not final_category_id:
                # The matcher produced nothing usable: use the configured
                # default instead of sending an empty category to the API.
                final_category_id = config.get('default_category')
                dl.debug(f"未匹配到分类，使用默认分类 ID: {final_category_id}")

        # ========== Step 4: render HTML ==========
        dl.log_step("格式化 HTML 内容")
        content_html = formatter.format_content(content_parts, uploaded_images)

        # Excerpt derived from the rendered HTML
        excerpt = formatter.generate_excerpt(content_html)

        # ========== Step 5: create the post ==========
        dl.log_step("发布文章")

        # An explicit status wins over the configured one.
        post_status = status or config.get('post_status', 'publish')

        publish_data = {
            'title': title,
            'content': content_html,
            'status': post_status,
            'categories': [final_category_id],
            'excerpt': excerpt
        }

        if tags:
            publish_data['tags'] = tags

        # Use the first uploaded image as the featured image, but only when
        # that first entry has both a URL and an ID.
        if uploaded_images and uploaded_images[0].get('url'):
            featured_img = uploaded_images[0]
            if 'id' in featured_img:
                publish_data['featured_media'] = featured_img['id']
                dl.debug(f"设置特色图片 ID: {featured_img['id']}")

        result = wp_api.create_post(**publish_data)

        # ========== Step 6: report the outcome ==========
        if result.get('success'):
            pl.end_publish(
                True,
                post_id=result.get('id'),
                post_url=result.get('url')
            )

            # Temporary files are removed only after a successful publish.
            _cleanup_temp_files()

            return {
                'success': True,
                'post_id': result.get('id'),
                'post_url': result.get('url'),
                'title': title,
                'category_id': final_category_id,
                'images_uploaded': sum('url' in img for img in uploaded_images)
            }
        else:
            pl.end_publish(False, error_msg=result.get('error'))
            return {
                'success': False,
                'error': result.get('error')
            }

    except Exception as e:
        # Top-level boundary of the pipeline: log and turn the exception into
        # an error result for the caller.
        pl.end_publish(False, error_msg=str(e))
        dl.error(f"发布异常：{str(e)}", exc_info=True)
        return {
            'success': False,
            'error': str(e)
        }


def _cleanup_temp_files():
    """Delete every regular file directly inside ``BASE_DIR/temp``.

    Does nothing when that path does not exist. Entries that are not regular
    files are left alone, and a file that cannot be processed is reported on
    stdout and skipped.
    """
    scratch_dir = os.path.join(BASE_DIR, 'temp')
    if not os.path.exists(scratch_dir):
        return

    for entry_name in os.listdir(scratch_dir):
        target = os.path.join(scratch_dir, entry_name)
        try:
            if not os.path.isfile(target):
                continue
            os.remove(target)
        except Exception as err:
            print(f"清理临时文件失败：{target}, {str(err)}")


def main():
    """Command-line entry point.

    Parses the arguments, then either previews the parsed document
    (``--dry-run``) or publishes it and prints the result as JSON.

    Exits with status 0 after a preview or a successful publish, and with
    status 1 when the file does not exist, a tag is not an integer, or
    publishing fails. Invalid command-line usage is reported by argparse.
    """
    parser = argparse.ArgumentParser(description='WordPress 文章发布工具')
    parser.add_argument('file', help='Word 文档路径 (.docx)')
    parser.add_argument('--instruction', '-i', help='发布指令（如：#分类 技术）')
    parser.add_argument('--status', '-s', choices=['publish', 'draft', 'pending', 'private'],
                       default=None, help='发布状态')
    parser.add_argument('--category', '-c', type=int, help='指定分类 ID')
    parser.add_argument('--tags', '-t', help='标签 ID 列表（逗号分隔）')
    parser.add_argument('--dry-run', '-d', action='store_true', help='预览模式（不实际发布）')

    args = parser.parse_args()

    # The document must exist before anything else is attempted.
    if not os.path.exists(args.file):
        print(f"❌ 文件不存在：{args.file}")
        sys.exit(1)

    # Tags arrive as a comma-separated list of integer IDs; empty items are
    # ignored. A non-integer item is reported like the missing-file error
    # instead of surfacing as a raw traceback.
    tags = None
    if args.tags:
        try:
            tags = [int(t.strip()) for t in args.tags.split(',') if t.strip()]
        except ValueError:
            print(f"❌ 标签 ID 必须为整数：{args.tags}")
            sys.exit(1)

    # Preview mode: parse only, show a summary plus the first ten content
    # parts, and publish nothing.
    if args.dry_run:
        print("🔍 预览模式 - 解析文档内容：")
        parse_result = parse_word_file(args.file)
        print(f"  标题：{parse_result['title']}")
        print(f"  段落数：{parse_result['metadata']['paragraph_count']}")
        print(f"  图片数：{parse_result['metadata']['image_count']}")
        print(f"  字数：{parse_result['metadata']['word_count']}")
        print("\nHTML 内容预览：")
        print('\n'.join(parse_result['content'][:10]))
        print("...")
        sys.exit(0)

    # Publish
    result = publish_word_document(
        word_file_path=args.file,
        instruction=args.instruction,
        status=args.status,
        category_id=args.category,
        tags=tags
    )

    # Print the result as JSON
    print("\n" + json.dumps(result, ensure_ascii=False, indent=2))

    # The exit status mirrors the publish outcome.
    sys.exit(0 if result.get('success') else 1)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()