# feishu_fabu/scripts/wp_publish_text.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
WordPress 发布系统 - 文字 + 图片发布脚本
处理从飞书等渠道发送的文字和图片，自动发布到 WordPress
"""

import argparse
import base64
import hashlib
import html
import json
import os
import sys

# Project root = the directory one level above the one holding this script.
# It is pushed to the front of sys.path so the `modules` package imports below
# resolve when the script is executed directly.
BASE_DIR = os.path.dirname(os.path.split(os.path.abspath(__file__))[0])
sys.path[:0] = [BASE_DIR]

from modules.wp_logger import get_publish_logger, get_debug_logger
from modules.wp_image_handler import create_image_handler
from modules.wp_formatter import create_formatter
from modules.wp_api import create_wp_api
from modules.wp_category import create_category_matcher

# Path of the optional override file (config.py in the project root) that
# load_config() executes on top of its built-in defaults.
CONFIG_FILE = os.path.join(BASE_DIR, 'config.py')


def load_config():
    """Build the runtime configuration used by the publishing functions.

    Starts from the built-in defaults below. When ``CONFIG_FILE`` exists it is
    executed as Python source and every non-dunder name it defines overrides
    (or extends) the defaults.

    The override file runs in a scratch copy of the defaults, so it can still
    read the default values, but a file that fails part-way through leaves the
    defaults untouched -- matching the "using default config" message printed
    in that case. Interpreter-injected names such as ``__builtins__`` are not
    copied into the result.

    Returns:
        dict: configuration mapping (``wp_url``, ``wp_user``, ``wp_password``,
        ``default_category``, ...).
    """
    # NOTE(security): real-looking credentials are hard-coded as defaults.
    # They should live in the untracked config file or the environment, and
    # the password committed here should be rotated.
    config = {
        'wp_url': 'https://www.nanlou.net',
        'wp_user': 'shaowu',
        'wp_password': 'zjzz gHYm 8Q3l KbZk y4CF 2DQi',
        'default_category': 7,
        'auto_match_category': True,
        'optimize_images': True,
        'image_max_width': 1200,
        'image_quality': 85,
        'post_status': 'publish'
    }

    if not os.path.exists(CONFIG_FILE):
        return config

    try:
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            source = f.read()
        # NOTE(security): exec() runs arbitrary code from CONFIG_FILE; the
        # file must be trusted and not writable by other users.
        namespace = dict(config)
        exec(source, namespace)
    except Exception as e:
        print(f"加载配置文件失败：{str(e)}，使用默认配置")
        return config

    # Merge only after the whole file executed successfully, and skip the
    # dunder names exec() injects (e.g. __builtins__).
    config.update(
        (key, value)
        for key, value in namespace.items()
        if not (isinstance(key, str) and key.startswith('__'))
    )
    return config


def _upload_images(images, image_handler, pl, dl):
    """Upload every usable entry of *images* and return the upload results.

    An entry is either a path string naming an existing file, or a dict with a
    ``data`` key (bytes, or a base64 string) and an optional ``filename``.
    Dict entries are written to a temporary file under ``BASE_DIR/temp`` for
    the upload. Only those temporary files are removed afterwards; files
    supplied by path belong to the caller and are left in place.

    A failure on one image is logged and does not stop the remaining uploads.
    """
    dl.log_step("上传图片", f"共 {len(images)} 张图片")
    temp_dir = os.path.join(BASE_DIR, 'temp')
    uploaded_images = []

    for i, img in enumerate(images):
        temp_path = None  # set only for files this function created
        try:
            if isinstance(img, str) and os.path.exists(img):
                img_path = img
            elif isinstance(img, dict) and 'data' in img:
                img_data = img['data']
                if isinstance(img_data, str):
                    img_data = base64.b64decode(img_data)

                # Keep only the extension of the supplied name; the stored
                # name is derived from the position and a content hash.
                filename = img.get('filename') or f'image_{i+1}.jpg'
                ext = os.path.splitext(filename)[1] or '.jpg'
                img_hash = hashlib.md5(img_data).hexdigest()[:8]

                os.makedirs(temp_dir, exist_ok=True)
                temp_path = os.path.join(temp_dir, f"image_{i+1}_{img_hash}{ext}")
                with open(temp_path, 'wb') as f:
                    f.write(img_data)
                img_path = temp_path
            else:
                # Neither an existing path nor a {'data': ...} dict.
                pl.error(f"图片 {i+1} 无法识别或文件不存在，已跳过")
                continue

            result = image_handler.upload_image(
                img_path,
                title=f"图片 {i+1}",
                alt_text=f"文章配图 {i+1}"
            )
            uploaded_images.append(result)

        except Exception as e:
            pl.error(f"图片 {i+1} 上传失败：{str(e)}")
            dl.error(f"图片上传失败：{str(e)}", exc_info=True)
        finally:
            # Runs on success and on failure, so a failed upload does not
            # leak its temporary file.
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except FileNotFoundError:
                    pass
                except OSError as cleanup_err:
                    dl.debug(f"临时文件清理失败：{temp_path}（{cleanup_err}）")

    pl.info(f"📤 图片上传完成 - 成功 {len(uploaded_images)} 张")
    return uploaded_images


def _interleave_images(content_html, uploaded_images):
    """Return *content_html* with one ``<img>`` tag after each paragraph.

    Images are placed in upload order, one after each ``</p>``-terminated
    paragraph. Upload results without a ``url`` are skipped, and images left
    over once the paragraphs run out are appended at the end so none is lost.
    Attribute values are HTML-escaped.
    """
    img_tags = [
        '<img src="{src}" alt="{alt}" style="max-width: 100%; height: auto; '
        'display: block; margin: 16px auto;">'.format(
            src=html.escape(str(img['url']), quote=True),
            alt=html.escape(str(img.get('title', '')), quote=True),
        )
        for img in uploaded_images
        if 'url' in img
    ]
    pending = iter(img_tags)

    pieces = content_html.split('</p>')
    last_index = len(pieces) - 1
    new_html = []

    for index, piece in enumerate(pieces):
        piece = piece.strip()
        if not piece:
            continue

        # Every piece except the last was terminated by the '</p>' that
        # split() consumed; the last one is trailing content that never had it.
        new_html.append(piece + '</p>' if index < last_index else piece)

        tag = next(pending, None)
        if tag is not None:
            new_html.append(tag)

    # More images than paragraphs: keep the rest at the end of the article.
    new_html.extend(pending)
    return '\n\n'.join(new_html)


def publish_text_with_images(text, images=None, instruction=None, status=None,
                            category_id=None, tags=None, title=None):
    """
    Publish a text article, optionally with images, to WordPress.

    Steps: derive the title, upload the images, pick the category, format the
    text as HTML with the images interleaved, then create the post. The first
    uploaded image that carries an ``id`` in position 0 becomes the featured
    image.

    Args:
        text: Article body text.
        images: Optional list of images. Each item is a path to an existing
            file, or a dict with ``data`` (bytes or base64 string) and an
            optional ``filename``.
        instruction: Optional instruction text passed to the category matcher.
        status: Optional post status; defaults to the configured
            ``post_status``.
        category_id: Optional explicit category ID; when falsy the category
            matcher decides.
        tags: Optional list of tags.
        title: Optional title; when omitted it is extracted from ``text``.

    Returns:
        dict: ``{'success': True, 'post_id', 'post_url', 'title',
        'category_id', 'images_uploaded'}`` on success, otherwise
        ``{'success': False, 'error': ...}``.
    """
    # Loggers
    pl = get_publish_logger()
    dl = get_debug_logger()

    # Configuration
    config = load_config()

    # Collaborating modules
    wp_api = create_wp_api(config['wp_url'], config['wp_user'], config['wp_password'])
    image_handler = create_image_handler(config['wp_url'], config['wp_user'], config['wp_password'])
    formatter = create_formatter()
    category_matcher = create_category_matcher(wp_api)

    pl.start_publish('文字 + 图片')

    try:
        # ========== Step 1: title ==========
        if not title:
            title = formatter.extract_title_from_content(text)

        pl.info(f"📝 文章标题：{title}")
        dl.log_step("提取标题", title)

        # ========== Step 2: upload images ==========
        uploaded_images = []
        if images:
            uploaded_images = _upload_images(images, image_handler, pl, dl)

        # ========== Step 3: category ==========
        if category_id:
            final_category_id = category_id
        else:
            final_category_id = category_matcher.match(
                instruction=instruction,
                title=title,
                content=text,
                auto_match=config.get('auto_match_category', True)
            )

        # ========== Step 4: HTML ==========
        dl.log_step("格式化 HTML 内容")

        content_html = formatter.format_text_content(text)
        if uploaded_images:
            content_html = _interleave_images(content_html, uploaded_images)

        excerpt = formatter.generate_excerpt(content_html)

        # ========== Step 5: publish ==========
        dl.log_step("发布文章")

        post_status = status or config.get('post_status', 'publish')

        publish_data = {
            'title': title,
            'content': content_html,
            'status': post_status,
            'categories': [final_category_id],
            'excerpt': excerpt
        }

        if tags:
            publish_data['tags'] = tags

        # Use the first uploaded image as the featured image when it has an ID.
        if uploaded_images and uploaded_images[0].get('id'):
            publish_data['featured_media'] = uploaded_images[0]['id']
            dl.debug(f"设置特色图片 ID: {uploaded_images[0]['id']}")

        result = wp_api.create_post(**publish_data)

        # ========== Step 6: result ==========
        if result.get('success'):
            pl.end_publish(
                True,
                post_id=result.get('id'),
                post_url=result.get('url')
            )

            return {
                'success': True,
                'post_id': result.get('id'),
                'post_url': result.get('url'),
                'title': title,
                'category_id': final_category_id,
                'images_uploaded': len(uploaded_images)
            }
        else:
            pl.end_publish(False, error_msg=result.get('error'))
            return {
                'success': False,
                'error': result.get('error')
            }

    except Exception as e:
        pl.end_publish(False, error_msg=str(e))
        dl.error(f"发布异常：{str(e)}", exc_info=True)
        return {
            'success': False,
            'error': str(e)
        }


def _rest_field_text(field):
    """Return the text of a WordPress REST ``title``/``content`` field.

    Such fields are normally dicts. The unfiltered ``raw`` value is preferred
    when the response includes it, so that re-saving does not store filtered
    HTML; otherwise ``rendered`` is used. A plain string is returned as is.
    """
    if isinstance(field, dict):
        value = field.get('raw')
        if value is None:
            value = field.get('rendered')
        return value or ''
    return field if isinstance(field, str) else ''


def update_post_with_text(target, new_text, new_title=None, status='publish',
                          category_id=None, tags=None):
    """
    Update an existing post by appending new content to it.

    Args:
        target: Post ID (int or numeric string) or a title keyword (string).
            A numeric value is looked up by ID; anything else is used as a
            search term and the first hit is updated.
        new_text: Text to append to the existing body.
        new_title: Optional replacement title; the existing title is kept
            when omitted.
        status: Post status sent with the update.
        category_id: Optional category ID.
        tags: Optional list of tags.

    Returns:
        dict: ``{'success': True, 'post_id', 'post_url', 'title',
        'original_length', 'merged_length'}`` on success, otherwise
        ``{'success': False, 'error': ...}``.
    """
    pl = get_publish_logger()
    dl = get_debug_logger()
    config = load_config()

    wp_api = create_wp_api(config['wp_url'], config['wp_user'], config['wp_password'])
    formatter = create_formatter()

    pl.start_publish('更新文章', target)

    try:
        # ========== Step 1: locate the post ==========
        post_data = None
        post_id = None

        # Accept ints as well as strings. isdecimal() only matches strings
        # that int() can actually parse.
        target_text = str(target).strip()

        if target_text.isdecimal():
            # Exact lookup by ID
            post_id = int(target_text)
            result = wp_api.get_post(post_id)
            if result.get('success'):
                post_data = result.get('data')
            else:
                pl.error(f"未找到文章 ID: {post_id}")
                return {'success': False, 'error': f'未找到文章 ID: {post_id}'}
        else:
            # Search by title keyword and take the first hit
            search_results = wp_api.search_posts(target_text, per_page=1)
            if not search_results:
                pl.error(f"未找到匹配标题的文章: {target}")
                return {'success': False, 'error': f'未找到匹配标题的文章: {target}'}
            post_data = search_results[0]
            post_id = post_data.get('id')
            pl.info(f"🔍 搜索匹配到文章 ID: {post_id}")

        # ========== Step 2: read the existing content ==========
        # NOTE(review): if the fetched post carries no 'content' field the
        # existing body is treated as empty and the update replaces it --
        # confirm search_posts() returns full post objects.
        old_content = _rest_field_text(post_data.get('content', {}))
        old_title = _rest_field_text(post_data.get('title', {}))

        pl.info(f"📖 原文标题：{old_title}，原文长度：{len(old_content)} 字符")

        # ========== Step 3: format the new content ==========
        new_html = formatter.format_text_content(new_text)

        # ========== Step 4: merge (append mode) ==========
        merged_content = old_content.rstrip() + "\n\n" + new_html.lstrip()

        # ========== Step 5: send the update ==========
        final_title = new_title if new_title else old_title

        update_result = wp_api.update_post(
            post_id=post_id,
            title=final_title,
            content=merged_content,
            status=status,
            categories=[category_id] if category_id else None,
            tags=tags
        )

        # ========== Step 6: result ==========
        if update_result.get('success'):
            post_url = update_result.get('data', {}).get('link', '')
            pl.end_publish(True, post_id=post_id, post_url=post_url)
            return {
                'success': True,
                'post_id': post_id,
                'post_url': post_url,
                'title': final_title,
                'original_length': len(old_content),
                'merged_length': len(merged_content)
            }
        else:
            pl.end_publish(False, error_msg=update_result.get('error'))
            return {'success': False, 'error': update_result.get('error')}

    except Exception as e:
        pl.end_publish(False, error_msg=str(e))
        dl.error(f"更新异常：{str(e)}", exc_info=True)
        return {'success': False, 'error': str(e)}


def main():
    """Command-line entry point.

    Parses the arguments, publishes the article through
    ``publish_text_with_images``, prints the result as JSON and exits with
    status 0 on success or 1 on failure. Invalid arguments, including a
    ``--tags`` value that is not a comma-separated list of integers, are
    reported through argparse (usage message, exit status 2).
    """
    parser = argparse.ArgumentParser(description='WordPress 文字 + 图片发布工具')
    parser.add_argument('text', help='文字内容')
    parser.add_argument('--images', '-i', nargs='+', help='图片文件路径')
    parser.add_argument('--title', '-t', help='文章标题')
    parser.add_argument('--instruction', '-c', help='发布指令（如：#分类 技术）')
    parser.add_argument('--status', '-s', choices=['publish', 'draft', 'pending', 'private'],
                       default=None, help='发布状态')
    parser.add_argument('--category', '-C', type=int, help='指定分类 ID')
    parser.add_argument('--tags', '-T', help='标签 ID 列表（逗号分隔）')

    args = parser.parse_args()

    # Parse the comma-separated tag IDs; empty items are ignored.
    tags = None
    if args.tags:
        try:
            tags = [int(t.strip()) for t in args.tags.split(',') if t.strip()]
        except ValueError:
            # Report bad input as a usage error, not a traceback.
            parser.error(f"--tags 必须是逗号分隔的数字 ID：{args.tags}")

    result = publish_text_with_images(
        text=args.text,
        images=args.images,
        instruction=args.instruction,
        status=args.status,
        category_id=args.category,
        tags=tags,
        title=args.title
    )

    # Emit the result as JSON
    print("\n" + json.dumps(result, ensure_ascii=False, indent=2))

    # Exit status mirrors the publish outcome
    sys.exit(0 if result.get('success') else 1)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()