feishu_fabu/scripts/wp_publish.py
wp-publish-bot 1fb93e34c6 feat: 初始化 WordPress 自动发布系统(飞书机器人集成)
- 飞书消息接收与处理(文字、图片、Word 文档)
- WordPress REST API 文章发布
- 图片自动上传到媒体库
- Word 文档解析与发布
- HTML 格式化与分类自动匹配
- Python CLI 工具(避免 shell 引号冲突)
- Webhook 服务器(8080 端口)
- 完整日志系统
2026-05-12 15:09:30 +08:00

250 lines
8.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
WordPress 发布系统 - 主发布脚本
整合 Word 解析、图片上传、分类匹配、文章发布全流程
"""
import os
import sys
import json
import argparse
# 添加项目根目录到 Python 路径
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, BASE_DIR)
from modules.wp_logger import get_publish_logger, get_debug_logger
from modules.wp_parse_docx import parse_word_file
from modules.wp_image_handler import create_image_handler
from modules.wp_formatter import create_formatter
from modules.wp_api import create_wp_api
from modules.wp_category import create_category_matcher
# 配置文件路径
CONFIG_FILE = os.path.join(BASE_DIR, 'config.py')
def load_config():
    """Build the runtime configuration for the publisher.

    Starts from the built-in defaults below and, when ``CONFIG_FILE`` exists,
    executes that file as Python source so that the top-level names it
    assigns override (or extend) the defaults.

    The file is executed in a scratch namespace seeded with the defaults, and
    the result is merged back only when execution finishes without an error.
    This keeps interpreter bookkeeping such as ``__builtins__`` out of the
    returned dict, and guarantees that a config file failing half-way leaves
    the defaults untouched, which is what the printed message promises.

    Returns:
        dict: Configuration values keyed by setting name.
    """
    # SECURITY NOTE(review): credentials are hard-coded here and committed
    # with the source. They are kept so existing deployments keep working,
    # but they should be rotated and supplied through CONFIG_FILE instead.
    config = {
        'wp_url': 'https://www.nanlou.net',
        'wp_user': 'shaowu',
        'wp_password': 'zjzz gHYm 8Q3l KbZk y4CF 2DQi',
        'default_category': 7,
        'auto_match_category': True,
        'optimize_images': True,
        'image_max_width': 1200,
        'image_quality': 85,
        'post_status': 'publish'
    }
    if not os.path.exists(CONFIG_FILE):
        return config

    # Seed the scratch namespace with the defaults so a config file that
    # reads an existing setting (e.g. to derive another one) keeps working.
    namespace = dict(config)
    try:
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            source = f.read()
        # NOTE: exec() runs arbitrary code; CONFIG_FILE must be a trusted,
        # project-owned file. compile() attaches the file name to tracebacks.
        exec(compile(source, CONFIG_FILE, 'exec'), namespace)
    except Exception as e:
        print(f"加载配置文件失败:{str(e)},使用默认配置")
        return config

    # Merge everything the file defined except dunder names, which are
    # interpreter internals (exec() inserts ``__builtins__`` by itself).
    config.update({
        key: value
        for key, value in namespace.items()
        if not (key.startswith('__') and key.endswith('__'))
    })
    return config
def publish_word_document(word_file_path, instruction=None, status=None, category_id=None, tags=None):
    """Publish a Word document to WordPress as a single post.

    Pipeline: parse the document, upload its images, choose a category,
    render the HTML body and excerpt, then create the post through the
    WordPress API wrapper. Exceptions raised once the pipeline has started
    (inside the ``try`` below) are logged and reported in the returned dict
    rather than propagated; failures while loading the config or building
    the helper objects still propagate to the caller.

    Args:
        word_file_path: Path of the Word document to publish.
        instruction: Optional instruction text handed to the category matcher.
        status: Optional post status; falls back to the ``post_status``
            config value, and to ``'publish'`` if that is missing as well.
        category_id: Optional category ID. Any truthy value is used as-is and
            skips category matching.
        tags: Optional list of tags (the CLI passes integer tag IDs); sent
            with the post only when non-empty.

    Returns:
        dict: On success, ``success`` (True), ``post_id``, ``post_url``,
        ``title``, ``category_id`` and ``images_uploaded``. On failure,
        ``success`` (False) and ``error``.
    """
    # Loggers
    pl = get_publish_logger()
    dl = get_debug_logger()

    # Configuration
    config = load_config()

    # Collaborating modules
    wp_api = create_wp_api(config['wp_url'], config['wp_user'], config['wp_password'])
    image_handler = create_image_handler(config['wp_url'], config['wp_user'], config['wp_password'])
    formatter = create_formatter()
    category_matcher = create_category_matcher(wp_api)

    pl.start_publish('Word 文档', os.path.basename(word_file_path))
    try:
        # ---------- Step 1: parse the Word document ----------
        dl.log_step("解析 Word 文档", word_file_path)
        parse_result = parse_word_file(word_file_path)
        title = parse_result['title']
        content_parts = parse_result['content']
        images = parse_result['images']
        metadata = parse_result['metadata']
        pl.info(f"📖 解析完成 - 标题:{title}, 段落数:{metadata['paragraph_count']}, 图片数:{metadata['image_count']}")

        # ---------- Step 2: upload images ----------
        uploaded_images = []
        if images:
            dl.log_step("上传图片", f"{len(images)} 张图片")
            uploaded_images = image_handler.upload_images_batch(images)
        # An upload result counts as successful when it carries a 'url' key.
        # Computed once and reused for both the log line and the return value.
        uploaded_count = sum(1 for img in uploaded_images if 'url' in img)
        if images:
            pl.info(f"📤 图片上传完成 - 成功 {uploaded_count}")

        # ---------- Step 3: choose the category ----------
        if category_id:
            final_category_id = category_id
        else:
            final_category_id = category_matcher.match(
                instruction=instruction,
                title=title,
                content=' '.join(content_parts),
                auto_match=config.get('auto_match_category', True)
            )

        # ---------- Step 4: render HTML ----------
        dl.log_step("格式化 HTML 内容")
        content_html = formatter.format_content(content_parts, uploaded_images)
        excerpt = formatter.generate_excerpt(content_html)

        # ---------- Step 5: create the post ----------
        dl.log_step("发布文章")
        # An explicit status argument wins over the configured default.
        post_status = status or config.get('post_status', 'publish')
        publish_data = {
            'title': title,
            'content': content_html,
            'status': post_status,
            'categories': [final_category_id],
            'excerpt': excerpt
        }
        if tags:
            publish_data['tags'] = tags

        # Use the first image that has both a URL and a media ID as the
        # featured image, so a failed upload at the front of the batch does
        # not leave the post without one when later uploads succeeded.
        featured_img = next(
            (img for img in uploaded_images if img.get('url') and 'id' in img),
            None,
        )
        if featured_img is not None:
            publish_data['featured_media'] = featured_img['id']
            dl.debug(f"设置特色图片 ID: {featured_img['id']}")

        result = wp_api.create_post(**publish_data)

        # ---------- Step 6: report the outcome ----------
        if result.get('success'):
            pl.end_publish(
                True,
                post_id=result.get('id'),
                post_url=result.get('url')
            )
            # Temporary files are removed only after a successful publish.
            _cleanup_temp_files()
            return {
                'success': True,
                'post_id': result.get('id'),
                'post_url': result.get('url'),
                'title': title,
                'category_id': final_category_id,
                'images_uploaded': uploaded_count
            }

        pl.end_publish(False, error_msg=result.get('error'))
        return {
            'success': False,
            'error': result.get('error')
        }
    except Exception as e:
        # Boundary handler: callers receive a failure dict, not an exception.
        pl.end_publish(False, error_msg=str(e))
        dl.error(f"发布异常:{str(e)}", exc_info=True)
        return {
            'success': False,
            'error': str(e)
        }
def _cleanup_temp_files():
"""清理临时文件"""
temp_dir = os.path.join(BASE_DIR, 'temp')
if os.path.exists(temp_dir):
for filename in os.listdir(temp_dir):
file_path = os.path.join(temp_dir, filename)
try:
if os.path.isfile(file_path):
os.remove(file_path)
except Exception as e:
print(f"清理临时文件失败:{file_path}, {str(e)}")
def main():
    """Command-line entry point: parse arguments, then preview or publish.

    Exit codes:
        0: the post was published, or a dry-run preview was printed.
        1: the input file does not exist, or publishing failed.
        2: invalid command-line arguments (argparse convention), including
           a ``--tags`` value that is not a comma-separated list of integers.
    """
    parser = argparse.ArgumentParser(description='WordPress 文章发布工具')
    parser.add_argument('file', help='Word 文档路径 (.docx)')
    parser.add_argument('--instruction', '-i', help='发布指令(如:#分类 技术)')
    parser.add_argument('--status', '-s', choices=['publish', 'draft', 'pending', 'private'],
                        default=None, help='发布状态')
    parser.add_argument('--category', '-c', type=int, help='指定分类 ID')
    parser.add_argument('--tags', '-t', help='标签 ID 列表(逗号分隔)')
    parser.add_argument('--dry-run', '-d', action='store_true', help='预览模式(不实际发布)')
    args = parser.parse_args()

    # Fail early when the input file is missing.
    if not os.path.exists(args.file):
        print(f"❌ 文件不存在:{args.file}")
        sys.exit(1)

    # Turn "1, 2,3" into [1, 2, 3]; empty items (stray commas) are ignored.
    tags = None
    if args.tags:
        try:
            tags = [int(t.strip()) for t in args.tags.split(',') if t.strip()]
        except ValueError:
            # Report bad input as a usage error instead of a raw traceback.
            parser.error(f"--tags 必须是逗号分隔的整数 ID:{args.tags!r}")

    # Dry run: parse the document and print a summary without publishing.
    if args.dry_run:
        print("🔍 预览模式 - 解析文档内容:")
        parse_result = parse_word_file(args.file)
        print(f" 标题:{parse_result['title']}")
        print(f" 段落数:{parse_result['metadata']['paragraph_count']}")
        print(f" 图片数:{parse_result['metadata']['image_count']}")
        print(f" 字数:{parse_result['metadata']['word_count']}")
        print(f"\nHTML 内容预览:")
        # Only the first ten content parts are shown.
        print('\n'.join(parse_result['content'][:10]))
        print("...")
        sys.exit(0)

    result = publish_word_document(
        word_file_path=args.file,
        instruction=args.instruction,
        status=args.status,
        category_id=args.category,
        tags=tags
    )

    # Emit the result as JSON so callers (e.g. a bot) can parse stdout.
    print("\n" + json.dumps(result, ensure_ascii=False, indent=2))
    sys.exit(0 if result.get('success') else 1)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()