- 飞书消息接收与处理(文字、图片、Word 文档) - WordPress REST API 文章发布 - 图片自动上传到媒体库 - Word 文档解析与发布 - HTML 格式化与分类自动匹配 - Python CLI 工具(避免 shell 引号冲突) - Webhook 服务器(8080 端口) - 完整日志系统
250 lines
8.4 KiB
Python
250 lines
8.4 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
WordPress publishing system - main publish script.

Ties together the whole flow: Word parsing, image upload, category matching
and post publishing.
"""
|
||
|
||
import os
|
||
import sys
|
||
import json
|
||
import argparse
|
||
|
||
# Add the project root directory (the parent of this file's directory) to the
# front of the Python import path.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, BASE_DIR)
|
||
|
||
from modules.wp_logger import get_publish_logger, get_debug_logger
|
||
from modules.wp_parse_docx import parse_word_file
|
||
from modules.wp_image_handler import create_image_handler
|
||
from modules.wp_formatter import create_formatter
|
||
from modules.wp_api import create_wp_api
|
||
from modules.wp_category import create_category_matcher
|
||
|
||
# Path of the configuration file (config.py in the project root)
CONFIG_FILE = os.path.join(BASE_DIR, 'config.py')
|
||
|
||
|
||
def load_config():
    """Build the publisher configuration from built-in defaults plus ``config.py``.

    When ``CONFIG_FILE`` does not exist the defaults are returned unchanged.
    When it exists, it is executed as Python source and every non-dunder
    name it defines overrides (or extends) the defaults.  If executing the
    file raises, a message is printed and the *untouched* defaults are
    returned, so a half-executed config file never leaks partial overrides.

    Returns:
        dict: Configuration mapping containing at least the default keys
        ``wp_url``, ``wp_user``, ``wp_password``, ``default_category``,
        ``auto_match_category``, ``optimize_images``, ``image_max_width``,
        ``image_quality`` and ``post_status``.
    """
    # SECURITY NOTE(review): credentials are hard-coded here as defaults.
    # They should be moved out of source control (config file / environment)
    # and rotated; left in place to keep existing deployments working.
    config = {
        'wp_url': 'https://www.nanlou.net',
        'wp_user': 'shaowu',
        'wp_password': 'zjzz gHYm 8Q3l KbZk y4CF 2DQi',
        'default_category': 7,
        'auto_match_category': True,
        'optimize_images': True,
        'image_max_width': 1200,
        'image_quality': 85,
        'post_status': 'publish'
    }

    if not os.path.exists(CONFIG_FILE):
        return config

    # Execute against a scratch copy: the config code can still read the
    # defaults as globals, but nothing reaches `config` unless the whole
    # file ran successfully.
    namespace = dict(config)
    try:
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            # SECURITY NOTE(review): exec() runs arbitrary code, so
            # CONFIG_FILE must only ever be a trusted, local file.
            exec(f.read(), namespace)
    except Exception as e:
        print(f"加载配置文件失败:{str(e)},使用默认配置")
        return config

    # exec() inserts `__builtins__` into its globals mapping; keep it and
    # any other dunder names out of the returned configuration.
    config.update(
        (key, value)
        for key, value in namespace.items()
        if not (key.startswith('__') and key.endswith('__'))
    )
    return config
|
||
|
||
|
||
def publish_word_document(word_file_path, instruction=None, status=None, category_id=None, tags=None):
    """
    Publish a Word document to WordPress as a post.

    Steps: parse the document, upload its images, choose a category, format
    the content as HTML, create the post, then report the outcome.

    Args:
        word_file_path: Path of the Word document.
        instruction: Optional instruction text, forwarded to the category matcher.
        status: Optional post status; falls back to the configured ``post_status``.
        category_id: Optional explicit category ID; when falsy the category
            matcher picks one.
        tags: Optional list of tags; attached to the post only when non-empty.

    Returns:
        dict: On success, ``success`` (True), ``post_id``, ``post_url``,
        ``title``, ``category_id`` and ``images_uploaded``.  On failure
        (API error or any exception raised during the steps),
        ``success`` (False) and ``error``.
    """
    publish_log = get_publish_logger()
    debug_log = get_debug_logger()

    settings = load_config()

    # Both the REST client and the image handler take the same credentials.
    api_client = create_wp_api(settings['wp_url'], settings['wp_user'], settings['wp_password'])
    media_handler = create_image_handler(settings['wp_url'], settings['wp_user'], settings['wp_password'])
    html_formatter = create_formatter()
    matcher = create_category_matcher(api_client)

    publish_log.start_publish('Word 文档', os.path.basename(word_file_path))

    try:
        # ---- Step 1: parse the Word document ----
        debug_log.log_step("解析 Word 文档", word_file_path)
        parsed = parse_word_file(word_file_path)

        title = parsed['title']
        body_parts = parsed['content']
        doc_images = parsed['images']
        meta = parsed['metadata']

        publish_log.info(f"📖 解析完成 - 标题:{title}, 段落数:{meta['paragraph_count']}, 图片数:{meta['image_count']}")

        # ---- Step 2: upload images ----
        uploaded = []

        def count_uploaded_ok():
            # An upload result counts as successful when it carries a 'url'.
            return sum(1 for item in uploaded if 'url' in item)

        if doc_images:
            debug_log.log_step("上传图片", f"共 {len(doc_images)} 张图片")
            uploaded = media_handler.upload_images_batch(doc_images)
            publish_log.info(f"📤 图片上传完成 - 成功 {count_uploaded_ok()} 张")

        # ---- Step 3: choose the category ----
        if not category_id:
            chosen_category = matcher.match(
                instruction=instruction,
                title=title,
                content=' '.join(body_parts),
                auto_match=settings.get('auto_match_category', True)
            )
        else:
            chosen_category = category_id

        # ---- Step 4: format HTML and derive the excerpt ----
        debug_log.log_step("格式化 HTML 内容")
        html_body = html_formatter.format_content(body_parts, uploaded)
        summary = html_formatter.generate_excerpt(html_body)

        # ---- Step 5: create the post ----
        debug_log.log_step("发布文章")

        effective_status = status if status else settings.get('post_status', 'publish')

        payload = {
            'title': title,
            'content': html_body,
            'status': effective_status,
            'categories': [chosen_category],
            'excerpt': summary
        }

        if tags:
            payload['tags'] = tags

        # The first uploaded image becomes the featured image, provided the
        # upload yielded both a URL and a media ID.
        if uploaded and uploaded[0].get('url') and 'id' in uploaded[0]:
            media_id = uploaded[0]['id']
            payload['featured_media'] = media_id
            debug_log.debug(f"设置特色图片 ID: {media_id}")

        api_result = api_client.create_post(**payload)

        # ---- Step 6: report the outcome ----
        if not api_result.get('success'):
            publish_log.end_publish(False, error_msg=api_result.get('error'))
            return {
                'success': False,
                'error': api_result.get('error')
            }

        publish_log.end_publish(
            True,
            post_id=api_result.get('id'),
            post_url=api_result.get('url')
        )

        # Temporary files are removed only after a successful publish.
        _cleanup_temp_files()

        return {
            'success': True,
            'post_id': api_result.get('id'),
            'post_url': api_result.get('url'),
            'title': title,
            'category_id': chosen_category,
            'images_uploaded': count_uploaded_ok()
        }

    except Exception as e:
        publish_log.end_publish(False, error_msg=str(e))
        debug_log.error(f"发布异常:{str(e)}", exc_info=True)
        return {
            'success': False,
            'error': str(e)
        }
|
||
|
||
|
||
def _cleanup_temp_files(temp_dir=None):
    """Delete the regular files directly inside the temp directory (best effort).

    Sub-directories and their contents are left alone.  Filesystem errors
    never propagate: this runs after a post has already been published, so a
    cleanup problem must not be reported as a publish failure.  Failures are
    printed instead.

    Args:
        temp_dir: Directory to clean.  Defaults to ``<BASE_DIR>/temp``.
    """
    if temp_dir is None:
        temp_dir = os.path.join(BASE_DIR, 'temp')

    # isdir (not exists): a plain file at this path is not something to list.
    if not os.path.isdir(temp_dir):
        return

    try:
        filenames = os.listdir(temp_dir)
    except OSError as e:
        # e.g. permission denied, or the directory vanished in the meantime
        print(f"清理临时文件失败:{temp_dir}, {str(e)}")
        return

    for filename in filenames:
        file_path = os.path.join(temp_dir, filename)
        try:
            if os.path.isfile(file_path):
                os.remove(file_path)
        except OSError as e:
            # Keep going: one undeletable file should not block the rest.
            print(f"清理临时文件失败:{file_path}, {str(e)}")
|
||
|
||
|
||
def main():
    """Command-line entry point: preview or publish a Word document.

    Parses the command line, checks that the input file exists, then either
    prints a parse preview (``--dry-run``) or publishes the document and
    prints the result as JSON.

    Exit codes:
        0: publish succeeded, or the dry-run preview completed.
        1: the input file does not exist, or publishing failed.
        2: invalid command-line arguments (reported by argparse), including
           a non-integer entry in ``--tags``.
    """
    parser = argparse.ArgumentParser(description='WordPress 文章发布工具')
    parser.add_argument('file', help='Word 文档路径 (.docx)')
    parser.add_argument('--instruction', '-i', help='发布指令(如:#分类 技术)')
    parser.add_argument('--status', '-s', choices=['publish', 'draft', 'pending', 'private'],
                        default=None, help='发布状态')
    parser.add_argument('--category', '-c', type=int, help='指定分类 ID')
    parser.add_argument('--tags', '-t', help='标签 ID 列表(逗号分隔)')
    parser.add_argument('--dry-run', '-d', action='store_true', help='预览模式(不实际发布)')

    args = parser.parse_args()

    # The input file must exist before anything else is attempted.
    if not os.path.exists(args.file):
        print(f"❌ 文件不存在:{args.file}")
        sys.exit(1)

    # Parse the comma-separated tag IDs; empty entries are skipped.
    tags = None
    if args.tags:
        try:
            tags = [int(t.strip()) for t in args.tags.split(',') if t.strip()]
        except ValueError:
            # Report bad input as a usage error instead of a raw traceback.
            parser.error(f"--tags 必须是逗号分隔的整数 ID:{args.tags}")

    # Dry-run: show what the parser extracted and stop without publishing.
    if args.dry_run:
        print("🔍 预览模式 - 解析文档内容:")
        parse_result = parse_word_file(args.file)
        print(f" 标题:{parse_result['title']}")
        print(f" 段落数:{parse_result['metadata']['paragraph_count']}")
        print(f" 图片数:{parse_result['metadata']['image_count']}")
        print(f" 字数:{parse_result['metadata']['word_count']}")
        print(f"\nHTML 内容预览:")
        # Only the first 10 content parts are shown in the preview.
        print('\n'.join(parse_result['content'][:10]))
        print("...")
        sys.exit(0)

    # Publish the document.
    result = publish_word_document(
        word_file_path=args.file,
        instruction=args.instruction,
        status=args.status,
        category_id=args.category,
        tags=tags
    )

    # Print the result as JSON.
    print("\n" + json.dumps(result, ensure_ascii=False, indent=2))

    # The exit status mirrors the publish outcome.
    sys.exit(0 if result.get('success') else 1)
|
||
|
||
|
||
# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()
|