Files
meijiaka-zy/tauri-app/src/pages/VideoCreation/VoiceSynthesis.tsx
T
小鱼开发 de7a6b734f chore(release): bump to v1.5.15
- 统一版本号管理(VERSION + scripts/bump-version.py)
- 添加 GitLab CI/CD 前端多平台构建配置
- 替换应用图标为品牌 logo
- 清理无效文件(tauri.svg, vite.svg, bg-config.json, audio/presets, .DS_Store)
- 修复 ESLint 错误和全部 warnings
- 清理 console.warn,保留 console.error
- 更新 Cargo.toml 元数据(description + authors)
- 更新 .gitignore(dist/, src-tauri/target/, binaries/)
- authStore appVersion 改为动态获取(getVersion)
- 修复 login 错误处理
- 将 FFmpeg sidecar 二进制移出 Git 跟踪(CI 构建时准备)
2026-05-14 23:32:45 +08:00

562 lines
22 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
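/**
 * Voice synthesis page (Step 3)
 * ==============================
 *
 * Layout: narrow left sidebar (voice picker + speed / volume / pitch sliders,
 * generate button pinned to the bottom) | wide right column (voiceover text)
 */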
import { useState, useEffect, useMemo, useCallback, useRef } from 'react';
import { useProjectStore } from '../../store';
import { useVoiceStore } from '../../store/voiceStore';
import { getCurrentProjectId } from '../../api/modules/localStorage';
import { saveMetaToLocalFile } from '../../store/projectStore';
import { synthesizeTTS, saveAudio, uploadAudio, extractAudioSegment } from '../../api/modules/voice';
import { toast } from '../../store/uiStore';
import type { AlignmentResult } from '../../api/types';
import { useProgressStore } from '../../store/progressStore';
import { usePointsCheck } from '../../hooks/usePointsCheck';
import { getFriendlyErrorMessage } from '../../utils/errorMessage';
import { createTask, getTaskStatus } from '../../api/modules/task';
import { matchSegmentsToUtterances } from '../../utils/audioAlign';
import { uploadAudioFile } from '../../api/modules/videoCompose';
import { localProjectApi } from '../../api/modules/localStorage';
import './VoiceSynthesis.css';
/**
 * Voice synthesis page component (Step 3).
 *
 * Reads the segment list and voice settings from the project store and the
 * preset voices / voice materials from the voice store, then renders the voice
 * picker, the speed / volume / pitch sliders, the generate and preview buttons
 * and a read-only view of the merged voiceover text.
 */
export default function VoiceSynthesis() {
// Project id from getCurrentProjectId(); the generate and align handlers return early when it is falsy.
const projectId = getCurrentProjectId();
// Segment list and its updater from the project store.
const segments = useProjectStore(state => state.segments);
const updateSegment = useProjectStore(state => state.updateSegment);
// Voice settings (selected voice, speed, volume, pitch), each read through its own selector.
const selectedVoiceId = useProjectStore(state => state.selectedVoiceId);
const speed = useProjectStore(state => state.voiceSpeed);
const volume = useProjectStore(state => state.voiceVolume);
const pitch = useProjectStore(state => state.voicePitch);
// Setters for the voice settings above.
const setSelectedVoiceId = useProjectStore(state => state.setSelectedVoiceId);
const setSpeed = useProjectStore(state => state.setVoiceSpeed);
const setVolume = useProjectStore(state => state.setVoiceVolume);
const setPitch = useProjectStore(state => state.setVoicePitch);
// Voice lists and their loaders from the voice store.
const {
presetVoices,
voiceMaterials,
loadPresetVoices,
loadVoiceMaterials,
} = useVoiceStore();
// True while handleGenerate is running; used to disable the generate buttons.
const [isGenerating, setIsGenerating] = useState(false);
// Which voice list is visible: preset voices or voice materials ('clone').
const [activeVoiceTab, setActiveVoiceTab] = useState<'preset' | 'clone'>('preset');
// Id of the voice whose preview is currently playing, or null when none is.
const [playingVoiceId, setPlayingVoiceId] = useState<string | null>(null);
// Audio element behind the currently playing voice preview.
const audioInstanceRef = useRef<HTMLAudioElement | null>(null);
// Use the dubbing URL persisted in the store (component-local state would be lost after a refresh)
const dubbingAudioUrl = useProjectStore(state => state.dubbingAudioUrl);
// Playback flag and Audio element for previewing the generated dubbing.
const [isPlayingGenerated, setIsPlayingGenerated] = useState(false);
const generatedAudioRef = useRef<HTMLAudioElement | null>(null);
// Switches the bottom area between "generate" and "regenerate + preview".
const hasGeneratedAudio = !!dubbingAudioUrl;
// Load both voice lists on mount and again whenever projectId (or a loader identity) changes.
useEffect(() => {
loadPresetVoices();
loadVoiceMaterials();
}, [loadPresetVoices, loadVoiceMaterials, projectId]);
// Stop any audio playback when the component unmounts
useEffect(() => {
return () => {
audioInstanceRef.current?.pause();
generatedAudioRef.current?.pause();
audioInstanceRef.current = null;
generatedAudioRef.current = null;
};
}, []);
// Text shown in the right column: one line per segment, with '【空镜】' standing in
// for segments whose voiceover is missing or blank.
const mergedText = useMemo(
() => segments.map(s => s.voiceover?.trim() || '【空镜】').join('\n'),
[segments]
);
// Length of the merged text, so it also counts the newline separators and placeholders.
const totalChars = mergedText.length;
// Estimated TTS point cost as a { min, max } range.
// Speech time is 0.25 s per character (the original note says this mirrors the backend's
// seconds_per_char: 0.25) divided by the speed multiplier. Every boundary between two
// consecutive voiced segments adds a pause: 0.5 s when the segment type changes, 0.3 s
// otherwise — the same rule handleGenerate uses when inserting <#x#> markers.
// One point per started 5 seconds, at least 1; the range is ±1 around that value.
const estimatedTtsPoints = useMemo(() => {
  const voiced = segments.filter(seg => seg.voiceover?.trim());
  if (voiced.length === 0) {
    return { min: 0, max: 0 };
  }

  let charCount = 0;
  let pauseSeconds = 0;
  voiced.forEach((seg, idx) => {
    charCount += seg.voiceover!.trim().length;
    // A pause follows every voiced segment except the last one.
    if (idx < voiced.length - 1) {
      pauseSeconds += seg.type !== voiced[idx + 1]?.type ? 0.5 : 0.3;
    }
  });

  // A speed of 0 (or a missing value) falls back to 1x to avoid dividing by zero.
  const speechSeconds = (charCount * 0.25) / (speed || 1);
  const base = Math.max(1, Math.ceil((speechSeconds + pauseSeconds) / 5));

  return { min: Math.max(1, base - 1), max: base + 1 };
}, [segments, speed]);
/**
 * Toggles preview playback for one voice row.
 *
 * Clicking the voice that is already playing stops it. Clicking a different
 * voice stops the current preview (if any) and starts the new one. Only one
 * preview Audio element is tracked at a time, through `audioInstanceRef`.
 *
 * @param voiceId - Id of the voice whose preview button was clicked.
 * @param url - Preview audio URL; nothing happens when it is null or empty.
 * @param e - Click event; propagation is stopped so the enclosing row's
 *   onClick (voice selection) does not fire as well.
 */
const handlePlayPause = useCallback((voiceId: string, url: string | null, e: React.MouseEvent) => {
  e.stopPropagation();
  if (!url) {return;}

  if (playingVoiceId === voiceId) {
    // Second click on the playing voice: stop it.
    audioInstanceRef.current?.pause();
    audioInstanceRef.current = null;
    setPlayingVoiceId(null);
    return;
  }

  // Stop whatever preview was playing before starting the new one.
  audioInstanceRef.current?.pause();

  const audio = new Audio(url);

  // Clear the "playing" state only while this element is still the tracked one,
  // so a late event from a replaced element cannot reset a newer preview.
  const resetIfCurrent = () => {
    if (audioInstanceRef.current === audio) {
      audioInstanceRef.current = null;
      setPlayingVoiceId(null);
    }
  };
  audio.onended = resetIfCurrent;
  audio.onpause = resetIfCurrent;

  audioInstanceRef.current = audio;
  setPlayingVoiceId(voiceId);

  // play() returns a promise that rejects when playback cannot start (blocked,
  // unsupported or unreachable source) or is interrupted by pause(). Without a
  // handler the row would stay in the "playing" state forever.
  audio.play().catch((err: unknown) => {
    const interrupted = err instanceof DOMException && err.name === 'AbortError';
    if (!interrupted) {
      console.error('[VoiceSynthesis] voice preview playback failed:', err);
    }
    resetIfCurrent();
  });
}, [playingVoiceId]);
/**
 * Aligns the generated dubbing with the segment texts and cuts per-segment clips.
 *
 * Steps: submit a 'subtitle' task in `auto_align` mode, poll it for up to two
 * minutes, match the returned utterances to segments, store the timing on every
 * matched segment, extract and upload an mp3 clip for each matched segment of
 * type 'segment', then save the segments and the alignment result.
 *
 * Failures are reported through the progress store and are never thrown.
 *
 * @param dubbingAudioUrl - URL of the full dubbing audio; sent as the task's `videoPath`.
 * @param dubbingAudioPath - Local path of the same audio; clips are written to its directory.
 * @returns `false` when a failure has been reported via `progress.error`, `true`
 *   otherwise (including the nothing-to-do cases), so the caller knows whether
 *   it may report overall success.
 */
const handleAlignAndClip = useCallback(async (
  dubbingAudioUrl: string,
  dubbingAudioPath: string
): Promise<boolean> => {
  if (!projectId) {return true;}
  const progress = useProgressStore.getState();
  try {
    // 1. Build the text to align. It must follow the same order as the TTS text,
    //    so it uses the same "has voiceover" filter as handleGenerate.
    const voicedSegments = segments.filter(s => s.voiceover?.trim());
    const fullText = voicedSegments.map(s => s.voiceover!.trim()).join('\n');
    if (!fullText) {return true;}

    // 2. Submit the subtitle alignment task (goes through the scheduler).
    progress.update('正在提交字幕任务...');
    const task = await createTask('subtitle', {
      mode: 'auto_align',
      videoPath: dubbingAudioUrl,
      audioText: fullText,
    });

    // 3. Poll once per second until the task completes, fails or times out.
    progress.update('正在处理字幕...');
    const pollInterval = 1000;
    const timeout = 120_000; // 2 minutes
    const start = Date.now();
    type AutoAlignResult = { duration: number; utterances: Array<{ text: string; startTime: number; endTime: number }> };
    let alignResult: AutoAlignResult | null = null;
    while (Date.now() - start < timeout) {
      const status = await getTaskStatus(task.taskId);
      if (status.status === 'completed') {
        alignResult = status.result as AutoAlignResult;
        break;
      } else if (status.status === 'failed') {
        throw new Error(status.error || status.message || '字幕处理失败');
      }
      const elapsed = Math.floor((Date.now() - start) / 1000);
      progress.update(`字幕处理中... ${elapsed}s`);
      await new Promise(resolve => setTimeout(resolve, pollInterval));
    }
    if (!alignResult) {
      progress.error('字幕处理超时');
      return false;
    }
    if (!alignResult.utterances?.length) {
      progress.error('字幕处理异常');
      return false;
    }

    // 4. Match segment texts to the aligned utterances.
    const matchSegments = voicedSegments.map(s => ({ id: s.id, voiceover: s.voiceover || '' }));
    const matched = matchSegmentsToUtterances(matchSegments, alignResult.utterances);
    if (!matched.length) {
      progress.error('音频对齐失败');
      return false;
    }

    // 5. Cut and upload the audio clips, written next to the full dubbing file.
    progress.update('正在整理音频...');
    const audiosDir = dubbingAudioPath.replace(/\\/g, '/').split('/').slice(0, -1).join('/');
    for (const m of matched) {
      const seg = segments.find(s => s.id === m.segmentId);
      if (!seg) {continue;}
      // Every matched segment keeps its real timing (later steps use it to match material).
      updateSegment(m.segmentId, {
        audioStartTime: m.startTime,
        audioEndTime: m.endTime,
        actualDuration: m.actualDuration,
      });
      // Only segments of type 'segment' get an audio clip; other types are skipped.
      if (seg.type !== 'segment') {continue;}
      const outputPath = `${audiosDir}/segment_${m.segmentId}.mp3`;
      try {
        await extractAudioSegment({
          inputPath: dubbingAudioPath,
          // NOTE(review): startTime is divided by 1000 while actualDuration is passed
          // as-is — presumably ms vs. seconds; confirm against matchSegmentsToUtterances.
          start: m.startTime / 1000,
          duration: m.actualDuration,
          outputPath,
        });
        const clipUrl = await uploadAudioFile(outputPath);
        updateSegment(m.segmentId, {
          clipAudioPath: outputPath,
          clipAudioUrl: clipUrl,
        });
      } catch (e) {
        // Best effort: one failed clip must not abort the remaining segments.
        console.error(`[VoiceSynthesis] Segment ${m.segmentId} 截取/上传失败:`, e);
      }
    }

    // 6. Save segments.json from the latest store state (it includes the updates above).
    const currentSegments = useProjectStore.getState().segments;
    const segmentsSaved = await localProjectApi.saveSegments(projectId, currentSegments);
    if (!segmentsSaved) {
      console.error('[VoiceSynthesis] segments.json 保存失败');
      toast.error('分镜数据保存失败,请重试');
      // The caller no longer reports success after a failure, so the progress
      // state has to be moved out of "in progress" here.
      progress.error('分镜数据保存失败,请重试');
      return false;
    }

    // 7. Keep the alignment result in the store and in meta so Step 4 can reuse it.
    const subtitleAlignment: AlignmentResult = {
      status: 'completed',
      utterances: alignResult.utterances.map(u => ({
        text: u.text,
        start_time: u.startTime,
        end_time: u.endTime,
      })),
      duration: alignResult.duration,
    };
    useProjectStore.setState({ subtitleAlignment });
    await saveMetaToLocalFile({ subtitleAlignment });
    // progress.success is deliberately not called here; handleGenerate sets the final success state.
    return true;
  } catch (err) {
    console.error('[VoiceSynthesis] 打轴截取流程失败:', err);
    progress.error(getFriendlyErrorMessage(err, '音频处理失败,请稍后重试'));
    return false;
  }
}, [projectId, segments, updateSegment]);
const { checkBalance, handleError, PointsModal } = usePointsCheck();
/**
 * Runs the whole dubbing flow: builds the TTS text, checks the points balance,
 * synthesizes the audio, uploads it and saves it locally, records the dubbing
 * URL/path in the store and meta, then runs alignment and clipping.
 *
 * Success is reported only when alignment and clipping also succeeded.
 * Otherwise the error already reported by handleAlignAndClip stays visible.
 */
const handleGenerate = useCallback(async () => {
  if (!projectId) { toast.warning('请先创建项目'); return; }
  // Build the TTS text, appending a pause marker after every segment but the last.
  const realText = segments
    .filter(s => s.voiceover?.trim())
    .map((s, i, arr) => {
      const text = s.voiceover!.trim();
      if (i === arr.length - 1) {return text;}
      const next = arr[i + 1];
      // Type change (segment ↔ empty_shot): long pause.
      if (s.type !== next?.type) {
        return text + '<#0.5#>';
      }
      // Same type on both sides: short pause to keep the rhythm.
      return text + '<#0.3#>';
    })
    .join('\n');
  if (!realText) { toast.warning('没有需要合成的旁白文本'); return; }
  // Vidu TTS accepts at most 10000 characters per request; longer text is cut off.
  // NOTE(review): the cut is silent, may split a pause marker, and the alignment
  // step still uses the untruncated text — confirm this is acceptable.
  const truncatedText = realText.length > 10000 ? realText.slice(0, 10000) : realText;
  // Read the voice settings straight from the store to avoid stale closure values.
  const currentVoiceId = useProjectStore.getState().selectedVoiceId;
  const currentSpeed = useProjectStore.getState().voiceSpeed;
  const currentVolume = useProjectStore.getState().voiceVolume;
  const currentPitch = useProjectStore.getState().voicePitch;
  if (!currentVoiceId) { toast.warning('请先选择音色'); return; }
  // Up-front balance check in lenient mode, per the original note: a positive
  // balance is enough because the real TTS cost is not known in advance.
  const ok = await checkBalance(estimatedTtsPoints, '配音合成', false);
  if (!ok) {return;}
  const progress = useProgressStore.getState();
  setIsGenerating(true);
  progress.show('配音合成');
  try {
    progress.update('正在合成配音...');
    const result = await synthesizeTTS({ text: truncatedText, voiceId: currentVoiceId, speed: currentSpeed, volume: currentVolume, pitch: currentPitch });
    if (!result.audioUrl) {throw new Error('未返回音频 URL');}
    progress.update('正在处理音频...');
    // Download the synthesized audio as a blob.
    const response = await fetch(result.audioUrl);
    if (!response.ok) {throw new Error('下载音频失败');}
    const blob = await response.blob();
    // Upload to Qiniu cloud storage.
    const file = new File([blob], `tts_${Date.now()}.mp3`, { type: 'audio/mp3' });
    const qiniuUrl = await uploadAudio(file);
    // Save locally: saveAudio takes the payload as base64 without the data-URL prefix.
    const base64 = await new Promise<string>((resolve, reject) => {
      const reader = new FileReader();
      reader.onloadend = () => {
        const dataUrl = reader.result as string;
        resolve(dataUrl.split(',')[1]);
      };
      reader.onerror = reject;
      reader.readAsDataURL(blob);
    });
    const audioId = `voice_${Date.now()}`;
    const meta = await saveAudio({
      projectId, audioId, audioData: base64,
      name: `配音合成-${segments.length}`, voiceId: currentVoiceId || 'tianxin_xiaoling', duration: 0,
    });
    // Record the project-level dubbing info in the store and in meta.json.
    useProjectStore.setState({
      dubbingAudioUrl: qiniuUrl,
      dubbingAudioPath: meta.filePath,
    });
    await saveMetaToLocalFile({
      dubbingAudioUrl: qiniuUrl,
      dubbingAudioPath: meta.filePath,
    });
    // Align and clip right after generation. On failure handleAlignAndClip has
    // already reported the error, so it must not be overwritten with success.
    const aligned = await handleAlignAndClip(qiniuUrl, meta.filePath);
    if (!aligned) {return;}
    progress.success('配音合成完成', result.consumedPoints);
  } catch (err) {
    // handleError returns true when it took over the error; then only hide the progress UI.
    if (handleError(err, '配音合成', estimatedTtsPoints.max)) {
      progress.hide();
      return;
    }
    progress.error(getFriendlyErrorMessage(err, '配音合成失败,请稍后重试'));
  } finally {
    setIsGenerating(false);
  }
}, [projectId, segments, handleAlignAndClip, checkBalance, handleError, estimatedTtsPoints]);
/**
 * Toggles preview playback of the generated dubbing audio.
 *
 * Does nothing when no dubbing URL is available. While playing, a click pauses
 * and drops the tracked Audio element; otherwise a new element is created from
 * the current `dubbingAudioUrl` and started.
 */
const handleToggleGeneratedAudio = useCallback(() => {
  if (!dubbingAudioUrl) {return;}

  if (isPlayingGenerated) {
    generatedAudioRef.current?.pause();
    generatedAudioRef.current = null;
    setIsPlayingGenerated(false);
    return;
  }

  const audio = new Audio(dubbingAudioUrl);

  // Same guard as the voice preview handler: only the element that is still
  // tracked may reset the state, so a late event from an older element cannot
  // flip the button while a newer one is playing.
  const resetIfCurrent = () => {
    if (generatedAudioRef.current === audio) {
      generatedAudioRef.current = null;
      setIsPlayingGenerated(false);
    }
  };
  audio.onended = resetIfCurrent;
  audio.onpause = resetIfCurrent;

  generatedAudioRef.current = audio;
  setIsPlayingGenerated(true);

  // play() returns a promise that rejects when playback cannot start or is
  // interrupted by pause(). Handle it so the button does not stay on "pause".
  audio.play().catch((err: unknown) => {
    const interrupted = err instanceof DOMException && err.name === 'AbortError';
    if (!interrupted) {
      console.error('[VoiceSynthesis] generated audio playback failed:', err);
    }
    resetIfCurrent();
  });
}, [dubbingAudioUrl, isPlayingGenerated]);
// Render: a left sidebar (voice tabs and list, speed / volume / pitch sliders, generate and
// preview buttons) next to a read-only textarea with the merged voiceover text, followed by
// the PointsModal returned by usePointsCheck.
// NOTE(review): the section-title spans, the tab labels and the empty-state placeholder
// carry no visible text here — confirm that is intended.
return (
<div className="voice-dubbing">
<div className="dubbing-layout">
{/* Left: voice picker + sliders + generate button */}
<div className="voice-sidebar">
{/* Voice selection: clicking a row selects the voice and passes the choice to saveMetaToLocalFile */}
<div className="voice-section">
<div className="voice-section-header">
<span className="voice-section-title"></span>
</div>
<div className="voice-tabs">
<button className={`voice-tab ${activeVoiceTab === 'preset' ? 'active' : ''}`} onClick={() => setActiveVoiceTab('preset')}>
({presetVoices.length})
</button>
<button className={`voice-tab ${activeVoiceTab === 'clone' ? 'active' : ''}`} onClick={() => setActiveVoiceTab('clone')}>
({voiceMaterials.filter(m => m.status === 'ready').length})
</button>
</div>
{activeVoiceTab === 'preset' && (
<div className="voice-list">
{presetVoices.map(v => (
<div key={v.voiceId} className={`voice-row ${v.voiceId === selectedVoiceId ? 'selected' : ''}`} onClick={() => { setSelectedVoiceId(v.voiceId); saveMetaToLocalFile({ selectedVoiceId: v.voiceId }); }}>
<div className="voice-row-main">
<div className="voice-row-info">
<div className="voice-row-name">
{v.name}
<span className="voice-row-desc-inline">{v.description}</span>
</div>
</div>
<button className="preview-icon" onClick={e => handlePlayPause(v.voiceId, v.previewUrl ?? null, e)}>
{playingVoiceId === v.voiceId ? '⏸' : '▶'}
</button>
</div>
</div>
))}
</div>
)}
{activeVoiceTab === 'clone' && (
<div className="voice-list">
{voiceMaterials.filter(m => m.status === 'ready').length === 0 ? (
<div className="voice-empty"><br /><small></small></div>
) : (
voiceMaterials.filter(m => m.status === 'ready').map(m => (
<div key={m.voiceId} className={`voice-row ${m.voiceId === selectedVoiceId ? 'selected' : ''}`} onClick={() => { setSelectedVoiceId(m.voiceId); saveMetaToLocalFile({ selectedVoiceId: m.voiceId }); }}>
<div className="voice-row-main">
<div className="voice-row-info">
<div className="voice-row-name">
{m.name} <span className="tag clone"></span>
<span className="voice-row-desc-inline">
{m.createdAt ? new Date(m.createdAt).toLocaleDateString('zh-CN') : ''}
</span>
</div>
</div>
<button className="preview-icon" onClick={e => handlePlayPause(m.voiceId, m.sourceUrl, e)}>
{playingVoiceId === m.voiceId ? '⏸' : '▶'}
</button>
</div>
</div>
))
)}
</div>
)}
</div>
{/* Speed: the slider works in tenths (5–20 maps to 0.5x–2.0x); each change goes to the store and to saveMetaToLocalFile */}
<div className="voice-section">
<div className="voice-section-header">
<span className="voice-section-title"></span>
<span className="speed-value">{speed.toFixed(1)}x</span>
</div>
<div className="speed-slider-wrap">
<span>0.5x</span>
<input
type="range"
className="slider-input"
min={5}
max={20}
step={1}
value={Math.round(speed * 10)}
onChange={e => { const v = parseInt(e.target.value) / 10; setSpeed(v); saveMetaToLocalFile({ voiceSpeed: v }); }}
style={{ '--slider-percent': `${((Math.round(speed * 10) - 5) / 15) * 100}%` } as React.CSSProperties}
/>
<span>2.0x</span>
</div>
</div>
{/* Volume: integer slider from 0 to 10 */}
<div className="voice-section">
<div className="voice-section-header">
<span className="voice-section-title"></span>
<span className="speed-value">{volume}</span>
</div>
<div className="speed-slider-wrap">
<span>0</span>
<input
type="range"
className="slider-input"
min={0}
max={10}
step={1}
value={volume}
onChange={e => { const v = parseInt(e.target.value); setVolume(v); saveMetaToLocalFile({ voiceVolume: v }); }}
style={{ '--slider-percent': `${(volume / 10) * 100}%` } as React.CSSProperties}
/>
<span>10</span>
</div>
</div>
{/* Pitch: integer slider from -12 to 12 */}
<div className="voice-section">
<div className="voice-section-header">
<span className="voice-section-title"></span>
<span className="speed-value">{pitch}</span>
</div>
<div className="speed-slider-wrap">
<span>-12</span>
<input
type="range"
className="slider-input"
min={-12}
max={12}
step={1}
value={pitch}
onChange={e => { const v = parseInt(e.target.value); setPitch(v); saveMetaToLocalFile({ voicePitch: v }); }}
style={{ '--slider-percent': `${((pitch + 12) / 24) * 100}%` } as React.CSSProperties}
/>
<span>12</span>
</div>
</div>
{/* Bottom generate button; once a dubbing URL exists it becomes "regenerate" plus a play/pause preview button */}
<div className="voice-generate-wrap">
{!hasGeneratedAudio ? (
<button className="btn btn-primary generate-btn" onClick={handleGenerate} disabled={isGenerating || !mergedText.trim()}>
{isGenerating ? '合成中...' : `合成配音(预计消耗 ${estimatedTtsPoints.min}~${estimatedTtsPoints.max} 积分)`}
</button>
) : (
<div className="voice-generate-btns">
<button className="btn btn-secondary generate-btn" onClick={handleGenerate} disabled={isGenerating || !mergedText.trim()}>
{isGenerating ? '合成中...' : `重新生成(预计消耗 ${estimatedTtsPoints.min}~${estimatedTtsPoints.max} 积分)`}
</button>
<button
className="btn btn-primary generate-btn"
onClick={handleToggleGeneratedAudio}
>
{isPlayingGenerated ? '⏸ 暂停播放' : '▶ 试听播放'}
</button>
</div>
)}
</div>
</div>
{/* Right: read-only voiceover text with character and segment counts */}
<div className="script-content">
<div className="script-content-header">
<span className="script-content-meta">{totalChars} · {segments.length} </span>
</div>
<textarea readOnly value={mergedText} rows={20} className="script-textarea" />
</div>
</div>
<PointsModal />
</div>
);
}