meijiaka-zy/tauri-app/src/pages/VideoCreation/VoiceSynthesis.tsx

/**
 * 配音合成页面 (Step 3)
 * ======================
 *
 * 布局：左侧窄栏（音色 + 语速 + 生成按钮固定底部）| 右侧宽栏（配音文本）
 */

import { useState, useEffect, useMemo, useCallback, useRef } from 'react';
import { useProjectStore } from '../../store';
import { useVoiceStore } from '../../store/voiceStore';
import { getCurrentProjectId } from '../../api/modules/localStorage';
import { saveMetaToLocalFile } from '../../store/projectStore';
import { synthesizeTTS, saveAudio, uploadAudio, extractAudioSegment } from '../../api/modules/voice';
import { toast } from '../../store/uiStore';
import type { AlignmentResult } from '../../api/types';
import { useProgressStore } from '../../store/progressStore';
import { usePointsCheck } from '../../hooks/usePointsCheck';
import { getFriendlyErrorMessage } from '../../utils/errorMessage';
import { createTask, getTaskStatus } from '../../api/modules/task';
import { matchSegmentsToUtterances } from '../../utils/audioAlign';
import { uploadAudioFile } from '../../api/modules/videoCompose';
import { localProjectApi } from '../../api/modules/localStorage';
import './VoiceSynthesis.css';

export default function VoiceSynthesis() {
  // Id of the currently open project; re-read on every render.
  const projectId = getCurrentProjectId();

  // Project store: storyboard segments plus the persisted voice settings and their setters.
  const segments = useProjectStore(state => state.segments);
  const updateSegment = useProjectStore(state => state.updateSegment);
  const selectedVoiceId = useProjectStore(state => state.selectedVoiceId);
  const speed = useProjectStore(state => state.voiceSpeed);
  const volume = useProjectStore(state => state.voiceVolume);
  const pitch = useProjectStore(state => state.voicePitch);
  const setSelectedVoiceId = useProjectStore(state => state.setSelectedVoiceId);
  const setSpeed = useProjectStore(state => state.setVoiceSpeed);
  const setVolume = useProjectStore(state => state.setVoiceVolume);
  const setPitch = useProjectStore(state => state.setVoicePitch);

  // Voice store: preset voices, user voice materials, and their loaders.
  const {
    presetVoices,
    voiceMaterials,
    loadPresetVoices,
    loadVoiceMaterials,
  } = useVoiceStore();

  // True while a synthesis run is in progress (drives the generate button label/disabled state).
  const [isGenerating, setIsGenerating] = useState(false);
  // Which voice list is visible: system presets or the user's cloned voices.
  const [activeVoiceTab, setActiveVoiceTab] = useState<'preset' | 'clone'>('preset');
  // Id of the voice whose preview is currently playing, or null when none is.
  const [playingVoiceId, setPlayingVoiceId] = useState<string | null>(null);
  // The Audio element used for voice previews (at most one at a time).
  const audioInstanceRef = useRef<HTMLAudioElement | null>(null);

  // Use the dubbing URL persisted in the store (component-local state would be lost on refresh).
  const dubbingAudioUrl = useProjectStore(state => state.dubbingAudioUrl);
  // Playback state and Audio element for the generated dubbing track.
  const [isPlayingGenerated, setIsPlayingGenerated] = useState(false);
  const generatedAudioRef = useRef<HTMLAudioElement | null>(null);

  // Switches the bottom area between "generate" and "regenerate + listen".
  const hasGeneratedAudio = !!dubbingAudioUrl;

  // Load both voice lists on mount, and again whenever one of the listed dependencies
  // (the loaders or projectId) changes.
  useEffect(() => {
    loadPresetVoices();
    loadVoiceMaterials();
  }, [loadPresetVoices, loadVoiceMaterials, projectId]);

  // Stop any audio playback when the component unmounts.
  useEffect(() => {
    return () => {
      audioInstanceRef.current?.pause();
      generatedAudioRef.current?.pause();
      audioInstanceRef.current = null;
      generatedAudioRef.current = null;
    };
  }, []);

  // Read-only preview text: one line per segment, in storyboard order. Segments whose
  // voiceover is missing or blank are shown with a placeholder line.
  const mergedText = useMemo(() => {
    const lines: string[] = [];
    for (const seg of segments) {
      lines.push(seg.voiceover?.trim() || '【空镜】');
    }
    return lines.join('\n');
  }, [segments]);
  // Length of the preview text as displayed (includes line breaks and placeholder lines).
  const totalChars = mergedText.length;

  // Estimated TTS cost in points.
  //   speech time = 0.25 s per character (mirrors the backend `seconds_per_char: 0.25`
  //                 setting), divided by the playback speed multiplier
  //   pause time  = one pause between each pair of consecutive voiced segments:
  //                 0.5 s when the segment type changes, 0.3 s otherwise
  //                 (same rule handleGenerate uses when inserting <#x#> markers)
  //   points      = 1 per started 5 seconds, at least 1
  // The result is reported as a ±1 range to absorb rounding and speed variance.
  const estimatedTtsPoints = useMemo(() => {
    const spoken = segments.filter(seg => seg.voiceover?.trim());
    if (!spoken.length) {
      return { min: 0, max: 0 };
    }

    let charCount = 0;
    let pauseSeconds = 0;
    spoken.forEach((seg, idx) => {
      charCount += seg.voiceover!.trim().length;

      // No pause is counted after the last voiced segment.
      if (idx < spoken.length - 1) {
        const following = spoken[idx + 1];
        pauseSeconds += seg.type !== following?.type ? 0.5 : 0.3;
      }
    });

    // A falsy speed (0 / undefined) falls back to 1x.
    const speechSeconds = (charCount * 0.25) / (speed || 1);
    const base = Math.max(1, Math.ceil((speechSeconds + pauseSeconds) / 5));

    return { min: Math.max(1, base - 1), max: base + 1 };
  }, [segments, speed]);

  /**
   * Toggles the preview of a voice in the voice list.
   *
   * - Clicking the voice that is currently playing stops it.
   * - Clicking another voice stops the current preview (if any) and starts the new one.
   * - A voice without a preview URL is ignored.
   *
   * @param voiceId Id of the voice row whose preview button was clicked.
   * @param url     Preview audio URL, or null when the voice has none.
   * @param e       Click event; propagation is stopped so the row's own onClick
   *                (voice selection) does not fire.
   */
  const handlePlayPause = useCallback((voiceId: string, url: string | null, e: React.MouseEvent) => {
    e.stopPropagation();
    if (!url) {return;}

    const previous = audioInstanceRef.current;

    if (playingVoiceId === voiceId) {
      // Stop the current preview. The ref is cleared first so the element's
      // pause handler below sees it is no longer the active one.
      audioInstanceRef.current = null;
      previous?.pause();
      setPlayingVoiceId(null);
      return;
    }

    // Stop whatever was playing before starting the new preview.
    previous?.pause();

    const audio = new Audio(url);

    // Resets the UI, but only while this element is still the active preview;
    // events arriving late from a replaced element must not clobber the new one.
    const release = () => {
      if (audioInstanceRef.current === audio) {
        audioInstanceRef.current = null;
        setPlayingVoiceId(null);
      }
    };
    audio.onended = release;
    audio.onpause = release;

    audioInstanceRef.current = audio;
    setPlayingVoiceId(voiceId);

    // play() returns a promise that rejects when playback cannot start (unsupported
    // or unreachable source, autoplay policy) or when it is interrupted by pause().
    // Without a handler the rejection is unhandled and the row stays in "playing" state.
    audio.play().catch((err: unknown) => {
      const interrupted = err instanceof DOMException && err.name === 'AbortError';
      if (!interrupted) {
        console.error('[VoiceSynthesis] 试听播放失败:', err);
      }
      release();
    });
  }, [playingVoiceId]);

  /**
   * Aligns the synthesized dubbing audio with the voiceover text and cuts it into
   * per-segment clips.
   *
   * Steps: submit a `subtitle` task in `auto_align` mode, poll it until it completes
   * (1 s interval, 2 min timeout), match the returned utterances to segments, store
   * timing on every matched segment, extract + upload a clip for each segment of type
   * `segment`, then persist segments and the alignment result.
   *
   * Failures are reported through the progress store and signalled by the return
   * value; this function does not throw. The success state is left to the caller.
   *
   * @param dubbingAudioUrl  Remote URL of the full dubbing track (sent to the align task).
   * @param dubbingAudioPath Local file path of the same track (clip extraction input;
   *                         clips are written next to it).
   * @returns `true` when alignment finished or there was nothing to align; `false`
   *          when it failed and an error has already been shown via `progress.error`.
   */
  const handleAlignAndClip = useCallback(async (
    dubbingAudioUrl: string,
    dubbingAudioPath: string
  ): Promise<boolean> => {
    // Nothing to do without a project; not a failure.
    if (!projectId) {return true;}
    const progress = useProgressStore.getState();

    try {
      // 1. Build the text to align. Only segments with a non-blank voiceover are
      //    included, in the same order handleGenerate uses for the TTS text.
      const voicedSegments = segments.filter(s => s.voiceover?.trim());
      const fullText = voicedSegments
        .map(s => (s.voiceover ?? '').trim())
        .join('\n');
      if (!fullText) {return true;}

      // 2. Submit the alignment task (runs through the scheduler).
      progress.update('正在提交字幕任务...');
      const task = await createTask('subtitle', {
        mode: 'auto_align',
        videoPath: dubbingAudioUrl,
        audioText: fullText,
      });

      // 3. Poll until the task completes, fails, or the timeout elapses.
      progress.update('正在处理字幕...');
      const pollInterval = 1000;
      const timeout = 120_000; // 2 minutes
      const start = Date.now();
      type AutoAlignResult = { duration: number; utterances: Array<{ text: string; startTime: number; endTime: number }> };
      let alignResult: AutoAlignResult | null = null;

      while (Date.now() - start < timeout) {
        const status = await getTaskStatus(task.taskId);
        if (status.status === 'completed') {
          alignResult = status.result as AutoAlignResult;
          break;
        } else if (status.status === 'failed') {
          throw new Error(status.error || status.message || '字幕处理失败');
        }
        const elapsed = Math.floor((Date.now() - start) / 1000);
        progress.update(`字幕处理中... ${elapsed}s`);
        await new Promise(resolve => setTimeout(resolve, pollInterval));
      }

      if (!alignResult) {
        progress.error('字幕处理超时');
        return false;
      }
      if (!alignResult.utterances?.length) {
        progress.error('字幕处理异常');
        return false;
      }

      // 4. Match utterances back to segments by text.
      const matchSegments = voicedSegments.map(s => ({ id: s.id, voiceover: s.voiceover || '' }));
      const matched = matchSegmentsToUtterances(matchSegments, alignResult.utterances);
      if (!matched.length) {
        progress.error('音频对齐失败');
        return false;
      }

      // 5. Extract and upload the per-segment clips.
      progress.update('正在整理音频...');
      // Directory of the dubbing file, with backslashes normalized to forward slashes.
      const audiosDir = dubbingAudioPath.replace(/\\/g, '/').split('/').slice(0, -1).join('/');

      for (const m of matched) {
        const seg = segments.find(s => s.id === m.segmentId);
        if (!seg) {continue;}

        // Every matched segment keeps its real timing so later steps can match
        // material precisely.
        updateSegment(m.segmentId, {
          audioStartTime: m.startTime,
          audioEndTime: m.endTime,
          actualDuration: m.actualDuration,
        });

        // Clips are only cut for segments of type 'segment'; other types are skipped.
        if (seg.type !== 'segment') {continue;}

        const outputPath = `${audiosDir}/segment_${m.segmentId}.mp3`;

        try {
          await extractAudioSegment({
            inputPath: dubbingAudioPath,
            start: m.startTime / 1000,
            duration: m.actualDuration,
            outputPath,
          });

          const clipUrl = await uploadAudioFile(outputPath);

          updateSegment(m.segmentId, {
            clipAudioPath: outputPath,
            clipAudioUrl: clipUrl,
          });
        } catch (e) {
          // Best effort: one failed clip must not abort the remaining segments.
          console.error(`[VoiceSynthesis] Segment ${m.segmentId} 截取/上传失败:`, e);
        }
      }

      // 6. Persist segments.json from the latest store state (includes the updates above).
      const currentSegments = useProjectStore.getState().segments;
      const segmentsSaved = await localProjectApi.saveSegments(projectId, currentSegments);
      if (!segmentsSaved) {
        console.error('[VoiceSynthesis] segments.json 保存失败');
        // Reported through the progress flow so it ends in an error state
        // instead of being followed by a success message from the caller.
        progress.error('分镜数据保存失败，请重试');
        return false;
      }

      // 7. Save the alignment result to the project meta so Step 4 can reuse it.
      const subtitleAlignment: AlignmentResult = {
        status: 'completed',
        utterances: alignResult.utterances.map(u => ({
          text: u.text,
          start_time: u.startTime,
          end_time: u.endTime,
        })),
        duration: alignResult.duration,
      };
      useProjectStore.setState({ subtitleAlignment });
      await saveMetaToLocalFile({ subtitleAlignment });

      // progress.success is intentionally not called here; the caller
      // (handleGenerate) owns the final success state.
      return true;
    } catch (err) {
      console.error('[VoiceSynthesis] 打轴截取流程失败:', err);
      progress.error(getFriendlyErrorMessage(err, '音频处理失败，请稍后重试'));
      return false;
    }
  }, [projectId, segments, updateSegment]);

  const { checkBalance, handleError, PointsModal } = usePointsCheck();

  // Synchronous re-entrancy latch for handleGenerate. `isGenerating` is only set after
  // the awaited balance check, so a second click during that await would otherwise
  // start (and bill) a second synthesis.
  const generateInFlightRef = useRef(false);

  /**
   * Runs the full dubbing flow: build the TTS text, check the points balance,
   * synthesize, download the result, upload it, save it locally, persist the
   * project-level dubbing info, then align and clip the audio per segment.
   *
   * Progress and errors are reported through the progress store; validation
   * problems are reported with toasts. Calls made while a run is already in
   * flight are ignored.
   */
  const handleGenerate = useCallback(async () => {
    if (generateInFlightRef.current) {return;}
    if (!projectId) { toast.warning('请先创建项目'); return; }

    // Build the TTS text from the voiced segments, appending a pause marker after
    // every segment except the last.
    const realText = segments
      .filter(s => s.voiceover?.trim())
      .map((s, i, arr) => {
        const text = (s.voiceover ?? '').trim();
        if (i === arr.length - 1) {return text;}
        const next = arr[i + 1];
        // Segment type changes (segment <-> empty_shot): long pause so the viewer
        // can take in the picture.
        if (s.type !== next?.type) {
          return text + '<#0.5#>';
        }
        // Same type on both sides: short pause to keep the rhythm.
        return text + '<#0.3#>';
      })
      .join('\n');
    if (!realText) { toast.warning('没有需要合成的旁白文本'); return; }
    // Vidu TTS accepts at most 10000 characters per request; longer text is cut.
    // NOTE(review): the cut is silent and may land inside a pause marker, while the
    // alignment text in handleAlignAndClip is not truncated — confirm this is acceptable.
    const truncatedText = realText.length > 10000 ? realText.slice(0, 10000) : realText;

    // Read the voice settings straight from the store to avoid stale closure values.
    const currentVoiceId = useProjectStore.getState().selectedVoiceId;
    const currentSpeed = useProjectStore.getState().voiceSpeed;
    const currentVolume = useProjectStore.getState().voiceVolume;
    const currentPitch = useProjectStore.getState().voicePitch;
    if (!currentVoiceId) { toast.warning('请先选择音色'); return; }

    generateInFlightRef.current = true;
    try {
      // Up-front balance check in lenient mode (third argument `false`): a positive
      // balance is enough because the real TTS cost is not known in advance.
      const ok = await checkBalance(estimatedTtsPoints, '配音合成', false);
      if (!ok) {return;}

      const progress = useProgressStore.getState();
      setIsGenerating(true);
      progress.show('配音合成');

      try {
        progress.update('正在合成配音...');
        const result = await synthesizeTTS({ text: truncatedText, voiceId: currentVoiceId, speed: currentSpeed, volume: currentVolume, pitch: currentPitch });
        if (!result.audioUrl) {throw new Error('未返回音频 URL');}

        progress.update('正在处理音频...');
        // Download the synthesized audio.
        const response = await fetch(result.audioUrl);
        if (!response.ok) {throw new Error('下载音频失败');}
        const blob = await response.blob();

        // Upload to Qiniu cloud storage.
        const file = new File([blob], `tts_${Date.now()}.mp3`, { type: 'audio/mp3' });
        const qiniuUrl = await uploadAudio(file);

        // Save locally: saveAudio takes the payload as base64, i.e. the data URL
        // without its "data:...;base64," prefix.
        const base64 = await new Promise<string>((resolve, reject) => {
          const reader = new FileReader();
          // onload only fires for a successful read, so reader.result is a string here.
          reader.onload = () => {
            const dataUrl = reader.result as string;
            resolve(dataUrl.split(',')[1]);
          };
          reader.onerror = reject;
          reader.readAsDataURL(blob);
        });

        const audioId = `voice_${Date.now()}`;
        const meta = await saveAudio({
          projectId, audioId, audioData: base64,
          name: `配音合成-${segments.length}段`, voiceId: currentVoiceId, duration: 0,
        });

        // Update the project store and meta.json with the project-level dubbing info.
        // The URL lives in the store, so no component-local copy is kept.
        useProjectStore.setState({
          dubbingAudioUrl: qiniuUrl,
          dubbingAudioPath: meta.filePath,
        });
        await saveMetaToLocalFile({
          dubbingAudioUrl: qiniuUrl,
          dubbingAudioPath: meta.filePath,
        });

        // Align and clip right after synthesis. On failure the error has already
        // been shown, so it must not be overwritten with a success message.
        const aligned = await handleAlignAndClip(qiniuUrl, meta.filePath);
        if (!aligned) {return;}

        progress.success('配音合成完成', result.consumedPoints);
      } catch (err) {
        // handleError returns true when it has taken over the error
        // (the points-related case); the progress UI is then just hidden.
        if (handleError(err, '配音合成', estimatedTtsPoints.max)) {
          progress.hide();
          return;
        }
        progress.error(getFriendlyErrorMessage(err, '配音合成失败，请稍后重试'));
      } finally {
        setIsGenerating(false);
      }
    } finally {
      generateInFlightRef.current = false;
    }
  }, [projectId, segments, handleAlignAndClip, checkBalance, handleError, estimatedTtsPoints]);

  /**
   * Toggles playback of the generated dubbing track.
   *
   * Does nothing when no dubbing URL exists. While playing, a call stops playback;
   * otherwise a new Audio element is created for the current URL and started.
   */
  const handleToggleGeneratedAudio = useCallback(() => {
    if (!dubbingAudioUrl) {return;}

    if (isPlayingGenerated) {
      // Clear the ref before pausing so the element's pause handler below
      // recognises it is no longer the active one.
      const current = generatedAudioRef.current;
      generatedAudioRef.current = null;
      current?.pause();
      setIsPlayingGenerated(false);
      return;
    }

    const audio = new Audio(dubbingAudioUrl);

    // Resets the UI only while this element is still the active one. Without the
    // identity check, a late pause/ended event from an earlier element could clear
    // the ref and state belonging to a newer playback, leaving it impossible to pause.
    const release = () => {
      if (generatedAudioRef.current === audio) {
        generatedAudioRef.current = null;
        setIsPlayingGenerated(false);
      }
    };
    audio.onended = release;
    audio.onpause = release;

    generatedAudioRef.current = audio;
    setIsPlayingGenerated(true);

    // play() rejects when playback cannot start or is interrupted by pause();
    // handle it so the button does not stay in the "playing" state.
    audio.play().catch((err: unknown) => {
      const interrupted = err instanceof DOMException && err.name === 'AbortError';
      if (!interrupted) {
        console.error('[VoiceSynthesis] 配音播放失败:', err);
      }
      release();
    });
  }, [dubbingAudioUrl, isPlayingGenerated]);

  // Cloned voices that are usable (status 'ready'). Computed once per render; it feeds
  // the tab counter, the empty-state check and the list itself.
  const readyVoiceMaterials = voiceMaterials.filter(m => m.status === 'ready');
  // The speed slider works in tenths (5..20) so it can use integer steps.
  const speedTenths = Math.round(speed * 10);

  return (
    <div className="voice-dubbing">
      <div className="dubbing-layout">
        {/* Left column: voice picker, sliders, generate button */}
        <div className="voice-sidebar">
          {/* Voice picker */}
          <div className="voice-section">
            <div className="voice-section-header">
              <span className="voice-section-title">选择音色</span>
            </div>

            <div className="voice-tabs">
              <button className={`voice-tab ${activeVoiceTab === 'preset' ? 'active' : ''}`} onClick={() => setActiveVoiceTab('preset')}>
                系统预设 ({presetVoices.length})
              </button>
              <button className={`voice-tab ${activeVoiceTab === 'clone' ? 'active' : ''}`} onClick={() => setActiveVoiceTab('clone')}>
                私有音色 ({readyVoiceMaterials.length})
              </button>
            </div>

            {activeVoiceTab === 'preset' && (
              <div className="voice-list">
                {presetVoices.map(v => (
                  <div key={v.voiceId} className={`voice-row ${v.voiceId === selectedVoiceId ? 'selected' : ''}`} onClick={() => { setSelectedVoiceId(v.voiceId); saveMetaToLocalFile({ selectedVoiceId: v.voiceId }); }}>
                    <div className="voice-row-main">
                      <div className="voice-row-info">
                        <div className="voice-row-name">
                          {v.name}
                          <span className="voice-row-desc-inline">{v.description}</span>
                        </div>
                      </div>
                      <button className="preview-icon" onClick={e => handlePlayPause(v.voiceId, v.previewUrl ?? null, e)}>
                        {playingVoiceId === v.voiceId ? '⏸' : '▶'}
                      </button>
                    </div>
                  </div>
                ))}
              </div>
            )}

            {activeVoiceTab === 'clone' && (
              <div className="voice-list">
                {readyVoiceMaterials.length === 0 ? (
                  <div className="voice-empty">暂无私有音色<br /><small>去素材库上传音频并克隆音色</small></div>
                ) : (
                  readyVoiceMaterials.map(m => (
                    <div key={m.voiceId} className={`voice-row ${m.voiceId === selectedVoiceId ? 'selected' : ''}`} onClick={() => { setSelectedVoiceId(m.voiceId); saveMetaToLocalFile({ selectedVoiceId: m.voiceId }); }}>
                      <div className="voice-row-main">
                        <div className="voice-row-info">
                          <div className="voice-row-name">
                            {m.name} <span className="tag clone">克隆</span>
                            <span className="voice-row-desc-inline">
                              {m.createdAt ? new Date(m.createdAt).toLocaleDateString('zh-CN') : ''}
                            </span>
                          </div>
                        </div>
                        <button className="preview-icon" onClick={e => handlePlayPause(m.voiceId, m.sourceUrl, e)}>
                          {playingVoiceId === m.voiceId ? '⏸' : '▶'}
                        </button>
                      </div>
                    </div>
                  ))
                )}
              </div>
            )}
          </div>

          {/* Speed: slider value is speed x 10 (5..20 maps to 0.5x..2.0x) */}
          <div className="voice-section">
            <div className="voice-section-header">
              <span className="voice-section-title">语速</span>
              <span className="speed-value">{speed.toFixed(1)}x</span>
            </div>
            <div className="speed-slider-wrap">
              <span>0.5x</span>
              <input
                type="range"
                className="slider-input"
                min={5}
                max={20}
                step={1}
                value={speedTenths}
                onChange={e => { const v = parseInt(e.target.value, 10) / 10; setSpeed(v); saveMetaToLocalFile({ voiceSpeed: v }); }}
                style={{ '--slider-percent': `${((speedTenths - 5) / 15) * 100}%` } as React.CSSProperties}
              />
              <span>2.0x</span>
            </div>
          </div>

          {/* Volume (0..10) */}
          <div className="voice-section">
            <div className="voice-section-header">
              <span className="voice-section-title">音量</span>
              <span className="speed-value">{volume}</span>
            </div>
            <div className="speed-slider-wrap">
              <span>0</span>
              <input
                type="range"
                className="slider-input"
                min={0}
                max={10}
                step={1}
                value={volume}
                onChange={e => { const v = parseInt(e.target.value, 10); setVolume(v); saveMetaToLocalFile({ voiceVolume: v }); }}
                style={{ '--slider-percent': `${(volume / 10) * 100}%` } as React.CSSProperties}
              />
              <span>10</span>
            </div>
          </div>

          {/* Pitch (-12..12) */}
          <div className="voice-section">
            <div className="voice-section-header">
              <span className="voice-section-title">音调</span>
              <span className="speed-value">{pitch}</span>
            </div>
            <div className="speed-slider-wrap">
              <span>-12</span>
              <input
                type="range"
                className="slider-input"
                min={-12}
                max={12}
                step={1}
                value={pitch}
                onChange={e => { const v = parseInt(e.target.value, 10); setPitch(v); saveMetaToLocalFile({ voicePitch: v }); }}
                style={{ '--slider-percent': `${((pitch + 12) / 24) * 100}%` } as React.CSSProperties}
              />
              <span>12</span>
            </div>
          </div>

          {/* Bottom: generate button, or regenerate + listen once a dubbing track exists */}
          <div className="voice-generate-wrap">
            {!hasGeneratedAudio ? (
              <button className="btn btn-primary generate-btn" onClick={handleGenerate} disabled={isGenerating || !mergedText.trim()}>
                {isGenerating ? '合成中...' : `合成配音（预计消耗 ${estimatedTtsPoints.min}~${estimatedTtsPoints.max} 积分）`}
              </button>
            ) : (
              <div className="voice-generate-btns">
                <button className="btn btn-secondary generate-btn" onClick={handleGenerate} disabled={isGenerating || !mergedText.trim()}>
                  {isGenerating ? '合成中...' : `重新生成（预计消耗 ${estimatedTtsPoints.min}~${estimatedTtsPoints.max} 积分）`}
                </button>
                <button
                  className="btn btn-primary generate-btn"
                  onClick={handleToggleGeneratedAudio}
                >
                  {isPlayingGenerated ? '⏸ 暂停播放' : '▶ 试听播放'}
                </button>
              </div>
            )}
          </div>
        </div>

        {/* Right column: read-only voiceover text */}
        <div className="script-content">
          <div className="script-content-header">
            配音文本
            <span className="script-content-meta">{totalChars} 字 · {segments.length} 个分镜</span>
          </div>
          <textarea readOnly value={mergedText} rows={20} className="script-textarea" />
        </div>
      </div>
      <PointsModal />
    </div>
  );
}