PresentaPulse/audio_processor.py at main · LebToki/PresentaPulse · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
"""
Audio processing utilities for video enhancement
Extract, sync, and enhance audio for animated videos
"""
import subprocess
import logging
import os
from pathlib import Path
from typing import Optional, Tuple
import tempfile

# Optional dependency probe: record in CV2_AVAILABLE whether OpenCV (cv2) can
# be imported, so this module still loads when it is missing. A warning is
# logged in that case instead of raising.
try:
    import cv2
    CV2_AVAILABLE = True
except ImportError:
    CV2_AVAILABLE = False
    logging.warning("OpenCV not available for audio processing")

def _find_executable(name: str) -> str:
    """
    Locate an FFmpeg-suite executable by its base name.

    Lookup order:
      1. the system PATH (via shutil.which),
      2. a short list of conventional install locations for the current OS,
      3. the bare name itself, so that a later subprocess call can still
         try to resolve it.

    Args:
        name: Base name of the tool without extension ('ffmpeg', 'ffprobe')

    Returns:
        Path to the executable, or ``name`` unchanged if nothing was found
    """
    import platform
    import shutil

    located = shutil.which(name)
    if located:
        return located

    if platform.system() == 'Windows':
        candidates = [
            f'C:\\ffmpeg\\bin\\{name}.exe',
            f'C:\\Program Files\\ffmpeg\\bin\\{name}.exe',
        ]
    else:
        candidates = [
            f'/usr/bin/{name}',
            f'/usr/local/bin/{name}',
        ]

    for candidate in candidates:
        # isfile (not exists) so a directory is never returned as an executable
        if os.path.isfile(candidate):
            return candidate

    return name


def find_ffmpeg() -> str:
    """Find ffmpeg executable."""
    return _find_executable('ffmpeg')


# Resolved once at import time and reused by every ffmpeg invocation below.
FFMPEG_PATH = find_ffmpeg()


def find_ffprobe() -> str:
    """Find ffprobe executable."""
    return _find_executable('ffprobe')


# Resolved once at import time and reused by every ffprobe invocation below.
FFPROBE_PATH = find_ffprobe()


def extract_audio(video_path: str, output_audio_path: Optional[str] = None) -> Optional[str]:
    """
    Pull the audio track out of a video and write it as a WAV file.

    The audio is decoded by ffmpeg to 16-bit PCM, 44.1 kHz, stereo. An
    existing file at the destination is overwritten.

    Args:
        video_path: Path to video file
        output_audio_path: Optional destination; when omitted, a file named
            after the video is written to the system temp directory

    Returns:
        Path to the extracted audio file, or None if the video is missing,
        ffmpeg fails, or no output file was produced
    """
    try:
        if not os.path.exists(video_path):
            logging.error(f"Video file not found: {video_path}")
            return None

        # Default destination lives in the temp directory, keyed by video name
        if output_audio_path is None:
            wav_name = f"extracted_audio_{os.path.basename(video_path)}.wav"
            output_audio_path = os.path.join(tempfile.gettempdir(), wav_name)

        ffmpeg_args = [FFMPEG_PATH, '-i', str(video_path)]
        ffmpeg_args.append('-vn')                        # drop the video stream
        ffmpeg_args.extend(['-acodec', 'pcm_s16le'])     # 16-bit PCM
        ffmpeg_args.extend(['-ar', '44100'])             # sample rate
        ffmpeg_args.extend(['-ac', '2'])                 # stereo
        ffmpeg_args.append('-y')                         # overwrite existing output
        ffmpeg_args.append(str(output_audio_path))

        subprocess.run(ffmpeg_args, capture_output=True, text=True, check=True)

        if not os.path.exists(output_audio_path):
            logging.error("Audio extraction failed - output file not created")
            return None

        logging.info(f"Audio extracted to: {output_audio_path}")
        return output_audio_path

    except subprocess.CalledProcessError as e:
        logging.error(f"Error extracting audio: {e.stderr}")
        return None
    except Exception as e:
        logging.error(f"Error in extract_audio: {str(e)}")
        return None


def get_audio_duration(audio_path: str) -> float:
    """
    Return the length of an audio file in seconds, as reported by ffprobe.

    Any failure (ffprobe error, unparsable output, ...) is logged as a
    warning and reported as 0.0 rather than raised.
    """
    try:
        probe_args = [FFPROBE_PATH, '-v', 'error']
        probe_args += ['-show_entries', 'format=duration']
        # Print only the bare value, without section wrappers or key names
        probe_args += ['-of', 'default=noprint_wrappers=1:nokey=1']
        probe_args.append(str(audio_path))

        probe = subprocess.run(probe_args, capture_output=True, text=True, check=True)
        seconds = float(probe.stdout.strip())
    except Exception as e:
        logging.warning(f"Could not get audio duration: {e}")
        return 0.0
    return seconds


def get_video_duration(video_path: str) -> float:
    """
    Return the length of a video file in seconds, as reported by ffprobe.

    Any failure (ffprobe error, unparsable output, ...) is logged as a
    warning and reported as 0.0 rather than raised.
    """
    try:
        probe_args = [FFPROBE_PATH, '-v', 'error']
        probe_args += ['-show_entries', 'format=duration']
        # Print only the bare value, without section wrappers or key names
        probe_args += ['-of', 'default=noprint_wrappers=1:nokey=1']
        probe_args.append(str(video_path))

        probe = subprocess.run(probe_args, capture_output=True, text=True, check=True)
        seconds = float(probe.stdout.strip())
    except Exception as e:
        logging.warning(f"Could not get video duration: {e}")
        return 0.0
    return seconds


def sync_audio_to_video(audio_path: str, video_path: str, output_path: str,
                        loop_audio: bool = False, normalize: bool = False) -> bool:
    """
    Sync audio to video, matching durations.

    The video stream is copied without re-encoding; the audio is encoded to
    AAC at 192k. Duration handling:
      * audio shorter than video and ``loop_audio`` set: the audio is looped
        and the output is cut at the end of the video,
      * audio longer than video: the output is cut at the end of the video,
      * otherwise the audio is used as-is.

    Args:
        audio_path: Path to audio file
        video_path: Path to video file
        output_path: Output video path with audio
        loop_audio: Loop audio if shorter than video
        normalize: Normalize audio levels

    Returns:
        True if successful, False otherwise
    """
    try:
        audio_duration = get_audio_duration(audio_path)
        video_duration = get_video_duration(video_path)

        # Always map streams explicitly: without -map, ffmpeg's automatic
        # stream selection could choose an audio track already present in
        # the video file instead of the one supplied in audio_path.
        command = [
            FFMPEG_PATH,
            '-i', str(video_path),
            '-i', str(audio_path),
            '-map', '0:v:0',
            '-map', '1:a:0',
            '-c:v', 'copy',  # Copy video stream (no re-encode)
        ]

        # All audio processing goes into ONE simple filter chain. ffmpeg
        # refuses to combine -af with -filter_complex on the same output
        # stream, so looping and normalization must share a chain.
        audio_filters = []
        stop_at_video_end = False

        if audio_duration < video_duration and loop_audio:
            # Loop audio to match video length. The loop is infinite, so the
            # output has to be bounded by the video (see -shortest below).
            audio_filters.append('aloop=loop=-1:size=2e+09')
            stop_at_video_end = True
        elif audio_duration > video_duration:
            # Trim audio to match video
            stop_at_video_end = True

        if normalize:
            audio_filters.append('loudnorm=I=-16:TP=-1.5:LRA=11')

        if audio_filters:
            command.extend(['-af', ','.join(audio_filters)])
        if stop_at_video_end:
            command.append('-shortest')

        command.extend([
            '-c:a', 'aac',
            '-b:a', '192k',
            '-strict', 'experimental',
            '-y',
            str(output_path)
        ])

        # text=True so that e.stderr below is readable text, not a bytes repr
        subprocess.run(command, capture_output=True, text=True, check=True)

        if os.path.exists(output_path):
            logging.info(f"Audio synced to video: {output_path}")
            return True
        else:
            logging.error("Audio sync failed - output file not created")
            return False

    except subprocess.CalledProcessError as e:
        logging.error(f"Error syncing audio: {e.stderr}")
        return False
    except Exception as e:
        logging.error(f"Error in sync_audio_to_video: {str(e)}")
        return False


def add_background_music(video_path: str, music_path: str, output_path: str,
                         music_volume: float = 0.3, original_audio_volume: float = 1.0,
                         normalize: bool = True) -> bool:
    """
    Add background music to video with volume mixing.

    The video stream is copied without re-encoding. If the video already has
    an audio stream, it is mixed with the music; otherwise the music becomes
    the only audio track. Music shorter than the video is looped; the output
    always ends with the video.

    Args:
        video_path: Path to video file
        music_path: Path to background music file
        output_path: Output video path
        music_volume: Volume of background music (0.0-1.0)
        original_audio_volume: Volume of original audio (0.0-1.0)
        normalize: Normalize audio levels

    Returns:
        True if successful, False otherwise
    """
    try:
        if not os.path.exists(music_path):
            logging.error(f"Music file not found: {music_path}")
            return False

        video_duration = get_video_duration(video_path)
        music_duration = get_audio_duration(music_path)

        # Filter chain applied to the music input, shared by both cases below
        music_chain = []
        if music_duration < video_duration:
            # Loop music
            music_chain.append('aloop=loop=-1:size=2e+09')
        elif video_duration > 0:
            # Trim music to video length
            music_chain.append(f'atrim=0:{video_duration}')
        # else: the video duration is unknown (probe returned 0.0). An
        # 'atrim=0:0.0' would cut the music down to nothing, so skip the
        # trim and let -shortest bound the output instead.
        music_chain.append(f'volume={music_volume}')
        music_filter = ','.join(music_chain)

        # Check if video has audio
        if check_video_has_audio(video_path):
            # Mix original audio with background music; duration=first ends
            # the mix together with the original audio track
            audio_filters = [
                f'[1:a]{music_filter}[music]',
                f'[0:a]volume={original_audio_volume}[orig]',
                '[orig][music]amix=inputs=2:duration=first:dropout_transition=2[mixed]',
            ]
        else:
            # Just add music (no original audio)
            audio_filters = [f'[1:a]{music_filter}[mixed]']

        # Add normalization if requested
        if normalize:
            audio_filters.append('[mixed]loudnorm=I=-16:TP=-1.5:LRA=11[final]')
            output_label = '[final]'
        else:
            output_label = '[mixed]'

        command = [
            FFMPEG_PATH,
            '-i', str(video_path),
            '-i', str(music_path),
            '-filter_complex', ';'.join(audio_filters),
            '-map', '0:v:0',
            '-map', output_label,
            '-c:v', 'copy',
            '-c:a', 'aac',
            '-b:a', '192k',
            '-shortest',  # stop at the end of the video even if music loops
            '-y',
            str(output_path)
        ]

        # text=True so that e.stderr below is readable text, not a bytes repr
        subprocess.run(command, capture_output=True, text=True, check=True)

        if os.path.exists(output_path):
            logging.info(f"Background music added: {output_path}")
            return True
        else:
            logging.error("Adding background music failed - output file not created")
            return False

    except subprocess.CalledProcessError as e:
        logging.error(f"Error adding background music: {e.stderr}")
        return False
    except Exception as e:
        logging.error(f"Error in add_background_music: {str(e)}")
        return False


def normalize_audio(audio_path: str, output_path: str) -> bool:
    """
    Run ffmpeg's loudnorm filter over an audio file.

    An existing file at ``output_path`` is overwritten.

    Args:
        audio_path: Path to input audio file
        output_path: Path to output normalized audio file

    Returns:
        True if ffmpeg succeeded and the output file exists, False otherwise
    """
    try:
        ffmpeg_args = [FFMPEG_PATH, '-i', str(audio_path)]
        ffmpeg_args += ['-af', 'loudnorm=I=-16:TP=-1.5:LRA=11']
        ffmpeg_args += ['-y', str(output_path)]

        subprocess.run(ffmpeg_args, capture_output=True, check=True)

        if not os.path.exists(output_path):
            logging.error("Audio normalization failed")
            return False

        logging.info(f"Audio normalized: {output_path}")
        return True

    except subprocess.CalledProcessError as e:
        logging.error(f"Error normalizing audio: {e.stderr}")
        return False
    except Exception as e:
        logging.error(f"Error in normalize_audio: {str(e)}")
        return False


def check_video_has_audio(video_path: str) -> bool:
    """
    Report whether ffprobe finds an audio stream in the given file.

    Returns False when ffprobe cannot be run or reports no audio stream.
    """
    try:
        probe_args = [FFPROBE_PATH, '-v', 'error']
        probe_args += ['-select_streams', 'a:0']           # first audio stream only
        probe_args += ['-show_entries', 'stream=codec_type']
        probe_args += ['-of', 'default=noprint_wrappers=1:nokey=1']
        probe_args.append(str(video_path))

        # check=False: a non-zero exit simply yields no 'audio' in the output
        probe = subprocess.run(probe_args, capture_output=True, text=True, check=False)
    except Exception:
        return False
    return 'audio' in probe.stdout.lower()


def process_audio_for_video(video_path: str, source_video_path: Optional[str] = None,
                            background_music_path: Optional[str] = None,
                            music_volume: float = 0.3, normalize: bool = True,
                            loop_audio: bool = False, output_path: Optional[str] = None) -> Optional[str]:
    """
    Complete audio processing pipeline for video.

    Depending on which inputs are usable, one of these happens:
      * source audio and music: the extracted audio is synced onto the video
        in an intermediate file, then the music is mixed in,
      * music only: the music is added with the video's own audio muted,
      * source audio only: the extracted audio is synced onto the video,
      * neither: nothing is done and ``video_path`` is returned unchanged.

    Intermediate files created here (the extracted audio and the temporary
    synced video) are removed before returning.

    Args:
        video_path: Path to video file (may or may not have audio)
        source_video_path: Path to source video to extract audio from
        background_music_path: Path to background music file
        music_volume: Volume of background music (0.0-1.0)
        normalize: Normalize audio levels
        loop_audio: Loop audio if shorter than video
        output_path: Output video path (defaults to input with _with_audio suffix)

    Returns:
        Path to output video with audio, or None if failed
    """
    extracted_audio = None
    temp_video = None
    try:
        if output_path is None:
            video_path_obj = Path(video_path)
            output_path = str(video_path_obj.parent / f"{video_path_obj.stem}_with_audio{video_path_obj.suffix}")

        # Step 1: Extract audio from source video if provided
        if source_video_path and os.path.exists(source_video_path):
            extracted_audio = extract_audio(source_video_path)
        elif source_video_path:
            logging.warning(f"Source video not found, skipping audio extraction: {source_video_path}")

        has_music = bool(background_music_path) and os.path.exists(background_music_path)

        # Step 2: Add background music if provided
        if has_music and extracted_audio:
            # First sync extracted audio into a uniquely named intermediate
            # file next to the output, so concurrent runs (or an output that
            # happens to be called temp_with_audio.mp4) cannot collide.
            output_path_obj = Path(output_path)
            fd, temp_video = tempfile.mkstemp(
                prefix='temp_with_audio_',
                suffix=output_path_obj.suffix or '.mp4',
                dir=str(output_path_obj.parent),
            )
            os.close(fd)  # ffmpeg overwrites the placeholder (-y)

            if not sync_audio_to_video(extracted_audio, video_path, temp_video, loop_audio, normalize):
                logging.error("Could not sync extracted audio; background music not added")
                return None

            # Then add background music
            success = add_background_music(temp_video, background_music_path, output_path,
                                           music_volume, 1.0, normalize)
            return output_path if success else None

        if has_music:
            # Just add background music (original audio volume 0.0 = muted)
            success = add_background_music(video_path, background_music_path, output_path,
                                           music_volume, 0.0, normalize)
            return output_path if success else None

        # Step 3: Sync extracted audio if no background music
        if extracted_audio:
            success = sync_audio_to_video(extracted_audio, video_path, output_path, loop_audio, normalize)
            return output_path if success else None

        # No audio processing needed
        return video_path

    except Exception as e:
        logging.error(f"Error in process_audio_for_video: {str(e)}")
        return None
    finally:
        # Cleanup intermediates on every exit path, success or failure
        for leftover in (temp_video, extracted_audio):
            if leftover and os.path.exists(leftover):
                try:
                    os.remove(leftover)
                except OSError as cleanup_error:
                    logging.warning(f"Could not remove temporary file {leftover}: {cleanup_error}")