perf: optimize memory usage and processing performance

This commit is contained in:
harry
2025-05-09 20:55:12 +08:00
parent 35a7ef657a
commit f07e5802f7

View File

@@ -1,8 +1,9 @@
import glob import glob
import os import os
import random import random
import gc
import shutil
from typing import List from typing import List
from loguru import logger from loguru import logger
from moviepy import ( from moviepy import (
AudioFileClip, AudioFileClip,
@@ -29,6 +30,68 @@ from app.models.schema import (
from app.services.utils import video_effects from app.services.utils import video_effects
from app.utils import utils from app.utils import utils
class SubClippedVideoClip:
    """Lightweight metadata record for a time-slice of a video file.

    Holds only the source path, the slice boundaries and (optionally) the
    frame dimensions, so slices can be planned and shuffled without keeping
    heavy VideoFileClip readers open in memory.
    """

    def __init__(self, file_path, start_time, end_time, width=None, height=None):
        self.file_path = file_path    # path to the source video on disk
        self.start_time = start_time  # slice start, in seconds
        self.end_time = end_time      # slice end, in seconds
        self.width = width            # source frame width, if known
        self.height = height          # source frame height, if known

    def __str__(self):
        # Human-readable form used in debug logging.
        return (
            f"SubClippedVideoClip(file_path={self.file_path}, "
            f"start_time={self.start_time}, end_time={self.end_time}, "
            f"width={self.width}, height={self.height})"
        )
# Shared encoding defaults used by every write_videofile call in this module.
audio_codec = "aac"  # AAC audio for broad player compatibility
video_codec = "libx264"  # H.264 video encoder
fps = 30  # output frame rate for all rendered clips
def close_clip(clip):
    """Aggressively release the resources held by a moviepy clip.

    Closes the clip's own reader, the readers of its ``audio`` and ``mask``
    attachments (dropping those attributes afterwards), recursively closes
    any child clips of a composite, and finally triggers a garbage-collection
    pass so file handles and frame buffers are freed promptly.

    Safe to call with ``None``; any failure during cleanup is logged rather
    than raised (best-effort release).
    """
    if clip is None:
        return

    try:
        # release the clip's own frame reader
        main_reader = getattr(clip, "reader", None)
        if main_reader is not None:
            main_reader.close()

        # release attached audio / mask readers and drop the attributes
        for attached in ("audio", "mask"):
            sub = getattr(clip, attached, None)
            if sub is not None:
                sub_reader = getattr(sub, "reader", None)
                if sub_reader is not None:
                    sub_reader.close()
                delattr(clip, attached)

        # recurse into children of composite clips
        children = getattr(clip, "clips", None)
        if children:
            for child in children:
                if child is not clip:  # avoid possible circular references
                    close_clip(child)

        # drop references to the child clip list
        if hasattr(clip, "clips"):
            clip.clips = []
    except Exception as e:
        logger.error(f"failed to close clip: {str(e)}")

    del clip
    # force-collect so large frame buffers are reclaimed immediately
    gc.collect()
def delete_files(files: List[str] | str):
if isinstance(files, str):
files = [files]
for file in files:
try:
os.remove(file)
except:
pass
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""): def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
if not bgm_type: if not bgm_type:
@@ -58,85 +121,76 @@ def combine_videos(
) -> str: ) -> str:
audio_clip = AudioFileClip(audio_file) audio_clip = AudioFileClip(audio_file)
audio_duration = audio_clip.duration audio_duration = audio_clip.duration
logger.info(f"max duration of audio: {audio_duration} seconds") logger.info(f"audio duration: {audio_duration} seconds")
# Required duration of each clip # Required duration of each clip
req_dur = audio_duration / len(video_paths) req_dur = audio_duration / len(video_paths)
req_dur = max_clip_duration req_dur = max_clip_duration
logger.info(f"each clip will be maximum {req_dur} seconds long") logger.info(f"maximum clip duration: {req_dur} seconds")
output_dir = os.path.dirname(combined_video_path) output_dir = os.path.dirname(combined_video_path)
aspect = VideoAspect(video_aspect) aspect = VideoAspect(video_aspect)
video_width, video_height = aspect.to_resolution() video_width, video_height = aspect.to_resolution()
clips = [] clip_files = []
subclipped_items = []
video_duration = 0 video_duration = 0
raw_clips = []
for video_path in video_paths: for video_path in video_paths:
clip = VideoFileClip(video_path).without_audio() clip = VideoFileClip(video_path)
clip_duration = clip.duration clip_duration = clip.duration
clip_w, clip_h = clip.size
close_clip(clip)
start_time = 0 start_time = 0
while start_time < clip_duration: while start_time < clip_duration:
end_time = min(start_time + max_clip_duration, clip_duration) end_time = min(start_time + max_clip_duration, clip_duration)
split_clip = clip.subclipped(start_time, end_time) if clip_duration - start_time > max_clip_duration:
raw_clips.append(split_clip) subclipped_items.append(SubClippedVideoClip(file_path= video_path, start_time=start_time, end_time=end_time, width=clip_w, height=clip_h))
# logger.info(f"splitting from {start_time:.2f} to {end_time:.2f}, clip duration {clip_duration:.2f}, split_clip duration {split_clip.duration:.2f}") start_time = end_time
start_time = end_time
if video_concat_mode.value == VideoConcatMode.sequential.value: if video_concat_mode.value == VideoConcatMode.sequential.value:
break break
# random video_paths order # random subclipped_items order
if video_concat_mode.value == VideoConcatMode.random.value: if video_concat_mode.value == VideoConcatMode.random.value:
random.shuffle(raw_clips) random.shuffle(subclipped_items)
logger.debug(f"total subclipped items: {len(subclipped_items)}")
# Add downloaded clips over and over until the duration of the audio (max_duration) has been reached # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
while video_duration < audio_duration: for i, subclipped_item in enumerate(subclipped_items):
for clip in raw_clips: if video_duration > audio_duration:
# Check if clip is longer than the remaining audio break
if (audio_duration - video_duration) < clip.duration:
clip = clip.subclipped(0, (audio_duration - video_duration)) logger.debug(f"processing clip {i+1}: {subclipped_item.width}x{subclipped_item.height}, current duration: {video_duration:.2f}s, remaining: {audio_duration - video_duration:.2f}s")
# Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
elif req_dur < clip.duration: try:
clip = clip.subclipped(0, req_dur) clip = VideoFileClip(subclipped_item.file_path).subclipped(subclipped_item.start_time, subclipped_item.end_time)
clip = clip.with_fps(30) clip_duration = clip.duration
# Not all videos are same size, so we need to resize them # Not all videos are same size, so we need to resize them
clip_w, clip_h = clip.size clip_w, clip_h = clip.size
if clip_w != video_width or clip_h != video_height: if clip_w != video_width or clip_h != video_height:
clip_ratio = clip.w / clip.h clip_ratio = clip.w / clip.h
video_ratio = video_width / video_height video_ratio = video_width / video_height
logger.debug(f"resizing to {video_width}x{video_height}, source: {clip_w}x{clip_h}, ratio: {clip_ratio:.2f}, target ratio: {video_ratio:.2f}")
if clip_ratio == video_ratio: if clip_ratio == video_ratio:
# Resize proportionally clip = clip.resized(new_size=(video_width, video_height))
clip = clip.resized((video_width, video_height))
else: else:
# Resize proportionally
if clip_ratio > video_ratio: if clip_ratio > video_ratio:
# Resize proportionally based on the target width
scale_factor = video_width / clip_w scale_factor = video_width / clip_w
else: else:
# Resize proportionally based on the target height
scale_factor = video_height / clip_h scale_factor = video_height / clip_h
new_width = int(clip_w * scale_factor) new_width = int(clip_w * scale_factor)
new_height = int(clip_h * scale_factor) new_height = int(clip_h * scale_factor)
clip_resized = clip.resized(new_size=(new_width, new_height))
background = ColorClip(
size=(video_width, video_height), color=(0, 0, 0)
)
clip = CompositeVideoClip(
[
background.with_duration(clip.duration),
clip_resized.with_position("center"),
]
)
logger.info(
f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}"
)
background = ColorClip(size=(video_width, video_height), color=(0, 0, 0)).with_duration(clip_duration)
clip_resized = clip.resized(new_size=(new_width, new_height)).with_position("center")
clip = CompositeVideoClip([background, clip_resized])
close_clip(clip_resized)
close_clip(background)
shuffle_side = random.choice(["left", "right", "top", "bottom"]) shuffle_side = random.choice(["left", "right", "top", "bottom"])
if video_transition_mode.value == VideoTransitionMode.none.value: if video_transition_mode.value == VideoTransitionMode.none.value:
clip = clip clip = clip
@@ -160,24 +214,81 @@ def combine_videos(
if clip.duration > max_clip_duration: if clip.duration > max_clip_duration:
clip = clip.subclipped(0, max_clip_duration) clip = clip.subclipped(0, max_clip_duration)
            clips.append(clip) # write clip to temp file
clip_file = f"{output_dir}/temp-clip-{i+1}.mp4"
clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
close_clip(clip)
clip_files.append(clip_file)
video_duration += clip.duration video_duration += clip.duration
clips = [CompositeVideoClip([clip]) for clip in clips]
video_clip = concatenate_videoclips(clips) except Exception as e:
video_clip = video_clip.with_fps(30) logger.error(f"failed to process clip: {str(e)}")
logger.info("writing")
# https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030 # merge video clips progressively, avoid loading all videos at once to avoid memory overflow
video_clip.write_videofile( logger.info("starting clip merging process")
filename=combined_video_path, if not clip_files:
threads=threads, logger.warning("no clips available for merging")
logger=None, return combined_video_path
temp_audiofile_path=output_dir,
audio_codec="aac", # if there is only one clip, use it directly
fps=30, if len(clip_files) == 1:
) logger.info("using single clip directly")
video_clip.close() shutil.copy(clip_files[0], combined_video_path)
logger.success("completed") delete_files(clip_files)
logger.info("video combining completed")
return combined_video_path
# create initial video file as base
base_clip_path = clip_files[0]
temp_merged_video = f"{output_dir}/temp-merged-video.mp4"
temp_merged_next = f"{output_dir}/temp-merged-next.mp4"
# copy first clip as initial merged video
shutil.copy(base_clip_path, temp_merged_video)
# merge remaining video clips one by one
for i, clip_path in enumerate(clip_files[1:], 1):
logger.info(f"merging clip {i}/{len(clip_files)-1}")
try:
# load current base video and next clip to merge
base_clip = VideoFileClip(temp_merged_video)
next_clip = VideoFileClip(clip_path)
# merge these two clips
merged_clip = concatenate_videoclips([base_clip, next_clip])
# save merged result to temp file
merged_clip.write_videofile(
filename=temp_merged_next,
threads=threads,
logger=None,
temp_audiofile_path=output_dir,
audio_codec=audio_codec,
fps=fps,
)
close_clip(base_clip)
close_clip(next_clip)
close_clip(merged_clip)
# replace base file with new merged file
delete_files(temp_merged_video)
os.rename(temp_merged_next, temp_merged_video)
except Exception as e:
logger.error(f"failed to merge clip: {str(e)}")
continue
# after merging, rename final result to target file name
os.rename(temp_merged_video, combined_video_path)
# clean temp files
delete_files(clip_files)
logger.info("video combining completed")
return combined_video_path return combined_video_path
@@ -194,8 +305,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
if width <= max_width: if width <= max_width:
return text, height return text, height
# logger.warning(f"wrapping text, max_width: {max_width}, text_width: {width}, text: {text}")
processed = True processed = True
_wrapped_lines_ = [] _wrapped_lines_ = []
@@ -218,7 +327,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
_wrapped_lines_ = [line.strip() for line in _wrapped_lines_] _wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
result = "\n".join(_wrapped_lines_).strip() result = "\n".join(_wrapped_lines_).strip()
height = len(_wrapped_lines_) * height height = len(_wrapped_lines_) * height
# logger.warning(f"wrapped text: {result}")
return result, height return result, height
_wrapped_lines_ = [] _wrapped_lines_ = []
@@ -235,7 +343,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
_wrapped_lines_.append(_txt_) _wrapped_lines_.append(_txt_)
result = "\n".join(_wrapped_lines_).strip() result = "\n".join(_wrapped_lines_).strip()
height = len(_wrapped_lines_) * height height = len(_wrapped_lines_) * height
# logger.warning(f"wrapped text: {result}")
return result, height return result, height
@@ -249,7 +356,7 @@ def generate_video(
aspect = VideoAspect(params.video_aspect) aspect = VideoAspect(params.video_aspect)
video_width, video_height = aspect.to_resolution() video_width, video_height = aspect.to_resolution()
logger.info(f"start, video size: {video_width} x {video_height}") logger.info(f"generating video: {video_width} x {video_height}")
logger.info(f" ① video: {video_path}") logger.info(f" ① video: {video_path}")
logger.info(f" ② audio: {audio_path}") logger.info(f" ② audio: {audio_path}")
logger.info(f" ③ subtitle: {subtitle_path}") logger.info(f" ③ subtitle: {subtitle_path}")
@@ -268,7 +375,7 @@ def generate_video(
if os.name == "nt": if os.name == "nt":
font_path = font_path.replace("\\", "/") font_path = font_path.replace("\\", "/")
logger.info(f"using font: {font_path}") logger.info(f" font: {font_path}")
def create_text_clip(subtitle_item): def create_text_clip(subtitle_item):
params.font_size = int(params.font_size) params.font_size = int(params.font_size)
@@ -314,7 +421,7 @@ def generate_video(
_clip = _clip.with_position(("center", "center")) _clip = _clip.with_position(("center", "center"))
return _clip return _clip
video_clip = VideoFileClip(video_path) video_clip = VideoFileClip(video_path).without_audio()
audio_clip = AudioFileClip(audio_path).with_effects( audio_clip = AudioFileClip(audio_path).with_effects(
[afx.MultiplyVolume(params.voice_volume)] [afx.MultiplyVolume(params.voice_volume)]
) )
@@ -353,15 +460,14 @@ def generate_video(
video_clip = video_clip.with_audio(audio_clip) video_clip = video_clip.with_audio(audio_clip)
video_clip.write_videofile( video_clip.write_videofile(
output_file, output_file,
audio_codec="aac", audio_codec=audio_codec,
temp_audiofile_path=output_dir, temp_audiofile_path=output_dir,
threads=params.n_threads or 2, threads=params.n_threads or 2,
logger=None, logger=None,
fps=30, fps=fps,
) )
video_clip.close() video_clip.close()
del video_clip del video_clip
logger.success("completed")
def preprocess_video(materials: List[MaterialInfo], clip_duration=4): def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
@@ -378,7 +484,7 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
width = clip.size[0] width = clip.size[0]
height = clip.size[1] height = clip.size[1]
if width < 480 or height < 480: if width < 480 or height < 480:
logger.warning(f"video is too small, width: {width}, height: {height}") logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required")
continue continue
if ext in const.FILE_TYPE_IMAGES: if ext in const.FILE_TYPE_IMAGES:
@@ -408,65 +514,5 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
final_clip.close() final_clip.close()
del final_clip del final_clip
material.url = video_file material.url = video_file
logger.success(f"completed: {video_file}") logger.success(f"image processed: {video_file}")
return materials return materials
if __name__ == "__main__":
m = MaterialInfo()
m.url = "/Users/harry/Downloads/IMG_2915.JPG"
m.provider = "local"
materials = preprocess_video([m], clip_duration=4)
print(materials)
# txt_en = "Here's your guide to travel hacks for budget-friendly adventures"
# txt_zh = "测试长字段这是您的旅行技巧指南帮助您进行预算友好的冒险"
# font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
# for txt in [txt_en, txt_zh]:
# t, h = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
# print(t)
#
# task_id = "aa563149-a7ea-49c2-b39f-8c32cc225baf"
# task_dir = utils.task_dir(task_id)
# video_file = f"{task_dir}/combined-1.mp4"
# audio_file = f"{task_dir}/audio.mp3"
# subtitle_file = f"{task_dir}/subtitle.srt"
# output_file = f"{task_dir}/final.mp4"
#
# # video_paths = []
# # for file in os.listdir(utils.storage_dir("test")):
# # if file.endswith(".mp4"):
# # video_paths.append(os.path.join(utils.storage_dir("test"), file))
# #
# # combine_videos(combined_video_path=video_file,
# # audio_file=audio_file,
# # video_paths=video_paths,
# # video_aspect=VideoAspect.portrait,
# # video_concat_mode=VideoConcatMode.random,
# # max_clip_duration=5,
# # threads=2)
#
# cfg = VideoParams()
# cfg.video_aspect = VideoAspect.portrait
# cfg.font_name = "STHeitiMedium.ttc"
# cfg.font_size = 60
# cfg.stroke_color = "#000000"
# cfg.stroke_width = 1.5
# cfg.text_fore_color = "#FFFFFF"
# cfg.text_background_color = "transparent"
# cfg.bgm_type = "random"
# cfg.bgm_file = ""
# cfg.bgm_volume = 1.0
# cfg.subtitle_enabled = True
# cfg.subtitle_position = "bottom"
# cfg.n_threads = 2
# cfg.paragraph_number = 1
#
# cfg.voice_volume = 1.0
#
# generate_video(video_path=video_file,
# audio_path=audio_file,
# subtitle_path=subtitle_file,
# output_file=output_file,
# params=cfg
# )