perf: optimize memory usage and processing performance

This commit is contained in:
harry
2025-05-09 20:55:12 +08:00
parent 35a7ef657a
commit f07e5802f7

View File

@@ -1,8 +1,9 @@
import glob import glob
import os import os
import random import random
import gc
import shutil
from typing import List from typing import List
from loguru import logger from loguru import logger
from moviepy import ( from moviepy import (
AudioFileClip, AudioFileClip,
@@ -29,6 +30,68 @@ from app.models.schema import (
from app.services.utils import video_effects from app.services.utils import video_effects
from app.utils import utils from app.utils import utils
class SubClippedVideoClip:
    """Lightweight metadata record for a time-slice of a video file.

    Holds only the source path, the slice boundaries and (optionally) the
    frame dimensions, so slices can be planned and shuffled without keeping
    heavy VideoFileClip readers open in memory.
    """

    def __init__(self, file_path, start_time, end_time, width=None, height=None):
        self.file_path = file_path    # path to the source video on disk
        self.start_time = start_time  # slice start, in seconds
        self.end_time = end_time      # slice end, in seconds
        self.width = width            # source frame width, if known
        self.height = height          # source frame height, if known

    def __str__(self):
        # Human-readable form used in debug logging.
        return (
            f"SubClippedVideoClip(file_path={self.file_path}, "
            f"start_time={self.start_time}, end_time={self.end_time}, "
            f"width={self.width}, height={self.height})"
        )
# Shared encoding defaults used by every write_videofile call in this module.
audio_codec = "aac"  # AAC audio for broad player compatibility
video_codec = "libx264"  # H.264 video encoder
fps = 30  # output frame rate for all rendered clips
def close_clip(clip):
    """Aggressively release the resources held by a moviepy clip.

    Closes the clip's own reader, the readers of its ``audio`` and ``mask``
    attachments (dropping those attributes afterwards), recursively closes
    any child clips of a composite, and finally triggers a garbage-collection
    pass so file handles and frame buffers are freed promptly.

    Safe to call with ``None``; any failure during cleanup is logged rather
    than raised (best-effort release).
    """
    if clip is None:
        return

    try:
        # release the clip's own frame reader
        main_reader = getattr(clip, "reader", None)
        if main_reader is not None:
            main_reader.close()

        # release attached audio / mask readers and drop the attributes
        for attached in ("audio", "mask"):
            sub = getattr(clip, attached, None)
            if sub is not None:
                sub_reader = getattr(sub, "reader", None)
                if sub_reader is not None:
                    sub_reader.close()
                delattr(clip, attached)

        # recurse into children of composite clips
        children = getattr(clip, "clips", None)
        if children:
            for child in children:
                if child is not clip:  # avoid possible circular references
                    close_clip(child)

        # drop references to the child clip list
        if hasattr(clip, "clips"):
            clip.clips = []
    except Exception as e:
        logger.error(f"failed to close clip: {str(e)}")

    del clip
    # force-collect so large frame buffers are reclaimed immediately
    gc.collect()
def delete_files(files: List[str] | str):
if isinstance(files, str):
files = [files]
for file in files:
try:
os.remove(file)
except:
pass
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""): def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
if not bgm_type: if not bgm_type:
@@ -58,85 +121,76 @@ def combine_videos(
) -> str: ) -> str:
audio_clip = AudioFileClip(audio_file) audio_clip = AudioFileClip(audio_file)
audio_duration = audio_clip.duration audio_duration = audio_clip.duration
logger.info(f"max duration of audio: {audio_duration} seconds") logger.info(f"audio duration: {audio_duration} seconds")
# Required duration of each clip # Required duration of each clip
req_dur = audio_duration / len(video_paths) req_dur = audio_duration / len(video_paths)
req_dur = max_clip_duration req_dur = max_clip_duration
logger.info(f"each clip will be maximum {req_dur} seconds long") logger.info(f"maximum clip duration: {req_dur} seconds")
output_dir = os.path.dirname(combined_video_path) output_dir = os.path.dirname(combined_video_path)
aspect = VideoAspect(video_aspect) aspect = VideoAspect(video_aspect)
video_width, video_height = aspect.to_resolution() video_width, video_height = aspect.to_resolution()
clips = [] clip_files = []
subclipped_items = []
video_duration = 0 video_duration = 0
raw_clips = []
for video_path in video_paths: for video_path in video_paths:
clip = VideoFileClip(video_path).without_audio() clip = VideoFileClip(video_path)
clip_duration = clip.duration clip_duration = clip.duration
clip_w, clip_h = clip.size
close_clip(clip)
start_time = 0 start_time = 0
while start_time < clip_duration: while start_time < clip_duration:
end_time = min(start_time + max_clip_duration, clip_duration) end_time = min(start_time + max_clip_duration, clip_duration)
split_clip = clip.subclipped(start_time, end_time) if clip_duration - start_time > max_clip_duration:
raw_clips.append(split_clip) subclipped_items.append(SubClippedVideoClip(file_path= video_path, start_time=start_time, end_time=end_time, width=clip_w, height=clip_h))
# logger.info(f"splitting from {start_time:.2f} to {end_time:.2f}, clip duration {clip_duration:.2f}, split_clip duration {split_clip.duration:.2f}") start_time = end_time
start_time = end_time
if video_concat_mode.value == VideoConcatMode.sequential.value: if video_concat_mode.value == VideoConcatMode.sequential.value:
break break
# random video_paths order # random subclipped_items order
if video_concat_mode.value == VideoConcatMode.random.value: if video_concat_mode.value == VideoConcatMode.random.value:
random.shuffle(raw_clips) random.shuffle(subclipped_items)
logger.debug(f"total subclipped items: {len(subclipped_items)}")
# Add downloaded clips over and over until the duration of the audio (max_duration) has been reached # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
while video_duration < audio_duration: for i, subclipped_item in enumerate(subclipped_items):
for clip in raw_clips: if video_duration > audio_duration:
# Check if clip is longer than the remaining audio break
if (audio_duration - video_duration) < clip.duration:
clip = clip.subclipped(0, (audio_duration - video_duration)) logger.debug(f"processing clip {i+1}: {subclipped_item.width}x{subclipped_item.height}, current duration: {video_duration:.2f}s, remaining: {audio_duration - video_duration:.2f}s")
# Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
elif req_dur < clip.duration: try:
clip = clip.subclipped(0, req_dur) clip = VideoFileClip(subclipped_item.file_path).subclipped(subclipped_item.start_time, subclipped_item.end_time)
clip = clip.with_fps(30) clip_duration = clip.duration
# Not all videos are same size, so we need to resize them # Not all videos are same size, so we need to resize them
clip_w, clip_h = clip.size clip_w, clip_h = clip.size
if clip_w != video_width or clip_h != video_height: if clip_w != video_width or clip_h != video_height:
clip_ratio = clip.w / clip.h clip_ratio = clip.w / clip.h
video_ratio = video_width / video_height video_ratio = video_width / video_height
logger.debug(f"resizing to {video_width}x{video_height}, source: {clip_w}x{clip_h}, ratio: {clip_ratio:.2f}, target ratio: {video_ratio:.2f}")
if clip_ratio == video_ratio: if clip_ratio == video_ratio:
# Resize proportionally clip = clip.resized(new_size=(video_width, video_height))
clip = clip.resized((video_width, video_height))
else: else:
# Resize proportionally
if clip_ratio > video_ratio: if clip_ratio > video_ratio:
# Resize proportionally based on the target width
scale_factor = video_width / clip_w scale_factor = video_width / clip_w
else: else:
# Resize proportionally based on the target height
scale_factor = video_height / clip_h scale_factor = video_height / clip_h
new_width = int(clip_w * scale_factor) new_width = int(clip_w * scale_factor)
new_height = int(clip_h * scale_factor) new_height = int(clip_h * scale_factor)
clip_resized = clip.resized(new_size=(new_width, new_height))
background = ColorClip(
size=(video_width, video_height), color=(0, 0, 0)
)
clip = CompositeVideoClip(
[
background.with_duration(clip.duration),
clip_resized.with_position("center"),
]
)
logger.info(
f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}"
)
background = ColorClip(size=(video_width, video_height), color=(0, 0, 0)).with_duration(clip_duration)
clip_resized = clip.resized(new_size=(new_width, new_height)).with_position("center")
clip = CompositeVideoClip([background, clip_resized])
close_clip(clip_resized)
close_clip(background)
shuffle_side = random.choice(["left", "right", "top", "bottom"]) shuffle_side = random.choice(["left", "right", "top", "bottom"])
if video_transition_mode.value == VideoTransitionMode.none.value: if video_transition_mode.value == VideoTransitionMode.none.value:
clip = clip clip = clip
@@ -160,24 +214,81 @@ def combine_videos(
if clip.duration > max_clip_duration: if clip.duration > max_clip_duration:
clip = clip.subclipped(0, max_clip_duration) clip = clip.subclipped(0, max_clip_duration)
            clips.append(clip) # write clip to temp file
clip_file = f"{output_dir}/temp-clip-{i+1}.mp4"
clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
close_clip(clip)
clip_files.append(clip_file)
video_duration += clip.duration video_duration += clip.duration
clips = [CompositeVideoClip([clip]) for clip in clips]
video_clip = concatenate_videoclips(clips) except Exception as e:
video_clip = video_clip.with_fps(30) logger.error(f"failed to process clip: {str(e)}")
logger.info("writing")
# https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030 # merge video clips progressively, avoid loading all videos at once to avoid memory overflow
video_clip.write_videofile( logger.info("starting clip merging process")
filename=combined_video_path, if not clip_files:
threads=threads, logger.warning("no clips available for merging")
logger=None, return combined_video_path
temp_audiofile_path=output_dir,
audio_codec="aac", # if there is only one clip, use it directly
fps=30, if len(clip_files) == 1:
) logger.info("using single clip directly")
video_clip.close() shutil.copy(clip_files[0], combined_video_path)
logger.success("completed") delete_files(clip_files)
logger.info("video combining completed")
return combined_video_path
# create initial video file as base
base_clip_path = clip_files[0]
temp_merged_video = f"{output_dir}/temp-merged-video.mp4"
temp_merged_next = f"{output_dir}/temp-merged-next.mp4"
# copy first clip as initial merged video
shutil.copy(base_clip_path, temp_merged_video)
# merge remaining video clips one by one
for i, clip_path in enumerate(clip_files[1:], 1):
logger.info(f"merging clip {i}/{len(clip_files)-1}")
try:
# load current base video and next clip to merge
base_clip = VideoFileClip(temp_merged_video)
next_clip = VideoFileClip(clip_path)
# merge these two clips
merged_clip = concatenate_videoclips([base_clip, next_clip])
# save merged result to temp file
merged_clip.write_videofile(
filename=temp_merged_next,
threads=threads,
logger=None,
temp_audiofile_path=output_dir,
audio_codec=audio_codec,
fps=fps,
)
close_clip(base_clip)
close_clip(next_clip)
close_clip(merged_clip)
# replace base file with new merged file
delete_files(temp_merged_video)
os.rename(temp_merged_next, temp_merged_video)
except Exception as e:
logger.error(f"failed to merge clip: {str(e)}")
continue
# after merging, rename final result to target file name
os.rename(temp_merged_video, combined_video_path)
# clean temp files
delete_files(clip_files)
logger.info("video combining completed")
return combined_video_path return combined_video_path
@@ -194,8 +305,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
if width <= max_width: if width <= max_width:
return text, height return text, height
# logger.warning(f"wrapping text, max_width: {max_width}, text_width: {width}, text: {text}")
processed = True processed = True
_wrapped_lines_ = [] _wrapped_lines_ = []
@@ -218,7 +327,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
_wrapped_lines_ = [line.strip() for line in _wrapped_lines_] _wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
result = "\n".join(_wrapped_lines_).strip() result = "\n".join(_wrapped_lines_).strip()
height = len(_wrapped_lines_) * height height = len(_wrapped_lines_) * height
# logger.warning(f"wrapped text: {result}")
return result, height return result, height
_wrapped_lines_ = [] _wrapped_lines_ = []
@@ -235,7 +343,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
_wrapped_lines_.append(_txt_) _wrapped_lines_.append(_txt_)
result = "\n".join(_wrapped_lines_).strip() result = "\n".join(_wrapped_lines_).strip()
height = len(_wrapped_lines_) * height height = len(_wrapped_lines_) * height
# logger.warning(f"wrapped text: {result}")
return result, height return result, height
@@ -249,7 +356,7 @@ def generate_video(
aspect = VideoAspect(params.video_aspect) aspect = VideoAspect(params.video_aspect)
video_width, video_height = aspect.to_resolution() video_width, video_height = aspect.to_resolution()
logger.info(f"start, video size: {video_width} x {video_height}") logger.info(f"generating video: {video_width} x {video_height}")
logger.info(f" ① video: {video_path}") logger.info(f" ① video: {video_path}")
logger.info(f" ② audio: {audio_path}") logger.info(f" ② audio: {audio_path}")
logger.info(f" ③ subtitle: {subtitle_path}") logger.info(f" ③ subtitle: {subtitle_path}")
@@ -268,7 +375,7 @@ def generate_video(
if os.name == "nt": if os.name == "nt":
font_path = font_path.replace("\\", "/") font_path = font_path.replace("\\", "/")
logger.info(f"using font: {font_path}") logger.info(f" font: {font_path}")
def create_text_clip(subtitle_item): def create_text_clip(subtitle_item):
params.font_size = int(params.font_size) params.font_size = int(params.font_size)
@@ -314,7 +421,7 @@ def generate_video(
_clip = _clip.with_position(("center", "center")) _clip = _clip.with_position(("center", "center"))
return _clip return _clip
video_clip = VideoFileClip(video_path) video_clip = VideoFileClip(video_path).without_audio()
audio_clip = AudioFileClip(audio_path).with_effects( audio_clip = AudioFileClip(audio_path).with_effects(
[afx.MultiplyVolume(params.voice_volume)] [afx.MultiplyVolume(params.voice_volume)]
) )
@@ -353,15 +460,14 @@ def generate_video(
video_clip = video_clip.with_audio(audio_clip) video_clip = video_clip.with_audio(audio_clip)
video_clip.write_videofile( video_clip.write_videofile(
output_file, output_file,
audio_codec="aac", audio_codec=audio_codec,
temp_audiofile_path=output_dir, temp_audiofile_path=output_dir,
threads=params.n_threads or 2, threads=params.n_threads or 2,
logger=None, logger=None,
fps=30, fps=fps,
) )
video_clip.close() video_clip.close()
del video_clip del video_clip
logger.success("completed")
def preprocess_video(materials: List[MaterialInfo], clip_duration=4): def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
@@ -378,7 +484,7 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
width = clip.size[0] width = clip.size[0]
height = clip.size[1] height = clip.size[1]
if width < 480 or height < 480: if width < 480 or height < 480:
logger.warning(f"video is too small, width: {width}, height: {height}") logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required")
continue continue
if ext in const.FILE_TYPE_IMAGES: if ext in const.FILE_TYPE_IMAGES:
@@ -408,65 +514,5 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
final_clip.close() final_clip.close()
del final_clip del final_clip
material.url = video_file material.url = video_file
logger.success(f"completed: {video_file}") logger.success(f"image processed: {video_file}")
return materials return materials
if __name__ == "__main__":
m = MaterialInfo()
m.url = "/Users/harry/Downloads/IMG_2915.JPG"
m.provider = "local"
materials = preprocess_video([m], clip_duration=4)
print(materials)
# txt_en = "Here's your guide to travel hacks for budget-friendly adventures"
# txt_zh = "测试长字段这是您的旅行技巧指南帮助您进行预算友好的冒险"
# font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
# for txt in [txt_en, txt_zh]:
# t, h = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
# print(t)
#
# task_id = "aa563149-a7ea-49c2-b39f-8c32cc225baf"
# task_dir = utils.task_dir(task_id)
# video_file = f"{task_dir}/combined-1.mp4"
# audio_file = f"{task_dir}/audio.mp3"
# subtitle_file = f"{task_dir}/subtitle.srt"
# output_file = f"{task_dir}/final.mp4"
#
# # video_paths = []
# # for file in os.listdir(utils.storage_dir("test")):
# # if file.endswith(".mp4"):
# # video_paths.append(os.path.join(utils.storage_dir("test"), file))
# #
# # combine_videos(combined_video_path=video_file,
# # audio_file=audio_file,
# # video_paths=video_paths,
# # video_aspect=VideoAspect.portrait,
# # video_concat_mode=VideoConcatMode.random,
# # max_clip_duration=5,
# # threads=2)
#
# cfg = VideoParams()
# cfg.video_aspect = VideoAspect.portrait
# cfg.font_name = "STHeitiMedium.ttc"
# cfg.font_size = 60
# cfg.stroke_color = "#000000"
# cfg.stroke_width = 1.5
# cfg.text_fore_color = "#FFFFFF"
# cfg.text_background_color = "transparent"
# cfg.bgm_type = "random"
# cfg.bgm_file = ""
# cfg.bgm_volume = 1.0
# cfg.subtitle_enabled = True
# cfg.subtitle_position = "bottom"
# cfg.n_threads = 2
# cfg.paragraph_number = 1
#
# cfg.voice_volume = 1.0
#
# generate_video(video_path=video_file,
# audio_path=audio_file,
# subtitle_path=subtitle_file,
# output_file=output_file,
# params=cfg
# )