Revision of Slideshow synced to beat

1

+

#!/usr/bin/env python3

2

+

import argparse

3

+

import random

4

+

import os

5

+

6

+

import cv2

7

+

import librosa

8

+

import numpy as np

9

+

from moviepy.editor import *

10

+

from scipy.signal import butter, lfilter

11

+

from scipy.signal import find_peaks

12

+

13

+

# pip install opencv-python librosa numpy moviepy scipy

14

+

15

+

16

+

def detect_beats(audio_file_path, highcut=200, order=5, peak_distance=10, peak_height=0.01):
    """Estimate the timestamps of bass beats in an audio file.

    The signal is low-pass filtered so that only content below ``highcut``
    remains, its short-time RMS energy is computed, and the local maxima of
    the normalized energy envelope are reported as beats.

    Args:
        audio_file_path: Path of the audio file, read with ``librosa.load``.
        highcut: Cutoff frequency in Hz; content above it is attenuated.
        order: Order of the Butterworth filter.
        peak_distance: Minimum spacing between two peaks, counted in RMS
            frames (one frame per 512 audio samples).
        peak_height: Minimum value of the normalized RMS envelope for a
            frame to count as a peak.

    Returns:
        A NumPy array of beat times in seconds. The array is empty when the
        filtered signal carries no energy.
    """
    hop_length = 512

    # Load the audio file
    y, sr = librosa.load(audio_file_path)

    # Isolate the bass band. This has to be a low-pass: "highcut" is the
    # frequency above which content is cut, whereas a high-pass at this
    # cutoff would discard exactly the bass the detector is looking for.
    b, a = butter(order, highcut / (0.5 * sr), btype='low')
    y_filtered = lfilter(b, a, y)

    # Calculate the RMS energy of the filtered signal
    rms = librosa.feature.rms(y=y_filtered, frame_length=1024, hop_length=hop_length)[0]

    # Normalize the RMS energy. A silent (or empty) signal has no usable
    # maximum; dividing by it would fill the envelope with NaN.
    loudest = np.max(rms) if rms.size else 0.0
    if not loudest > 0:
        return np.array([], dtype=float)
    rms_normalized = rms / loudest

    # Detect the peaks in the RMS energy signal
    peaks, _ = find_peaks(rms_normalized, distance=peak_distance, height=peak_height)

    # Convert the peak indices to times
    return librosa.frames_to_time(peaks, sr=sr, hop_length=hop_length)

37

+

38

+

39

+

def create_slideshow(image_folder, audio_file, beat_times, max_duration=2, images=None,
                     target_size=(1280, 720)):
    """Build a video clip whose image changes line up with ``beat_times``.

    Images are used in order and cycled when there are more beat intervals
    than images. Each image is scaled to fit ``target_size`` while keeping
    its aspect ratio and is centered on a black background.

    Args:
        image_folder: Directory that contains the image files.
        audio_file: Not used by this function; kept so existing callers
            keep working.
        beat_times: Increasing beat timestamps in seconds (list or array).
        max_duration: Longest duration in seconds of a single clip; longer
            intervals are covered by several consecutive clips of the same
            image.
        images: File names (relative to ``image_folder``) in display order.
            When ``None``, the .jpg/.jpeg/.png files of the folder are used
            in sorted order.
        target_size: ``(width, height)`` of the output frames in pixels.

    Returns:
        The concatenated moviepy video clip.

    Raises:
        ValueError: If ``max_duration`` is not positive, fewer than two beat
            times are given, no images are available, an image cannot be
            read, or no interval has a positive duration.
    """
    # Validate first: a non-positive max_duration would never let the
    # splitting loop below terminate.
    if max_duration <= 0:
        raise ValueError("max_duration must be greater than 0.")
    if len(beat_times) < 2:
        raise ValueError("At least two beat times are required to build a slideshow.")

    if images is None:
        images = sorted(
            name for name in os.listdir(image_folder)
            if name.lower().endswith((".jpg", ".jpeg", ".png"))
        )
    if not images:
        raise ValueError("No images available for the slideshow.")

    frames = {}  # file name -> prepared frame, so cycled images are decoded once
    clips = []

    # The video starts at t=0 together with the audio, so the first image
    # has to last until the second beat (not just from the first beat to the
    # second); otherwise every later change is early by beat_times[0].
    segment_start = 0.0
    for index, beat in enumerate(beat_times[1:]):
        segment_end = float(beat)
        duration = segment_end - segment_start
        segment_start = segment_end
        if duration <= 0:
            # Duplicate or unordered timestamps carry no displayable interval.
            continue

        name = images[index % len(images)]
        if name not in frames:
            frames[name] = _load_letterboxed_frame(os.path.join(image_folder, name), target_size)
        frame = frames[name]

        # If the duration between two beats is greater than the max_duration, repeat the image
        while duration > max_duration:
            clips.append(ImageClip(frame, duration=max_duration))
            duration -= max_duration
        clips.append(ImageClip(frame, duration=duration))

    if not clips:
        raise ValueError("beat_times does not contain any interval with a positive duration.")

    return concatenate_videoclips(clips)


def _load_letterboxed_frame(img_path, target_size):
    """Read an image and return it as an RGB frame of exactly ``target_size``."""
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        raise ValueError("Could not read image: {}".format(img_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize the image while maintaining aspect ratio and fitting within the target size
    height, width = img.shape[:2]
    target_width, target_height = target_size
    scale_factor = min(float(target_width) / width, float(target_height) / height)

    # Clamp to at least one pixel so extreme aspect ratios stay resizable.
    new_width = min(target_width, max(1, int(width * scale_factor)))
    new_height = min(target_height, max(1, int(height * scale_factor)))

    # INTER_AREA is meant for shrinking; use linear interpolation when enlarging.
    interpolation = cv2.INTER_AREA if scale_factor < 1 else cv2.INTER_LINEAR
    img_resized = cv2.resize(img, (new_width, new_height), interpolation=interpolation)

    # Add padding
    pad_top = (target_height - new_height) // 2
    pad_bottom = target_height - new_height - pad_top
    pad_left = (target_width - new_width) // 2
    pad_right = target_width - new_width - pad_left

    return cv2.copyMakeBorder(img_resized, pad_top, pad_bottom, pad_left, pad_right,
                              cv2.BORDER_CONSTANT, value=[0, 0, 0])

85

+

86

+

87

+

_MORE_HELP = """highcut: The cutoff frequency for the high-pass filter applied to isolate the bass frequencies. The default value is 200 Hz, which means that the filter will keep frequencies below 200 Hz (bass frequencies) and attenuate higher frequencies. You can adjust this value to focus on different frequency ranges of the bass.

order: The order of the Butterworth filter used for the high-pass filtering. A higher order results in a steeper roll-off, which means a more aggressive filtering. The default value is 5, which should work well for most cases. You can increase or decrease this value to change the sharpness of the filter.

peak_distance: The minimum number of samples between peaks in the RMS energy signal. This parameter helps to avoid detecting multiple peaks that are too close to each other. The default value is 10, which means that two peaks must be at least 10 samples apart to be considered separate peaks. You can adjust this value to control the minimum distance between detected beats.

peak_height: The minimum height of a peak in the normalized RMS energy signal. This parameter helps to filter out peaks that are too small and might not correspond to actual bass beats. The default value is 0.01, which means that a peak must have a height of at least 1% of the maximum RMS energy value to be considered a beat. You can adjust this value to control the minimum strength of detected beats.

When fine-tuning these parameters, you might want to start by adjusting highcut and peak_height to focus on the desired bass frequency range and beat strength. Then, you can experiment with the order and peak_distance parameters to further refine the beat detection. Keep in mind that the optimal values for these parameters might vary depending on the specific characteristics of the audio file you are working with."""


class _MoreHelpAction(argparse.Action):
    """argparse action that prints the extended tuning notes and exits."""

    def __init__(self, option_strings, dest, help=None):
        super().__init__(option_strings=option_strings, dest=dest,
                         default=argparse.SUPPRESS, nargs=0, help=help)

    def __call__(self, parser, namespace, values, option_string=None):
        print(_MORE_HELP)
        parser.exit()


def main(argv=None):
    """Command-line entry point: render a beat-synchronized slideshow video.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    # NOTE(review): the help strings call the filter "high-pass" while
    # describing low-pass behaviour (keeping content below the cutoff);
    # confirm against detect_beats before rewording them.
    parser = argparse.ArgumentParser(description="Create a slideshow that matches the bass beats or lyrics of a song.")
    parser.add_argument("image_folder", help="Path to the folder containing the images for the slideshow.")
    parser.add_argument("audio_file_path", help="Path to the input audio file.")
    parser.add_argument("output_file_path", help="Path to the output video file.")
    parser.add_argument("--highcut", type=int, default=200, help="Cutoff frequency for the high-pass filter (default: 200 Hz).")
    parser.add_argument("--order", type=int, default=5, help="Order of the Butterworth filter (default: 5).")
    parser.add_argument("--peak-distance", type=int, default=10, help="Minimum number of samples between peaks (default: 10).")
    parser.add_argument("--peak-height", type=float, default=0.01, help="Minimum height of a peak in the RMS energy signal (default: 0.01).")
    # Handled by an action (like --help) so it also works when the required
    # positional arguments are not supplied.
    parser.add_argument("--more-help", action=_MoreHelpAction, help="Show more help.")
    parser.add_argument("--randomize", "-r", action="store_true", help="Randomize the order of the images in the slideshow.")
    parser.add_argument("--image_order_file", "-f", help="Path to a text file containing the ordered list of image filenames (not file paths!).")
    args = parser.parse_args(argv)

    # Resolve the image list before the (slow) audio analysis so that a bad
    # folder or order file is reported immediately.
    images = sorted(
        img for img in os.listdir(args.image_folder)
        if img.lower().endswith((".jpg", ".jpeg", ".png"))
    )
    if args.image_order_file:
        available = set(images)
        with open(args.image_order_file, 'r') as f:
            ordered_images = [line.strip() for line in f]
        # Entries that do not name an image in the folder are dropped.
        images = [img for img in ordered_images if img in available]
    elif args.randomize:
        random.shuffle(images)

    if not images:
        print("No valid images found. Please check the image folder or the image_order_file.")
        return

    print('Processing beats...')
    beat_times = detect_beats(args.audio_file_path, highcut=args.highcut, order=args.order, peak_distance=args.peak_distance, peak_height=args.peak_height)
    if len(beat_times) < 2:
        # A slideshow needs at least one interval between two beats.
        print("Fewer than two beats were detected. Try lowering --peak-height or --peak-distance.")
        return

    print('Creating slideshow...')
    audio_file = AudioFileClip(args.audio_file_path)
    try:
        slideshow = create_slideshow(args.image_folder, audio_file, beat_times, images=images)
        final_video = slideshow.set_audio(audio_file)

        print('Writing video...')
        final_video.write_videofile(args.output_file_path, fps=24, codec='libx264', audio_codec='aac')
    finally:
        # Release the audio reader even when rendering fails.
        audio_file.close()


if __name__ == "__main__":
    main()

cyberes / Slideshow synced to beat

cyberes 修订了这个 Gist 1686796475. 跳至此修订

		@@ -0,0 +1,138 @@
1	+	#!/usr/bin/env python3
2	+	import argparse
3	+	import random
4	+	import os
5	+
6	+	import cv2
7	+	import librosa
8	+	import numpy as np
9	+	from moviepy.editor import *
10	+	from scipy.signal import butter, lfilter
11	+	from scipy.signal import find_peaks
12	+
13	+	# pip install opencv-python librosa numpy moviepy scipy
14	+
15	+
def detect_beats(audio_file_path, highcut=200, order=5, peak_distance=10, peak_height=0.01):
    """Estimate the timestamps of bass beats in an audio file.

    The signal is low-pass filtered so that only content below ``highcut``
    remains, its short-time RMS energy is computed, and the local maxima of
    the normalized energy envelope are reported as beats.

    Args:
        audio_file_path: Path of the audio file, read with ``librosa.load``.
        highcut: Cutoff frequency in Hz; content above it is attenuated.
        order: Order of the Butterworth filter.
        peak_distance: Minimum spacing between two peaks, counted in RMS
            frames (one frame per 512 audio samples).
        peak_height: Minimum value of the normalized RMS envelope for a
            frame to count as a peak.

    Returns:
        A NumPy array of beat times in seconds. The array is empty when the
        filtered signal carries no energy.
    """
    hop_length = 512

    # Load the audio file
    y, sr = librosa.load(audio_file_path)

    # Isolate the bass band. This has to be a low-pass: "highcut" is the
    # frequency above which content is cut, whereas a high-pass at this
    # cutoff would discard exactly the bass the detector is looking for.
    b, a = butter(order, highcut / (0.5 * sr), btype='low')
    y_filtered = lfilter(b, a, y)

    # Calculate the RMS energy of the filtered signal
    rms = librosa.feature.rms(y=y_filtered, frame_length=1024, hop_length=hop_length)[0]

    # Normalize the RMS energy. A silent (or empty) signal has no usable
    # maximum; dividing by it would fill the envelope with NaN.
    loudest = np.max(rms) if rms.size else 0.0
    if not loudest > 0:
        return np.array([], dtype=float)
    rms_normalized = rms / loudest

    # Detect the peaks in the RMS energy signal
    peaks, _ = find_peaks(rms_normalized, distance=peak_distance, height=peak_height)

    # Convert the peak indices to times
    return librosa.frames_to_time(peaks, sr=sr, hop_length=hop_length)
37	+
38	+
def create_slideshow(image_folder, audio_file, beat_times, max_duration=2, images=None,
                     target_size=(1280, 720)):
    """Build a video clip whose image changes line up with ``beat_times``.

    Images are used in order and cycled when there are more beat intervals
    than images. Each image is scaled to fit ``target_size`` while keeping
    its aspect ratio and is centered on a black background.

    Args:
        image_folder: Directory that contains the image files.
        audio_file: Not used by this function; kept so existing callers
            keep working.
        beat_times: Increasing beat timestamps in seconds (list or array).
        max_duration: Longest duration in seconds of a single clip; longer
            intervals are covered by several consecutive clips of the same
            image.
        images: File names (relative to ``image_folder``) in display order.
            When ``None``, the .jpg/.jpeg/.png files of the folder are used
            in sorted order.
        target_size: ``(width, height)`` of the output frames in pixels.

    Returns:
        The concatenated moviepy video clip.

    Raises:
        ValueError: If ``max_duration`` is not positive, fewer than two beat
            times are given, no images are available, an image cannot be
            read, or no interval has a positive duration.
    """
    # Validate first: a non-positive max_duration would never let the
    # splitting loop below terminate.
    if max_duration <= 0:
        raise ValueError("max_duration must be greater than 0.")
    if len(beat_times) < 2:
        raise ValueError("At least two beat times are required to build a slideshow.")

    if images is None:
        images = sorted(
            name for name in os.listdir(image_folder)
            if name.lower().endswith((".jpg", ".jpeg", ".png"))
        )
    if not images:
        raise ValueError("No images available for the slideshow.")

    frames = {}  # file name -> prepared frame, so cycled images are decoded once
    clips = []

    # The video starts at t=0 together with the audio, so the first image
    # has to last until the second beat (not just from the first beat to the
    # second); otherwise every later change is early by beat_times[0].
    segment_start = 0.0
    for index, beat in enumerate(beat_times[1:]):
        segment_end = float(beat)
        duration = segment_end - segment_start
        segment_start = segment_end
        if duration <= 0:
            # Duplicate or unordered timestamps carry no displayable interval.
            continue

        name = images[index % len(images)]
        if name not in frames:
            frames[name] = _load_letterboxed_frame(os.path.join(image_folder, name), target_size)
        frame = frames[name]

        # If the duration between two beats is greater than the max_duration, repeat the image
        while duration > max_duration:
            clips.append(ImageClip(frame, duration=max_duration))
            duration -= max_duration
        clips.append(ImageClip(frame, duration=duration))

    if not clips:
        raise ValueError("beat_times does not contain any interval with a positive duration.")

    return concatenate_videoclips(clips)


def _load_letterboxed_frame(img_path, target_size):
    """Read an image and return it as an RGB frame of exactly ``target_size``."""
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        raise ValueError("Could not read image: {}".format(img_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize the image while maintaining aspect ratio and fitting within the target size
    height, width = img.shape[:2]
    target_width, target_height = target_size
    scale_factor = min(float(target_width) / width, float(target_height) / height)

    # Clamp to at least one pixel so extreme aspect ratios stay resizable.
    new_width = min(target_width, max(1, int(width * scale_factor)))
    new_height = min(target_height, max(1, int(height * scale_factor)))

    # INTER_AREA is meant for shrinking; use linear interpolation when enlarging.
    interpolation = cv2.INTER_AREA if scale_factor < 1 else cv2.INTER_LINEAR
    img_resized = cv2.resize(img, (new_width, new_height), interpolation=interpolation)

    # Add padding
    pad_top = (target_height - new_height) // 2
    pad_bottom = target_height - new_height - pad_top
    pad_left = (target_width - new_width) // 2
    pad_right = target_width - new_width - pad_left

    return cv2.copyMakeBorder(img_resized, pad_top, pad_bottom, pad_left, pad_right,
                              cv2.BORDER_CONSTANT, value=[0, 0, 0])
85	+
86	+
_MORE_HELP = """highcut: The cutoff frequency for the high-pass filter applied to isolate the bass frequencies. The default value is 200 Hz, which means that the filter will keep frequencies below 200 Hz (bass frequencies) and attenuate higher frequencies. You can adjust this value to focus on different frequency ranges of the bass.

order: The order of the Butterworth filter used for the high-pass filtering. A higher order results in a steeper roll-off, which means a more aggressive filtering. The default value is 5, which should work well for most cases. You can increase or decrease this value to change the sharpness of the filter.

peak_distance: The minimum number of samples between peaks in the RMS energy signal. This parameter helps to avoid detecting multiple peaks that are too close to each other. The default value is 10, which means that two peaks must be at least 10 samples apart to be considered separate peaks. You can adjust this value to control the minimum distance between detected beats.

peak_height: The minimum height of a peak in the normalized RMS energy signal. This parameter helps to filter out peaks that are too small and might not correspond to actual bass beats. The default value is 0.01, which means that a peak must have a height of at least 1% of the maximum RMS energy value to be considered a beat. You can adjust this value to control the minimum strength of detected beats.

When fine-tuning these parameters, you might want to start by adjusting highcut and peak_height to focus on the desired bass frequency range and beat strength. Then, you can experiment with the order and peak_distance parameters to further refine the beat detection. Keep in mind that the optimal values for these parameters might vary depending on the specific characteristics of the audio file you are working with."""


class _MoreHelpAction(argparse.Action):
    """argparse action that prints the extended tuning notes and exits."""

    def __init__(self, option_strings, dest, help=None):
        super().__init__(option_strings=option_strings, dest=dest,
                         default=argparse.SUPPRESS, nargs=0, help=help)

    def __call__(self, parser, namespace, values, option_string=None):
        print(_MORE_HELP)
        parser.exit()


def main(argv=None):
    """Command-line entry point: render a beat-synchronized slideshow video.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    # NOTE(review): the help strings call the filter "high-pass" while
    # describing low-pass behaviour (keeping content below the cutoff);
    # confirm against detect_beats before rewording them.
    parser = argparse.ArgumentParser(description="Create a slideshow that matches the bass beats or lyrics of a song.")
    parser.add_argument("image_folder", help="Path to the folder containing the images for the slideshow.")
    parser.add_argument("audio_file_path", help="Path to the input audio file.")
    parser.add_argument("output_file_path", help="Path to the output video file.")
    parser.add_argument("--highcut", type=int, default=200, help="Cutoff frequency for the high-pass filter (default: 200 Hz).")
    parser.add_argument("--order", type=int, default=5, help="Order of the Butterworth filter (default: 5).")
    parser.add_argument("--peak-distance", type=int, default=10, help="Minimum number of samples between peaks (default: 10).")
    parser.add_argument("--peak-height", type=float, default=0.01, help="Minimum height of a peak in the RMS energy signal (default: 0.01).")
    # Handled by an action (like --help) so it also works when the required
    # positional arguments are not supplied.
    parser.add_argument("--more-help", action=_MoreHelpAction, help="Show more help.")
    parser.add_argument("--randomize", "-r", action="store_true", help="Randomize the order of the images in the slideshow.")
    parser.add_argument("--image_order_file", "-f", help="Path to a text file containing the ordered list of image filenames (not file paths!).")
    args = parser.parse_args(argv)

    # Resolve the image list before the (slow) audio analysis so that a bad
    # folder or order file is reported immediately.
    images = sorted(
        img for img in os.listdir(args.image_folder)
        if img.lower().endswith((".jpg", ".jpeg", ".png"))
    )
    if args.image_order_file:
        available = set(images)
        with open(args.image_order_file, 'r') as f:
            ordered_images = [line.strip() for line in f]
        # Entries that do not name an image in the folder are dropped.
        images = [img for img in ordered_images if img in available]
    elif args.randomize:
        random.shuffle(images)

    if not images:
        print("No valid images found. Please check the image folder or the image_order_file.")
        return

    print('Processing beats...')
    beat_times = detect_beats(args.audio_file_path, highcut=args.highcut, order=args.order, peak_distance=args.peak_distance, peak_height=args.peak_height)
    if len(beat_times) < 2:
        # A slideshow needs at least one interval between two beats.
        print("Fewer than two beats were detected. Try lowering --peak-height or --peak-distance.")
        return

    print('Creating slideshow...')
    audio_file = AudioFileClip(args.audio_file_path)
    try:
        slideshow = create_slideshow(args.image_folder, audio_file, beat_times, images=images)
        final_video = slideshow.set_audio(audio_file)

        print('Writing video...')
        final_video.write_videofile(args.output_file_path, fps=24, codec='libx264', audio_codec='aac')
    finally:
        # Release the audio reader even when rendering fails.
        audio_file.close()


if __name__ == "__main__":
    main()