scripts/tools/vimeo-audio-and-video.py

106 lines
3.1 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import base64
import os
import re
import requests
import subprocess
from tqdm import tqdm
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--url-list', help='List of URL/filename pairs, delimited by tabs')
args = parser.parse_args()
for line in open(args.url_list):
output_file, master_json_url = line.rstrip().split('\t')
print('\n\n\nProcessing %s' % output_file)
# Extract some stuff
base_url = master_json_url[:master_json_url.rfind('/', 0, -26) + 1]
resp = requests.get(master_json_url)
content = resp.json()
# Video download here
heights = [(i, d['height']) for (i, d) in enumerate(content['video'])]
idx = max(heights, key=lambda x: x[1])[0]
video = content['video'][idx]
video_base_url = base_url + content['base_url'] + video['base_url']
print('Base url:', video_base_url)
filenameVideo = 'video_%s.mp4' % video['id']
print('Saving VIDEO to %s' % filenameVideo)
video_file = open(filenameVideo, 'wb')
init_segment = base64.b64decode(video['init_segment'])
video_file.write(init_segment)
for segment in tqdm(video['segments']):
segment_url = video_base_url + segment['url']
print(segment_url + '\n')
print(video_base_url+ '\n')
print(segment['url']+ '\n')
resp = requests.get(segment_url, stream=True)
if resp.status_code != 200:
print('not 200!')
print(resp)
print(segment_url)
break
for chunk in resp:
video_file.write(chunk)
video_file.flush()
video_file.close()
# Audio download here
bitrate = [(i, d['bitrate']) for (i, d) in enumerate(content['audio'])]
print('Bitrate', bitrate)
idx = max(bitrate, key=lambda x: x[1])[0]
audio = content['audio'][idx]
audio_base_url = base_url + content['base_url'] + audio['base_url']
print('Base url:', audio_base_url)
filenameAudio = 'audio_%s.mp4' % audio['id']
print('Saving AUDIO to %s' % filenameAudio)
audio_file = open(filenameAudio, 'wb')
init_segment = base64.b64decode(audio['init_segment'])
audio_file.write(init_segment)
for segment in tqdm(audio['segments']):
segment_url = audio_base_url + segment['url']
segment_url = re.sub(r'/[a-zA-Z0-9_-]*/\.\./',r'/',segment_url.rstrip())
resp = requests.get(segment_url, stream=True)
if resp.status_code != 200:
print('not 200!')
print(resp)
print(segment_url)
break
for chunk in resp:
audio_file.write(chunk)
audio_file.flush()
audio_file.close()
# Combine audio and video here
print('Combining video and audio...')
cmd = 'ffmpeg -y -i '
cmd += filenameAudio
cmd += ' -i '
cmd += filenameVideo
cmd += ' ' + output_file
subprocess.call(cmd, shell=True)
print('Mixing Done!')
# Delete the remaining single audio and video files
# os.remove(filenameAudio)
# os.remove(filenameVideo)
# print("Temporary files removed!")
# Log the conclusion of the operations
print("*** VIDEO DOWNLOADED SUCCESSFULLY ***")
print(video['base_url'])