FPS_Enhancer

Paused

App Files Files Community

r3gm committed 10 days ago

Commit

05035da

verified ·

1 Parent(s): 638b624

Upload inference_video_w.py

Browse files

Files changed (1) hide show

inference_video_w.py +316 -0

inference_video_w.py ADDED Viewed

	@@ -0,0 +1,316 @@

+import os
+import cv2
+import torch
+import numpy as np
+from tqdm import tqdm
+from torch.nn import functional as F
+import warnings
+import _thread
+import skvideo.io
+from queue import Queue, Empty
+from model.pytorch_msssim import ssim_matlab
+import shutil
+import tempfile
+import time
+warnings.filterwarnings("ignore")
+# Utility class to mimic argparse object
class Args:
    """Lightweight attribute bag standing in for an argparse result object.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name, so ``Args(scale=0.5).scale == 0.5``.
    Attributes remain freely reassignable after construction.
    """

    def __init__(self, **kwargs):
        # Store the keyword arguments directly as instance attributes.
        self.__dict__.update(kwargs)

    def __repr__(self):
        # Show every stored option so the object is readable in logs/debuggers.
        fields = ", ".join(
            "{}={!r}".format(key, value) for key, value in self.__dict__.items()
        )
        return "{}({})".format(type(self).__name__, fields)
def transferAudio(sourceVideo, targetVideo):
    """Mux the audio of ``sourceVideo`` into ``targetVideo`` in place via ffmpeg.

    The audio is first stream-copied into a temporary file and merged with
    the (renamed) target video. If that merge yields no usable file, the
    audio is transcoded to AAC and the merge is retried. If that fails too,
    the original audio-less video is restored under ``targetVideo``.

    Args:
        sourceVideo: Path of the video whose audio track should be reused.
        targetVideo: Path of the video that receives the audio. During the
            call it is temporarily renamed to ``<name>_noaudio<ext>``.

    Raises:
        OSError: If ``targetVideo`` cannot be renamed (for example because
            it does not exist).
    """
    # Function-scope import: subprocess is not among the file's top-level imports.
    import subprocess

    def run_ffmpeg(*ffmpeg_args):
        # Pass an argument list (no shell) so paths containing quotes, spaces
        # or shell metacharacters cannot break or inject into the command.
        command = ['ffmpeg', '-hide_banner', '-loglevel', 'error', '-y']
        command.extend(ffmpeg_args)
        try:
            # stdin is detached so ffmpeg never waits for interactive input.
            subprocess.run(command, stdin=subprocess.DEVNULL, check=False)
        except OSError:
            # ffmpeg could not be started; the missing/empty output check
            # below then routes the call into the fallback path.
            pass

    def is_unusable(path):
        # A failed ffmpeg run may leave either no file at all or an empty one.
        return not os.path.exists(path) or os.path.getsize(path) == 0

    target_root, target_ext = os.path.splitext(targetVideo)
    targetNoAudio = target_root + "_noaudio" + target_ext

    # Unique temp directory so concurrent calls do not share audio files.
    unique_temp_dir = tempfile.mkdtemp()
    try:
        # Extract the audio stream without re-encoding.
        tempAudioFileName = os.path.join(unique_temp_dir, "audio.mkv")
        run_ffmpeg('-i', sourceVideo, '-c:a', 'copy', '-vn', tempAudioFileName)

        os.rename(targetVideo, targetNoAudio)

        # Combine the extracted audio with the audio-less video.
        run_ffmpeg('-i', targetNoAudio, '-i', tempAudioFileName,
                   '-c', 'copy', targetVideo)

        if is_unusable(targetVideo):
            # Lossless merge failed: retry with the audio transcoded to AAC.
            tempAudioFileName = os.path.join(unique_temp_dir, "audio.m4a")
            run_ffmpeg('-i', sourceVideo, '-c:a', 'aac', '-b:a', '160k',
                       '-vn', tempAudioFileName)
            run_ffmpeg('-i', targetNoAudio, '-i', tempAudioFileName,
                       '-c', 'copy', targetVideo)
            if is_unusable(targetVideo):
                # AAC is not usable either: put the audio-less video back.
                # os.replace also overwrites a leftover empty target file.
                os.replace(targetNoAudio, targetVideo)
                print("Audio transfer failed. Interpolated video will have no audio")
            else:
                print("Lossless audio transfer failed. Audio was transcoded to AAC (M4A) instead.")
                # Remove the audio-less video.
                os.remove(targetNoAudio)
        else:
            os.remove(targetNoAudio)
    finally:
        # Always remove the temp directory, even when a step above raised.
        shutil.rmtree(unique_temp_dir, ignore_errors=True)
def inference(
    video=None,
    output=None,
    img=None,
    montage=False,
    modelDir='train_log',
    fp16=False,
    UHD=False,
    scale=1.0,
    skip=False,
    fps=None,
    png=False,
    ext='mp4',
    exp=1,
    multi=2
):
    """Interpolate intermediate frames into a video or a PNG frame sequence.

    Frames are read on a background thread, intermediate frames are
    generated between each consecutive pair with the RIFE model loaded from
    ``train_log.RIFE_HDv3``, and the result is written on a second
    background thread, either as an H.264 video or as numbered PNG files
    in ``./vid_out``.

    Args:
        video: Path of the input video. Either ``video`` or ``img`` is required.
        output: Path of the output video. When None, a name is derived from
            the input path, ``multi``, the target fps and ``ext``.
        img: Directory of PNG frames whose file names (minus the extension)
            are integers. Setting it forces PNG output.
        montage: When true, the centre half of each frame (by width) is
            processed, and each output frame is the previous source frame
            placed side by side with the interpolated frame.
        modelDir: Directory passed to ``model.load_model``.
        fp16: When true and CUDA is available, half precision is used.
        UHD: When true and ``scale`` is 1.0, ``scale`` is set to 0.5.
        scale: Scale forwarded to the model; must be one of
            0.25, 0.5, 1.0, 2.0, 4.0.
        skip: Abandoned flag; it only triggers a notice.
        fps: Target frame rate. When None or 0 it becomes
            ``original_fps * multi``, and only then is the source audio
            merged into the output.
        png: When true, frames are written as PNG files to ``./vid_out``
            instead of a video.
        ext: File extension used for the derived output name.
        exp: When not 1, ``multi`` is replaced by ``2 ** exp``.
        multi: Frame-count multiplier; ``multi - 1`` frames are generated
            between each pair of source frames.

    Returns:
        The output video path, or None when PNG output is used.

    Raises:
        AssertionError: If neither ``video`` nor ``img`` is given, or
            ``scale`` is not one of the supported values.
    """
    # Function-scope import: the file's top-level imports only provide _thread.
    import threading

    # Bundle the options into one attribute object shared by the helpers below.
    args = Args(
        video=video, output=output, img=img, montage=montage,
        modelDir=modelDir, fp16=fp16, UHD=UHD, scale=scale,
        skip=skip, fps=fps, png=png, ext=ext, exp=exp, multi=multi
    )

    # A non-default exp overrides multi with a power of two.
    if args.exp != 1:
        args.multi = (2 ** args.exp)

    assert args.video is not None or args.img is not None, \
        "either video or img must be provided"
    if args.skip:
        print("skip flag is abandoned, please refer to issue #207.")
    if args.UHD and args.scale == 1.0:
        args.scale = 0.5
    assert args.scale in [0.25, 0.5, 1.0, 2.0, 4.0], \
        "scale must be one of 0.25, 0.5, 1.0, 2.0, 4.0"
    if args.img is not None:
        args.png = True

    # Device setup
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.set_grad_enabled(False)
    if torch.cuda.is_available():
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True
        if args.fp16:
            torch.set_default_tensor_type(torch.cuda.HalfTensor)

    # Load model
    from train_log.RIFE_HDv3 import Model
    model = Model()
    if not hasattr(model, 'version'):
        model.version = 0
    model.load_model(args.modelDir, -1)
    print("Loaded 3.x/4.x HD model.")
    model.eval()
    model.device()

    # Bound up front so both names exist on the image-directory path as well.
    fpsNotAssigned = False
    video_path_wo_ext = None

    # Video/Image setup
    if args.video is not None:
        # OpenCV is only used to read the stream metadata.
        videoCapture = cv2.VideoCapture(args.video)
        original_fps = videoCapture.get(cv2.CAP_PROP_FPS)
        tot_frame = videoCapture.get(cv2.CAP_PROP_FRAME_COUNT)
        videoCapture.release()
        if args.fps is None or args.fps == 0:
            fpsNotAssigned = True
            args.fps = original_fps * args.multi
        videogen = skvideo.io.vreader(args.video)
        lastframe = next(videogen)
        # The source extension is discarded so it cannot shadow the ext parameter.
        video_path_wo_ext, _ = os.path.splitext(args.video)
        print('{}.{}, {} frames in total, {}FPS to {}FPS'.format(video_path_wo_ext, args.ext, tot_frame, original_fps, args.fps))
        if not args.png and fpsNotAssigned:
            print("The audio will be merged after interpolation process")
        else:
            print("Will not merge audio because using png or fps flag!")
    else:
        # Match on the extension rather than any file name that contains "png".
        videogen = [f for f in os.listdir(args.img) if f.lower().endswith('.png')]
        tot_frame = len(videogen)
        # File names are expected to be integers followed by the 4-char extension.
        videogen.sort(key=lambda x: int(x[:-4]))
        # cv2 loads BGR; reverse the channel axis to match the video path.
        lastframe = cv2.imread(os.path.join(args.img, videogen[0]), cv2.IMREAD_UNCHANGED)[:, :, ::-1].copy()
        videogen = videogen[1:]

    h, w, _ = lastframe.shape
    vid_out_name = None
    vid_out = None
    if args.png:
        os.makedirs('vid_out', exist_ok=True)
    else:
        if args.output is not None:
            vid_out_name = args.output
        else:
            vid_out_name = '{}_{}X_{}fps.{}'.format(video_path_wo_ext, args.multi, int(np.round(args.fps)), args.ext)
        outputdict = {
            '-c:v': 'libx264',
            '-crf': '17',
            '-preset': 'slow',
            '-pix_fmt': 'yuv420p'
        }
        vid_out = skvideo.io.FFmpegWriter(vid_out_name, inputdict={'-r': str(args.fps)}, outputdict=outputdict)

    # --- Pre-loop setup ---
    left = 0
    if args.montage:
        # Keep only the centre half of the frame width.
        left = w // 4
        w = w // 2
    # Pad height and width up to the next multiple of tmp (at least 128).
    tmp = max(128, int(128 / args.scale))
    ph = ((h - 1) // tmp + 1) * tmp
    pw = ((w - 1) // tmp + 1) * tmp
    padding = (0, pw - w, 0, ph - h)
    pbar = tqdm(total=tot_frame)
    if args.montage:
        lastframe = lastframe[:, left: left + w]
    write_buffer = Queue(maxsize=500)
    read_buffer = Queue(maxsize=500)

    # --- Nested helpers; they close over args, model, vid_out, h, w, ... ---
    def clear_write_buffer(write_buffer):
        # Writer thread: drain frames until the None sentinel arrives.
        cnt = 0
        while True:
            item = write_buffer.get()
            if item is None:
                break
            if args.png:
                cv2.imwrite('vid_out/{:0>7d}.png'.format(cnt), item[:, :, ::-1])
                cnt += 1
            else:
                vid_out.writeFrame(item)

    def build_read_buffer(read_buffer, videogen):
        # Reader thread: push frames, then always push the None sentinel so
        # the main loop terminates even when reading stops early.
        try:
            for frame in videogen:
                if args.img is not None:
                    frame = cv2.imread(os.path.join(args.img, frame), cv2.IMREAD_UNCHANGED)[:, :, ::-1].copy()
                if args.montage:
                    frame = frame[:, left: left + w]
                read_buffer.put(frame)
        except Exception as exc:
            # Best effort: keep the frames read so far, but report the problem.
            print("Frame reading stopped early: {}".format(exc))
        finally:
            read_buffer.put(None)

    def make_inference(I0, I1, n):
        # Produce n intermediate frames between I0 and I1.
        if model.version >= 3.9:
            # The model takes an explicit timestep, so query each one directly.
            res = []
            for i in range(n):
                res.append(model.inference(I0, I1, (i+1) * 1. / (n+1), args.scale))
            return res
        else:
            # Only a midpoint is produced per call: split recursively.
            middle = model.inference(I0, I1, args.scale)
            if n == 1:
                return [middle]
            first_half = make_inference(I0, middle, n=n//2)
            second_half = make_inference(middle, I1, n=n//2)
            if n % 2:
                return [*first_half, middle, *second_half]
            else:
                return [*first_half, *second_half]

    def pad_image(img):
        if args.fp16:
            return F.pad(img, padding).half()
        else:
            return F.pad(img, padding)

    def frame_to_tensor(frame):
        # HWC uint8 frame -> padded 1xCxHxW float tensor in [0, 1] on the device.
        tensor = torch.from_numpy(np.transpose(frame, (2, 0, 1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.
        return pad_image(tensor)

    def tensor_to_frame(tensor):
        # Padded 1xCxHxW tensor -> HWC uint8 frame cropped back to h x w.
        return (tensor[0] * 255.).byte().cpu().numpy().transpose(1, 2, 0)[:h, :w]

    # Daemon threads, like the _thread-based originals, never block interpreter exit.
    reader = threading.Thread(target=build_read_buffer, args=(read_buffer, videogen), daemon=True)
    writer = threading.Thread(target=clear_write_buffer, args=(write_buffer,), daemon=True)
    reader.start()
    writer.start()

    I1 = frame_to_tensor(lastframe)
    temp = None  # frame read ahead in the previous iteration, if any

    # --- Main loop ---
    while True:
        if temp is not None:
            frame = temp
            temp = None
        else:
            frame = read_buffer.get()
        if frame is None:
            break
        I0 = I1
        I1 = frame_to_tensor(frame)
        # Compare 32x32 thumbnails of both frames.
        I0_small = F.interpolate(I0, (32, 32), mode='bilinear', align_corners=False)
        I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False)
        ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
        break_flag = False
        if ssim > 0.996:
            # The two frames are nearly identical: read one frame ahead and
            # replace I1 with a model frame between I0 and that next frame.
            frame = read_buffer.get()
            if frame is None:
                break_flag = True
                frame = lastframe
            else:
                temp = frame
            I1 = frame_to_tensor(frame)
            I1 = model.inference(I0, I1, scale=args.scale)
            I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False)
            ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
            frame = tensor_to_frame(I1)
        if ssim < 0.2:
            # Very dissimilar frames (presumably a scene change): repeat I0
            # instead of interpolating.
            output_frames = [I0 for _ in range(args.multi - 1)]
        else:
            output_frames = make_inference(I0, I1, args.multi - 1)
        if args.montage:
            write_buffer.put(np.concatenate((lastframe, lastframe), 1))
            for mid in output_frames:
                write_buffer.put(np.concatenate((lastframe, tensor_to_frame(mid)), 1))
        else:
            write_buffer.put(lastframe)
            for mid in output_frames:
                write_buffer.put(tensor_to_frame(mid))
        pbar.update(1)
        lastframe = frame
        if break_flag:
            break

    # Emit the final source frame, then tell the writer to stop.
    if args.montage:
        write_buffer.put(np.concatenate((lastframe, lastframe), 1))
    else:
        write_buffer.put(lastframe)
    write_buffer.put(None)
    # Wait for the writer to finish. An empty queue alone does not guarantee
    # that the last dequeued frame has already been written.
    writer.join()
    pbar.close()
    if vid_out is not None:
        vid_out.close()

    # Audio transfer: only when a video was written at the default frame rate.
    if not args.png and fpsNotAssigned and args.video is not None:
        try:
            transferAudio(args.video, vid_out_name)
        except Exception:
            print("Audio transfer failed. Interpolated video will have no audio")
            targetNoAudio = os.path.splitext(vid_out_name)[0] + "_noaudio" + os.path.splitext(vid_out_name)[1]
            # Restore the audio-less video only if the failed transfer left it behind.
            if os.path.exists(targetNoAudio):
                os.replace(targetNoAudio, vid_out_name)
    return vid_out_name