From 98add035f2e7707b38471e6495f33907798187ca Mon Sep 17 00:00:00 2001
From: Xintao <wxt1994@126.com>
Date: Sun, 24 Apr 2022 17:22:43 +0800
Subject: [PATCH] support realesr-animevideov3

---
 docs/anime_video_model.md     | 43 ++++++++++++------------
 inference_realesrgan_video.py | 63 +++++++++++++----------------------
 2 files changed, 46 insertions(+), 60 deletions(-)
diff --git a/docs/anime_video_model.md b/docs/anime_video_model.md
index e34ad80..513b07e 100644
--- a/docs/anime_video_model.md
+++ b/docs/anime_video_model.md
@@ -2,29 +2,30 @@
 
 :white_check_mark: We add small models that are optimized for anime videos :-)
 
+- [How to Use](#how-to-use)
+  - [PyTorch Inference](#pytorch-inference)
+  - [NCNN Executable File](#ncnn-executable-file)
+    - [Step 1: Use ffmpeg to extract frames from video](#step-1-use-ffmpeg-to-extract-frames-from-video)
+    - [Step 2: Inference with Real-ESRGAN executable file](#step-2-inference-with-real-esrgan-executable-file)
+    - [Step 3: Merge the enhanced frames back into a video](#step-3-merge-the-enhanced-frames-back-into-a-video)
+- [More Demos](#more-demos)
+
 | Models                                                                                                                             | Scale | Description                    |
 | ---------------------------------------------------------------------------------------------------------------------------------- | :---- | :----------------------------- |
-| [RealESRGANv2-animevideo-xsx2](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.3.0/RealESRGANv2-animevideo-xsx2.pth) | X2    | Anime video model with XS size |
-| [RealESRGANv2-animevideo-xsx4](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.3.0/RealESRGANv2-animevideo-xsx4.pth) | X4    | Anime video model with XS size |
+| [realesr-animevideov3](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth) | X4 <sup>1</sup>   | Anime video model with XS size |
 
-- [Anime Video Models](#anime-video-models)
-  - [How to Use](#how-to-use)
-    - [PyTorch Inference](#pytorch-inference)
-    - [ncnn Executable File](#ncnn-executable-file)
-      - [Step 1: Use ffmpeg to extract frames from video](#step-1-use-ffmpeg-to-extract-frames-from-video)
-      - [Step 2: Inference with Real-ESRGAN executable file](#step-2-inference-with-real-esrgan-executable-file)
-      - [Step 3: Merge the enhanced frames back into a video](#step-3-merge-the-enhanced-frames-back-into-a-video)
-  - [More Demos](#more-demos)
+Note: <br>
+<sup>1</sup> This model can also be used for the X1, X2, and X3 output scales; set the scale with the `-s` option.
 
 ---
 
 The following are some demos (best view in the full screen mode).
 
-https://user-images.githubusercontent.com/17445847/145706977-98bc64a4-af27-481c-8abe-c475e15db7ff.MP4
+<https://user-images.githubusercontent.com/17445847/145706977-98bc64a4-af27-481c-8abe-c475e15db7ff.MP4>
 
-https://user-images.githubusercontent.com/17445847/145707055-6a4b79cb-3d9d-477f-8610-c6be43797133.MP4
+<https://user-images.githubusercontent.com/17445847/145707055-6a4b79cb-3d9d-477f-8610-c6be43797133.MP4>
 
-https://user-images.githubusercontent.com/17445847/145783523-f4553729-9f03-44a8-a7cc-782aadf67b50.MP4
+<https://user-images.githubusercontent.com/17445847/145783523-f4553729-9f03-44a8-a7cc-782aadf67b50.MP4>
 
 ## How to Use
 
@@ -32,12 +33,12 @@ https://user-images.githubusercontent.com/17445847/145783523-f4553729-9f03-44a8-
 
 ```bash
 # download model
-wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.3.0/RealESRGANv2-animevideo-xsx2.pth -P experiments/pretrained_models
+wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth -P realesrgan/weights
 # inference
-python inference_realesrgan_video.py -i inputs/video/onepiece_demo.mp4 -n RealESRGANv2-animevideo-xsx2 -s 2 -v -a --half --suffix outx2
+python inference_realesrgan_video.py -i inputs/video/onepiece_demo.mp4 -n realesr-animevideov3 -s 2 --suffix outx2
 ```
 
-### ncnn Executable File
+### NCNN Executable File
 
 #### Step 1: Use ffmpeg to extract frames from video
 
@@ -49,12 +50,12 @@ ffmpeg -i onepiece_demo.mp4 -qscale:v 1 -qmin 1 -qmax 1 -vsync 0 tmp_frames/fram
 
 #### Step 2: Inference with Real-ESRGAN executable file
 
-1. Download the latest portable [Windows](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.3.0/realesrgan-ncnn-vulkan-20211212-windows.zip) / [Linux](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.3.0/realesrgan-ncnn-vulkan-20211212-ubuntu.zip) / [MacOS](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.3.0/realesrgan-ncnn-vulkan-20211212-macos.zip) **executable files for Intel/AMD/Nvidia GPU**
+1. Download the latest portable [Windows](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-windows.zip) / [Linux](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-ubuntu.zip) / [MacOS](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-macos.zip) **executable files for Intel/AMD/Nvidia GPU**
 
 1. Taking the Windows as example, run:
 
     ```bash
-    ./realesrgan-ncnn-vulkan.exe -i tmp_frames -o out_frames -n RealESRGANv2-animevideo-xsx2 -s 2 -f jpg
+    ./realesrgan-ncnn-vulkan.exe -i tmp_frames -o out_frames -n realesr-animevideov3 -s 2 -f jpg
     ```
 
     - Remember to create the folder `out_frames` ahead
@@ -110,12 +111,12 @@ ffmpeg -i onepiece_demo.mp4 -qscale:v 1 -qmin 1 -qmax 1 -vsync 0 tmp_frames/fram
 
 - Input video for One Piece:
 
-    https://user-images.githubusercontent.com/17445847/145706822-0e83d9c4-78ef-40ee-b2a4-d8b8c3692d17.mp4
+    <https://user-images.githubusercontent.com/17445847/145706822-0e83d9c4-78ef-40ee-b2a4-d8b8c3692d17.mp4>
 
 - Out video for One Piece
 
-    https://user-images.githubusercontent.com/17445847/145706827-384108c0-78f6-4aa7-9621-99d1aaf65682.mp4
+    <https://user-images.githubusercontent.com/17445847/164960481-759658cf-fcb8-480c-b888-cecb606e8744.mp4>
 
 **More comparisons**
 
-https://user-images.githubusercontent.com/17445847/145707458-04a5e9b9-2edd-4d1f-b400-380a72e5f5e6.MP4
+<https://user-images.githubusercontent.com/17445847/145707458-04a5e9b9-2edd-4d1f-b400-380a72e5f5e6.MP4>
diff --git a/inference_realesrgan_video.py b/inference_realesrgan_video.py
index e58e20d..3008397 100644
--- a/inference_realesrgan_video.py
+++ b/inference_realesrgan_video.py
@@ -24,11 +24,10 @@ def main():
         '-n',
         '--model_name',
         type=str,
-        default='RealESRGAN_x4plus',
-        help=('Model names: RealESRGAN_x4plus | RealESRNet_x4plus | RealESRGAN_x4plus_anime_6B | RealESRGAN_x2plus | '
-              'RealESRGANv2-anime-xsx2 | RealESRGANv2-animevideo-xsx2-nousm | RealESRGANv2-animevideo-xsx2 | '
-              'RealESRGANv2-anime-xsx4 | RealESRGANv2-animevideo-xsx4-nousm | RealESRGANv2-animevideo-xsx4. '
-              'Default: RealESRGAN_x4plus'))
+        default='realesr-animevideov3',
+        help=('Model names: realesr-animevideov3 | RealESRGAN_x4plus_anime_6B | RealESRGAN_x4plus | RealESRNet_x4plus |'
+              ' RealESRGAN_x2plus | '
+              'Default:realesr-animevideov3'))
     parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
     parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image')
     parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored video')
@@ -36,9 +35,8 @@ def main():
     parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding')
     parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border')
     parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face')
-    parser.add_argument('--half', action='store_true', help='Use half precision during inference')
-    parser.add_argument('-v', '--video', action='store_true', help='Output a video using ffmpeg')
-    parser.add_argument('-a', '--audio', action='store_true', help='Keep audio')
+    parser.add_argument(
+        '--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).')
     parser.add_argument('--fps', type=float, default=None, help='FPS of the output video')
     parser.add_argument('--consumer', type=int, default=4, help='Number of IO consumers')
 
@@ -55,7 +53,7 @@ def main():
     args = parser.parse_args()
 
     # ---------------------- determine models according to model names ---------------------- #
-    args.model_name = args.model_name.split('.')[0]
+    args.model_name = args.model_name.split('.pth')[0]
     if args.model_name in ['RealESRGAN_x4plus', 'RealESRNet_x4plus']:  # x4 RRDBNet model
         model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
         netscale = 4
@@ -65,14 +63,7 @@ def main():
     elif args.model_name in ['RealESRGAN_x2plus']:  # x2 RRDBNet model
         model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
         netscale = 2
-    elif args.model_name in [
-            'RealESRGANv2-anime-xsx2', 'RealESRGANv2-animevideo-xsx2-nousm', 'RealESRGANv2-animevideo-xsx2'
-    ]:  # x2 VGG-style model (XS size)
-        model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=2, act_type='prelu')
-        netscale = 2
-    elif args.model_name in [
-            'RealESRGANv2-anime-xsx4', 'RealESRGANv2-animevideo-xsx4-nousm', 'RealESRGANv2-animevideo-xsx4'
-    ]:  # x4 VGG-style model (XS size)
+    elif args.model_name in ['realesr-animevideov3']:  # x4 VGG-style model (XS size)
         model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
         netscale = 4
 
@@ -91,7 +82,7 @@ def main():
         tile=args.tile,
         tile_pad=args.tile_pad,
         pre_pad=args.pre_pad,
-        half=args.half)
+        half=not args.fp32)
 
     if args.face_enhance:  # Use GFPGAN for face enhancement
         from gfpgan import GFPGANer
@@ -106,6 +97,7 @@ def main():
     save_frame_folder = os.path.join(args.output, 'frames_tmpout')
     os.makedirs(save_frame_folder, exist_ok=True)
 
+    # input can be a video file / a folder of frames / an image
     if mimetypes.guess_type(args.input)[0].startswith('video'):  # is a video file
         video_name = os.path.splitext(os.path.basename(args.input))[0]
         frame_folder = os.path.join('tmp_frames', video_name)
@@ -114,13 +106,13 @@ def main():
         os.system(f'ffmpeg -i {args.input} -qscale:v 1 -qmin 1 -qmax 1 -vsync 0  {frame_folder}/frame%08d.png')
         # get image path list
         paths = sorted(glob.glob(os.path.join(frame_folder, '*')))
-        if args.video:
-            if args.fps is None:
-                # get input video fps
-                import ffmpeg
-                probe = ffmpeg.probe(args.input)
-                video_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'video']
-                args.fps = eval(video_streams[0]['avg_frame_rate'])
+        # get input video fps
+        if args.fps is None:
+
+            import ffmpeg
+            probe = ffmpeg.probe(args.input)
+            video_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'video']
+            args.fps = eval(video_streams[0]['avg_frame_rate'])
     elif mimetypes.guess_type(args.input)[0].startswith('image'):  # is an image file
         paths = [args.input]
         video_name = 'video'
@@ -180,20 +172,13 @@ def main():
     pbar.close()
 
     # merge frames to video
-    if args.video:
-        video_save_path = os.path.join(args.output, f'{video_name}_{args.suffix}.mp4')
-        if args.audio:
-            os.system(
-                f'ffmpeg -r {args.fps} -i {save_frame_folder}/frame%08d_out.{extension} -i {args.input}'
-                f' -map 0:v:0 -map 1:a:0 -c:a copy -c:v libx264 -r {args.fps} -pix_fmt yuv420p  {video_save_path}')
-        else:
-            os.system(f'ffmpeg -r {args.fps} -i {save_frame_folder}/frame%08d_out.{extension} '
-                      f'-c:v libx264 -r {args.fps} -pix_fmt yuv420p {video_save_path}')
-
-        # delete tmp file
-        shutil.rmtree(save_frame_folder)
-        if os.path.isdir(frame_folder):
-            shutil.rmtree(frame_folder)
+    video_save_path = os.path.join(args.output, f'{video_name}_{args.suffix}.mp4')
+    os.system(f'ffmpeg -r {args.fps} -i {save_frame_folder}/frame%08d_out.{extension} -i {args.input}'
+              f' -map 0:v:0 -map 1:a:0 -c:a copy -c:v libx264 -r {args.fps} -pix_fmt yuv420p  {video_save_path}')
+    # delete tmp file
+    shutil.rmtree(save_frame_folder)
+    if os.path.isdir(frame_folder):
+        shutil.rmtree(frame_folder)
 
 
 if __name__ == '__main__':