From 14f39145740c822937e0e6bc84b173b0a6017224 Mon Sep 17 00:00:00 2001
From: Cyberes <cyberes@evulid.cc>
Date: Tue, 7 Feb 2023 13:15:59 -0700
Subject: [PATCH] reload input file, more arg stuff

---
 downloader.py      | 83 +++++++++++++++++++++++++---------------------
 process/threads.py |  2 +-
 requirements.txt   |  3 +-
 3 files changed, 48 insertions(+), 40 deletions(-)

diff --git a/downloader.py b/downloader.py
index 29ed197..cfd8d63 100755
--- a/downloader.py
+++ b/downloader.py
@@ -52,7 +52,7 @@ parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon m
 parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.')
 parser.add_argument('--download-cache-file-directory', default=user_data_dir('automated-youtube-dl', 'cyberes'), help='The path to the directory to track downloaded videos. Defaults to your appdata path.')
 parser.add_argument('--silence-errors', '-s', action='store_true', help="Don't print any error messages to the console.")
-parser.add_argument('--ignore-downloaded', '-i', action='store_true', help='Ignore videos that have been already downloaded and let youtube-dl handle everything.')
+parser.add_argument('--ignore-downloaded', '-i', action='store_true', help='Ignore videos that have been already downloaded and disable checks. Let youtube-dl handle everything.')
 parser.add_argument('--erase-downloaded-tracker', '-e', action='store_true', help='Erase the tracked video file.')
 parser.add_argument('--ratelimit-sleep', type=int, default=5,
                     help='How many seconds to sleep between items to prevent rate-limiting. Does not affect time between videos as you should be fine since it takes a few seconds to merge everything and clean up.')
@@ -85,43 +85,50 @@ if args.verbose:
 
 log_time = time.time()
 
-# Get the URLs of the videos to download. Is the input a URL or file?
-url_list = {}
-if not re.match(url_regex, str(args.file)) or args.input_datatype in ('txt', 'yaml'):
-    args.file = resolve_path(args.file)
-    if not args.file.exists():
-        print('Input file does not exist:', args.file)
-        sys.exit(1)
-    input_file = [x.strip().strip('\n') for x in list(args.file.open())]
-    if args.input_datatype == 'yaml' or (re.match(r'^.*?:\w*', input_file[0]) and args.input_datatype == 'auto'):
-        with open(args.file, 'r') as file:
-            try:
-                url_list = yaml.safe_load(file)
-            except yaml.YAMLError as e:
-                print('Failed to load config file, error:', e)
-                sys.exit(1)
-    elif args.input_datatype == 'txt' or (re.match(url_regex, input_file[0]) and args.input_datatype == 'auto'):
-        if not args.output:
-            print('You must specify an output path with --output when the input datatype is a text file.')
+
+def load_input_file():
+    """Build the {output directory: [URLs]} dict from args.file (a single URL, or a txt/yaml file); exits on invalid input."""
+    url_list = {}
+    if not re.match(url_regex, str(args.file)) or args.input_datatype in ('txt', 'yaml'):
+        args.file = resolve_path(args.file)
+        if not args.file.exists():
+            print('Input file does not exist:', args.file)
             sys.exit(1)
-        url_list[str(args.output)] = input_file
+        with args.file.open() as file:  # context manager so the handle is closed after reading
+            input_file = [x.strip() for x in file]
+        if not input_file:  # the type sniffing below indexes input_file[0]
+            print('Input file is empty:', args.file)
+            sys.exit(1)
+        if args.input_datatype == 'yaml' or (re.match(r'^.*?:\w*', input_file[0]) and args.input_datatype == 'auto'):
+            with open(args.file, 'r') as file:
+                try:
+                    url_list = yaml.safe_load(file)
+                except yaml.YAMLError as e:
+                    print('Failed to load config file, error:', e)
+                    sys.exit(1)
+        elif args.input_datatype == 'txt' or (re.match(url_regex, input_file[0]) and args.input_datatype == 'auto'):
+            if not args.output:
+                # Fall back to ./automated-youtube-dl_output when --output is not given.
+                args.output = resolve_path(Path(os.getcwd(), 'automated-youtube-dl_output'))
+            url_list[str(args.output)] = input_file
+        else:
+            print('Unknown file type:', args.input_datatype)
+            print(input_file)
+            sys.exit(1)
+        # Verify each line in the file is a valid URL.
+        for directory, urls in url_list.items():
+            for item in urls:
+                if not re.match(url_regex, str(item)):
+                    print('Not a url:', item)
+                    sys.exit(1)
     else:
-        print('Unknown file type:', args.input_datatype)
-        print(input_file)
-        sys.exit(1)
-    del input_file  # release file object
-    # Verify each line in the file is a valid URL.
-    for directory, urls in url_list.items():
-        for item in urls:
-            if not re.match(url_regex, str(item)):
-                print(f'Not a url:', item)
-                sys.exit(1)
-else:
-    if not args.output:
-        args.output = resolve_path(Path(os.getcwd(), 'automated-youtube-dl_output'))
-        # print('You must specify an output path with --output when the input is a URL.')
-        # sys.exit(1)
-    url_list[str(args.output)] = [args.file]
+        if not args.output:
+            args.output = resolve_path(Path(os.getcwd(), 'automated-youtube-dl_output'))
+        url_list[str(args.output)] = [args.file]
+    return url_list
+
+
+url_list = load_input_file()
 
 # Create directories AFTER loading the file
 create_directories(*url_list.keys(), args.download_cache_file_directory)
@@ -319,6 +326,8 @@ while True:
                 progress_bar.update()
                 continue
 
+            url_list = load_input_file()
+
             download_archive_file = args.download_cache_file_directory / (str(playlist['id']) + '.log')
             if args.erase_downloaded_tracker and not already_erased_downloaded_tracker:
                 if download_archive_file.exists():
@@ -361,7 +370,7 @@ while True:
                         # OSError: [Errno 25] Inappropriate ioctl for device
                         status_bar.set_description_str('=' * os.get_terminal_size()[0])
                     logger.info('Starting downloads...')
-                    for result in pool.imap_unordered(download_video, ((video, {'bars': video_bars, 'ydl_opts': playlist_ydl_opts, 'output_dir': Path(output_path), }) for video in download_queue)):
+                    for result in pool.imap_unordered(download_video, ((video, {'bars': video_bars, 'ydl_opts': playlist_ydl_opts, 'output_dir': Path(output_path), 'ignore_downloaded': args.ignore_downloaded}) for video in download_queue)):
                         # Save the video ID to the file
                         if result['downloaded_video_id']:
                             download_archive_logger.info(result['downloaded_video_id'])
diff --git a/process/threads.py b/process/threads.py
index 9af74fa..2355b62 100644
--- a/process/threads.py
+++ b/process/threads.py
@@ -72,7 +72,7 @@ def download_video(args) -> dict:
     kwargs = args[1]
     output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []}  # empty object
 
-    if not video['channel_id'] or not video['channel'] or not video['channel_url']:
+    if not kwargs['ignore_downloaded'] and (not video['channel_id'] or not video['channel'] or not video['channel_url']):  # parenthesised: `and` binds tighter than `or`, so the flag must guard the whole check
         if video['duration'] or isinstance(video['view_count'], int):
             # Sometimes videos don't have channel_id, channel, or channel_url but are actually valid. Like shorts.
             pass
diff --git a/requirements.txt b/requirements.txt
index 4548bb7..3775d92 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,5 +5,4 @@ mergedeep
 numpy
 pyyaml
 appdirs
-phantomjs
-scrapetube
\ No newline at end of file
+phantomjs
\ No newline at end of file