From 14f39145740c822937e0e6bc84b173b0a6017224 Mon Sep 17 00:00:00 2001 From: Cyberes Date: Tue, 7 Feb 2023 13:15:59 -0700 Subject: [PATCH] reload input file, more arg stuff --- downloader.py | 83 +++++++++++++++++++++++++--------------------- process/threads.py | 2 +- requirements.txt | 3 +- 3 files changed, 48 insertions(+), 40 deletions(-) diff --git a/downloader.py b/downloader.py index 29ed197..cfd8d63 100755 --- a/downloader.py +++ b/downloader.py @@ -52,7 +52,7 @@ parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon m parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.') parser.add_argument('--download-cache-file-directory', default=user_data_dir('automated-youtube-dl', 'cyberes'), help='The path to the directory to track downloaded videos. Defaults to your appdata path.') parser.add_argument('--silence-errors', '-s', action='store_true', help="Don't print any error messages to the console.") -parser.add_argument('--ignore-downloaded', '-i', action='store_true', help='Ignore videos that have been already downloaded and let youtube-dl handle everything.') +parser.add_argument('--ignore-downloaded', '-i', action='store_true', help='Ignore videos that have been already downloaded and disable checks. Let youtube-dl handle everything.') parser.add_argument('--erase-downloaded-tracker', '-e', action='store_true', help='Erase the tracked video file.') parser.add_argument('--ratelimit-sleep', type=int, default=5, help='How many seconds to sleep between items to prevent rate-limiting. Does not affect time between videos as you should be fine since it takes a few seconds to merge everything and clean up.') @@ -85,43 +85,50 @@ if args.verbose: log_time = time.time() -# Get the URLs of the videos to download. Is the input a URL or file? -url_list = {} -if not re.match(url_regex, str(args.file)) or args.input_datatype in ('txt', 'yaml'): - args.file = resolve_path(args.file) - if not args.file.exists(): - print('Input file does not exist:', args.file) - sys.exit(1) - input_file = [x.strip().strip('\n') for x in list(args.file.open())] - if args.input_datatype == 'yaml' or (re.match(r'^.*?:\w*', input_file[0]) and args.input_datatype == 'auto'): - with open(args.file, 'r') as file: - try: - url_list = yaml.safe_load(file) - except yaml.YAMLError as e: - print('Failed to load config file, error:', e) - sys.exit(1) - elif args.input_datatype == 'txt' or (re.match(url_regex, input_file[0]) and args.input_datatype == 'auto'): - if not args.output: - print('You must specify an output path with --output when the input datatype is a text file.') + +def load_input_file(): + # Get the URLs of the videos to download. Is the input a URL or file? + url_list = {} + if not re.match(url_regex, str(args.file)) or args.input_datatype in ('txt', 'yaml'): + args.file = resolve_path(args.file) + if not args.file.exists(): + print('Input file does not exist:', args.file) sys.exit(1) - url_list[str(args.output)] = input_file + input_file = [x.strip().strip('\n') for x in list(args.file.open())] + if args.input_datatype == 'yaml' or (re.match(r'^.*?:\w*', input_file[0]) and args.input_datatype == 'auto'): + with open(args.file, 'r') as file: + try: + url_list = yaml.safe_load(file) + except yaml.YAMLError as e: + print('Failed to load config file, error:', e) + sys.exit(1) + elif args.input_datatype == 'txt' or (re.match(url_regex, input_file[0]) and args.input_datatype == 'auto'): + if not args.output: + args.output = resolve_path(Path(os.getcwd(), 'automated-youtube-dl_output')) + # print('You must specify an output path with --output when the input datatype is a text file.') + # sys.exit(1) + url_list[str(args.output)] = input_file + else: + print('Unknown file type:', args.input_datatype) + print(input_file) + sys.exit(1) + del input_file # release file object + # Verify each line in the file is a valid URL. + for directory, urls in url_list.items(): + for item in urls: + if not re.match(url_regex, str(item)): + print(f'Not a url:', item) + sys.exit(1) else: - print('Unknown file type:', args.input_datatype) - print(input_file) - sys.exit(1) - del input_file # release file object - # Verify each line in the file is a valid URL. - for directory, urls in url_list.items(): - for item in urls: - if not re.match(url_regex, str(item)): - print(f'Not a url:', item) - sys.exit(1) -else: - if not args.output: - args.output = resolve_path(Path(os.getcwd(), 'automated-youtube-dl_output')) - # print('You must specify an output path with --output when the input is a URL.') - # sys.exit(1) - url_list[str(args.output)] = [args.file] + if not args.output: + args.output = resolve_path(Path(os.getcwd(), 'automated-youtube-dl_output')) + # print('You must specify an output path with --output when the input is a URL.') + # sys.exit(1) + url_list[str(args.output)] = [args.file] + return url_list + + +url_list = load_input_file() # Create directories AFTER loading the file create_directories(*url_list.keys(), args.download_cache_file_directory) @@ -319,6 +326,8 @@ while True: progress_bar.update() continue + url_list = load_input_file() + download_archive_file = args.download_cache_file_directory / (str(playlist['id']) + '.log') if args.erase_downloaded_tracker and not already_erased_downloaded_tracker: if download_archive_file.exists(): @@ -361,7 +370,7 @@ while True: # OSError: [Errno 25] Inappropriate ioctl for device status_bar.set_description_str('=' * os.get_terminal_size()[0]) logger.info('Starting downloads...') - for result in pool.imap_unordered(download_video, ((video, {'bars': video_bars, 'ydl_opts': playlist_ydl_opts, 'output_dir': Path(output_path), }) for video in download_queue)): + for result in pool.imap_unordered(download_video, ((video, {'bars': video_bars, 'ydl_opts': playlist_ydl_opts, 'output_dir': Path(output_path), 'ignore_downloaded': args.ignore_downloaded}) for video in download_queue)): # Save the video ID to the file if result['downloaded_video_id']: download_archive_logger.info(result['downloaded_video_id']) diff --git a/process/threads.py b/process/threads.py index 9af74fa..2355b62 100644 --- a/process/threads.py +++ b/process/threads.py @@ -72,7 +72,7 @@ def download_video(args) -> dict: kwargs = args[1] output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object - if not video['channel_id'] or not video['channel'] or not video['channel_url']: + if not kwargs['ignore_downloaded'] and not video['channel_id'] or not video['channel'] or not video['channel_url']: if video['duration'] or isinstance(video['view_count'], int): # Sometimes videos don't have channel_id, channel, or channel_url but are actually valid. Like shorts. pass diff --git a/requirements.txt b/requirements.txt index 4548bb7..3775d92 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,4 @@ mergedeep numpy pyyaml appdirs -phantomjs -scrapetube \ No newline at end of file +phantomjs \ No newline at end of file