Added rating selection; the script now works

This commit is contained in:
chavinlo 2022-09-10 19:38:22 -05:00
parent 4904ba4db3
commit f07fb01490
1 changed files with 58 additions and 6 deletions

View File

@ -1,6 +1,8 @@
## This script WAS NOT USED on the weights released by ProjectAI Touhou on 8th of september, 2022. ## This script WAS NOT USED on the weights released by ProjectAI Touhou on 8th of september, 2022.
## This script CAN convert tags to human-readable-text BUT IT IS NOT REQUIRED. ## This script CAN convert tags to human-readable-text BUT IT IS NOT REQUIRED.
import argparse import argparse
import string
#Stolen code from https://stackoverflow.com/a/43357954 #Stolen code from https://stackoverflow.com/a/43357954
def str2bool(v): def str2bool(v):
if isinstance(v, bool): if isinstance(v, bool):
@ -12,13 +14,38 @@ def str2bool(v):
else: else:
raise argparse.ArgumentTypeError('Boolean value expected.') raise argparse.ArgumentTypeError('Boolean value expected.')
def ratingparsing(input):
    """Parse the ``--rating`` command-line value into the accepted-ratings string.

    The value is matched case-insensitively, letter by letter:
    ``a`` selects every rating, while ``e``, ``g``, ``q`` and ``s`` each
    select that single rating code. Letters may be combined (e.g. ``"gs"``).

    Args:
        input: Raw string given to ``--rating``. (The name shadows the
            builtin ``input``; it is kept so the signature stays unchanged.)

    Returns:
        ``"e g q s"`` when ``a`` is present; otherwise a string made of a
        leading space followed by each selected code and a trailing space,
        always in e/g/q/s order (e.g. ``" e q "``).

    Raises:
        argparse.ArgumentTypeError: If none of a/e/g/q/s occurs in the value.
            argparse reports this as a normal usage error instead of a
            traceback; it is still an ``Exception`` subclass.
    """
    requested = input.lower()

    if "a" in requested:
        # "all" wins outright. Appending further codes onto this string used
        # to produce garbled output such as "e g q se ".
        ratingsSelected = "e g q s"
    else:
        picked = [code for code in "egqs" if code in requested]
        if not picked:
            raise argparse.ArgumentTypeError('a/e/g/q/s expected')
        ratingsSelected = " " + "".join(code + " " for code in picked)

    print("Ratings selected: " + ratingsSelected)
    return ratingsSelected
## In the future someone might want to access this via import. Consider adding support for that ## In the future someone might want to access this via import. Consider adding support for that
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--jsonpath', '-J', type=str, help='Path to JSONL file with the metadata', required = True) parser.add_argument('--jsonpath', '-J', type=str, help='Path to JSONL file with the metadata', required = True)
parser.add_argument('--extractpath', '-E', type=str, help='Path to the folder where to extract the images and text files', required = True) parser.add_argument('--extractpath', '-E', type=str, help='Path to the folder where to extract the images and text files', required = True)
parser.add_argument('--imagespath', '-I', type=str, help='Path to the folder with the images', required = False, default="512px") parser.add_argument('--imagespath', '-I', type=str, help='Path to the folder with the images', required = False, default="512px")
parser.add_argument('--convtohuman', '-H', type=str2bool, help='Convert to human-readable-text', required = False, default=True) parser.add_argument('--convtohuman', '-H', type=str2bool, help='Convert to human-readable-text', required = False, default=False)
parser.add_argument('--rating', '-R', type=ratingparsing, help='Extract specific rating/s [a/e/g/q/s]', required = False, default='a')
args = parser.parse_args() args = parser.parse_args()
if args.convtohuman == True:
print("tag conversion to human is currently somewhat broken. If you still want to use it remove line 25")
#Q: What is broken?
#A: tag_separator sometimes ends up in to_write with an empty field next to it. The fix should be easy: leave out tag_separator whenever the adjacent field is blank.
#Right now it's not important, and many tokens are lost when converting to human-readable text anyway; tag-based input is more effective than human-readable text.
exit()
print("Arguments: " + str(args)) print("Arguments: " + str(args))
import json import json
@ -41,6 +68,7 @@ def ConvCommaAndUnderscoreToHuman(convtohuman, input):
tars = tars.replace('_', ' ') tars = tars.replace('_', ' ')
elif convtohuman == False: elif convtohuman == False:
print("CommaAndUnderscoreToHuman: convtohuman is false hence not doing anything") print("CommaAndUnderscoreToHuman: convtohuman is false hence not doing anything")
return tars
def ConvTagsToHuman(convtohuman, input): def ConvTagsToHuman(convtohuman, input):
tars = input tars = input
@ -62,6 +90,7 @@ def ConvTagsToHuman(convtohuman, input):
tars = tars.replace('6boys', 'six boys') tars = tars.replace('6boys', 'six boys')
elif convtohuman == False: elif convtohuman == False:
print("ConvTagsToHuman: convtohuman is false hence not doing anything") print("ConvTagsToHuman: convtohuman is false hence not doing anything")
print("TARS is: " + tars)
return tars return tars
#Converts ratings to X content #Converts ratings to X content
@ -93,6 +122,7 @@ def ConvCharacterToHuman(convtohuman, input):
tars = tars.replace(')', '') tars = tars.replace(')', '')
elif convtohuman == False: elif convtohuman == False:
print("ConvCharacterToHuman: convtohuman is false hence not doing anything") print("ConvCharacterToHuman: convtohuman is false hence not doing anything")
return tars
# unrecog_ans = True # unrecog_ans = True
# while unrecog_ans: # while unrecog_ans:
@ -108,6 +138,7 @@ def ConvCharacterToHuman(convtohuman, input):
# unrecog_ans = True # unrecog_ans = True
convtohuman = args.convtohuman convtohuman = args.convtohuman
acceptedRatings = args.rating
##Open the file ##Open the file
json_file_path = args.jsonpath ##Name of the JSON file to use, converted into parser arg json_file_path = args.jsonpath ##Name of the JSON file to use, converted into parser arg
@ -184,24 +215,35 @@ for json_str in json_list:
try: try:
img_ext = result['file_ext'] img_ext = result['file_ext']
except Exception: except Exception:
file_ext = None img_ext = None
print("failed to get img_ext") print("img_ext RETRIVAL FAILED. VAR IS ESSENTIAL SO SKIPPING ENTRY.")
continue continue
try: try:
img_rating = result['rating'] img_rating = result['rating']
except Exception: except Exception:
img_rating = None img_rating = None
print("failed to get img_rating") print("img_rating RETRIVAL FAILED. VAR IS ESSENTIAL SO SKIPPING ENTRY.")
continue continue
baru = img_rating in acceptedRatings
# print("HEYYYYYYYYYYYYYYYY " + str(baru))
if str(baru) == "False":
print("Entry rating' is not in acceptedRatings, skipping entry.")
continue
elif str(baru) == "True":
print("Entry rating matches!")
file_path = str(args.imagespath) + "/0" + img_id_last3 + "/" + img_id + "." + img_ext file_path = str(args.imagespath) + "/0" + img_id_last3 + "/" + img_id + "." + img_ext
if os.path.exists(file_path): if os.path.exists(file_path):
shutil.copyfile(file_path, args.extractpath + '/' + img_id + "." + img_ext) shutil.copyfile(file_path, args.extractpath + '/' + img_id + "." + img_ext)
##Essential ##Essential
FinalTagStringGeneral = ConvCommaAndUnderscoreToHuman(convtohuman, img_tag_string_general) FinalTagStringGeneral = ConvCommaAndUnderscoreToHuman(convtohuman, img_tag_string_general)
print(FinalTagStringGeneral)
FinalTagStringGeneral = ConvTagsToHuman(convtohuman, FinalTagStringGeneral) FinalTagStringGeneral = ConvTagsToHuman(convtohuman, FinalTagStringGeneral)
##Not essential ##Not essential
@ -227,6 +269,8 @@ for json_str in json_list:
else: else:
print("CE 3NE") print("CE 3NE")
print("IMAGE RATING IS: " + img_rating)
if img_rating != None: if img_rating != None:
FinalTagStringRating = ConvRatingToHuman(convtohuman, img_rating) FinalTagStringRating = ConvRatingToHuman(convtohuman, img_rating)
elif img_rating == None: elif img_rating == None:
@ -240,10 +284,18 @@ for json_str in json_list:
elif convtohuman == False: elif convtohuman == False:
dan_iden = 'danbooru' dan_iden = 'danbooru'
tag_separator = ' ' tag_separator = ' '
# print('FinalTagStringCharacter is: ' + FinalTagStringCharacter)
# print('tag_separator is: ' + tag_separator)
# print('FinalTagStringArtist is: ' + FinalTagStringArtist)
# print('FinalTagStringRating is: ' + FinalTagStringRating)
# print('FinalTagStringGeneral is: ' + FinalTagStringGeneral)
# print('FinalTagStringCopyright is: ' + FinalTagStringCopyright)
to_write = FinalTagStringCharacter + tag_separator + FinalTagStringArtist + tag_separator + FinalTagStringRating + tag_separator + FinalTagStringGeneral + tag_separator + FinalTagStringCopyright to_write = FinalTagStringCharacter + tag_separator + FinalTagStringArtist + tag_separator + FinalTagStringRating + tag_separator + FinalTagStringGeneral + tag_separator + FinalTagStringCopyright
txt_name = args.extractpath + "/" + img_id + '.txt' txt_name = args.extractpath + "/" + img_id + '.txt'
writefile(txt_name, to_write) writefile(txt_name, to_write)
current_saved_file_count = current_saved_file_count + 1 current_saved_file_count = current_saved_file_count + 1
elif os.path.exists(file_path) == False:
print("Failed to find path.")
print("finished process. Your extracted data should be in " + args.extractpath + " !") print("finished process. Your extracted data should be in " + str(args.extractpath) + " !")