From 64358ce09661156e62d9ef4437117177cba2dd27 Mon Sep 17 00:00:00 2001 From: Mikayla Dobson <93477693+innocuous-symmetry@users.noreply.github.com> Date: Fri, 6 Jan 2023 20:21:50 -0600 Subject: [PATCH] enabled file sorting based on subject and match strength --- formatresult.py | 47 +++++++++++++++++++++++++++++++++++++++ main.py | 59 ++++++++++++++++++++++++++++++++++--------------- readresult.py | 24 -------------------- 3 files changed, 88 insertions(+), 42 deletions(-) create mode 100644 formatresult.py delete mode 100644 readresult.py diff --git a/formatresult.py b/formatresult.py new file mode 100644 index 0000000..2d18fec --- /dev/null +++ b/formatresult.py @@ -0,0 +1,47 @@ +import json, os, shutil + +def format_result(src_path, json_path): + insertions_by_label = {} + + weak_results = 0 + total_count = 0 + + with open(json_path) as results: + contents = json.load(results) + + for qualifier in ['strong', 'moderate', 'fair', 'weak']: + if not os.path.exists("./predictions/" + qualifier): + os.makedirs('./predictions/' + qualifier) + + for line in contents: + img_path = src_path + line['path'] + prediction = line['prediction'] + for section in prediction: + total_count += 1 + guess_label = section[0][1] + match_strength = 'weak/' + + if float(section[0][2]) > 0.9: + match_strength = 'strong/' + elif float(section[0][2]) > 0.75: + match_strength = 'moderate/' + elif float(section[0][2]) > 0.5: + match_strength = 'fair/' + elif match_strength == 'weak/': + weak_results += 1 + + if not guess_label in insertions_by_label: + insertions_by_label[guess_label] = 0 + + if (not os.path.exists("./predictions/" + match_strength + guess_label)): + os.makedirs("./predictions/" + match_strength + guess_label) + + if (not os.path.exists('./predictions/' + match_strength + guess_label + '/' + img_path)): + shutil.copy(img_path, "./predictions/" + match_strength + guess_label) + insertions_by_label[guess_label] = insertions_by_label[guess_label] + 1 + + print(str(weak_results) + 
" weak result(s) of a total " + str(total_count) + " input(s)\n") + print("By subject:\n\n") + + for k, v in insertions_by_label.items(): + print(k + ": " + str(v) + " file(s) found") diff --git a/main.py b/main.py index 0221475..5d8c66b 100644 --- a/main.py +++ b/main.py @@ -3,46 +3,69 @@ # most of this application adapted from the following walkthrough: # https://towardsdatascience.com/how-to-use-a-pre-trained-model-vgg-for-image-classification-8dd7c4a4a517 -import sys, os, json, time +import sys, os, json +from time import time from predict import predict +from formatresult import format_result from keras.applications.vgg16 import VGG16 -print("\n\n\n") -print("Imports successful! Running startup processes...") +print("\n\nImage Sorting Utility\n") +print("Script by Mikayla Dobson\n") +print("\n\n") +print("Beginning setup...\n\n") -# generate current time for use in identifying outfiles -cur_time = str(int(time.time())) +############################## SETUP +############################## SETUP +############################## SETUP # create the target directory if it doesn't exist if (not os.path.exists("./predictions")): - print("Did not find predictions directory, creating...") + print("Did not find predictions directory, creating...\n\n") os.makedirs("./predictions") -# declare model to be used for each prediction -model = VGG16(weights='imagenet') - # receive directory path as CLI argument and get a list of all files in path -path = sys.argv[1] -if (path[-1] != "/"): - path += "/" +src_path = sys.argv[1] -files = os.listdir(path) +if (src_path[-1] != "/"): + src_path += "/" + +files = os.listdir(src_path) + +# generate current time for use in identifying outfiles +cur_time = str(int(time())) # store all results in one list all_results = [] -print("Running image analysis. 
This may take some time") +############################## ANALYSIS +############################## ANALYSIS +############################## ANALYSIS + +# declare model to be used for each prediction +model = VGG16(weights='imagenet') + +print("Running image analysis. This may take some time...\n\n") # for each file in directory, append its prediction result to main list for file in files: - result = predict(model, path + file) + result = predict(model, src_path + file) if result is not None: all_results.append({ "path": file, "prediction": result }) -print("Analysis complete! Writing JSON to ./predictions/predictions" + cur_time + ".json") +json_path = "./predictions/predictions" + cur_time + ".json" + +print("Writing analysis results to " + json_path + "\n\n") # convert object to JSON and write to JSON file -with open("./predictions/predictions" + cur_time + ".json", "w") as outfile: +with open(json_path, "w") as outfile: json.dump(all_results, outfile) -print("Process complete!") +print("Analysis complete! Beginning sort process...\n\n") + +############################## SORTING +############################## SORTING +############################## SORTING + +format_result(src_path, json_path) + +print("File sort successful! Process complete.") diff --git a/readresult.py b/readresult.py deleted file mode 100644 index 66a360b..0000000 --- a/readresult.py +++ /dev/null @@ -1,24 +0,0 @@ -import sys, json - -path = sys.argv[1] - -with open(path) as file: - contents = json.load(file) - -for line in contents: - prediction = line['prediction'] - for section in prediction: - for guess in section: - if (float(guess[2]) > 0.75): - print(line['path']) - print("Probable match: " + guess[1]) - print(guess) - print("\n") - elif (float(guess[2]) > 0.3): - print(line['path']) - print("Potential match: " + guess[1]) - print(guess) - print("\n") - # else: - # print(line['path'] + ": inconclusive") - # print("\n")