enabled file sorting based on subject and match strength

This commit is contained in:
Mikayla Dobson
2023-01-06 20:21:50 -06:00
parent 9510e36e45
commit 64358ce096
3 changed files with 88 additions and 42 deletions

47
formatresult.py Normal file
View File

@@ -0,0 +1,47 @@
import json, os, shutil


def format_result(src_path, json_path):
    """Sort analyzed images into ./predictions/<strength>/<label>/ folders.

    Reads prediction records from *json_path* — a JSON list of
    {"path": ..., "prediction": ...} entries — copies each source image
    into a directory named for its match strength ("strong", "moderate",
    "fair", "weak") and its top guessed label, then prints the weak-result
    count and a per-label file tally.

    src_path  -- directory holding the original images (with trailing "/")
    json_path -- path to the predictions JSON file
    """
    insertions_by_label = {}
    weak_results = 0
    total_count = 0
    with open(json_path) as results:
        contents = json.load(results)
    # Pre-create one bucket directory per confidence tier.
    for qualifier in ['strong', 'moderate', 'fair', 'weak']:
        if not os.path.exists("./predictions/" + qualifier):
            os.makedirs('./predictions/' + qualifier)
    for line in contents:
        img_path = src_path + line['path']
        prediction = line['prediction']
        for section in prediction:
            total_count += 1
            # section[0] is the top-ranked guess: (id, label, score-as-string).
            guess_label = section[0][1]
            score = float(section[0][2])  # hoisted: was re-parsed per branch
            if score > 0.9:
                match_strength = 'strong/'
            elif score > 0.75:
                match_strength = 'moderate/'
            elif score > 0.5:
                match_strength = 'fair/'
            else:
                match_strength = 'weak/'
                weak_results += 1
            if guess_label not in insertions_by_label:
                insertions_by_label[guess_label] = 0
            dest_dir = "./predictions/" + match_strength + guess_label
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)
            # BUG FIX: the skip-if-present check must test the destination
            # file name only; appending the full source path never matched,
            # so every run redid the copy.
            if not os.path.exists(dest_dir + '/' + os.path.basename(img_path)):
                shutil.copy(img_path, dest_dir)
            insertions_by_label[guess_label] = insertions_by_label[guess_label] + 1
    print(str(weak_results) + " weak result(s) of a total " + str(total_count) + " input(s)\n")
    print("By subject:\n\n")
    for k, v in insertions_by_label.items():
        print(k + ": " + str(v) + " file(s) found")

59
main.py
View File

@@ -3,46 +3,69 @@
# most of this application adapted from the following walkthrough:
# https://towardsdatascience.com/how-to-use-a-pre-trained-model-vgg-for-image-classification-8dd7c4a4a517
#
# Image-sorting driver: runs VGG16 predictions over every file in a
# directory (first CLI argument), writes the raw results to a timestamped
# JSON file, then hands off to format_result to sort the images into
# predictions/<strength>/<label>/ folders.
import sys, os, json
from time import time

from predict import predict
from formatresult import format_result
from keras.applications.vgg16 import VGG16

print("\n\nImage Sorting Utility\n")
print("Script by Mikayla Dobson\n")
print("Beginning setup...\n\n")

############################## SETUP
# create the target directory if it doesn't exist
if not os.path.exists("./predictions"):
    print("Did not find predictions directory, creating...\n\n")
    os.makedirs("./predictions")

# receive directory path as CLI argument and get a list of all files in path
src_path = sys.argv[1]
if src_path[-1] != "/":
    src_path += "/"
files = os.listdir(src_path)

# generate current time for use in identifying outfiles
cur_time = str(int(time()))

# store all results in one list
all_results = []

############################## ANALYSIS
# declare model to be used for each prediction
model = VGG16(weights='imagenet')
print("Running image analysis. This may take some time...\n\n")
# for each file in directory, append its prediction result to main list
for file in files:
    result = predict(model, src_path + file)
    if result is not None:
        all_results.append({ "path": file, "prediction": result })

json_path = "./predictions/predictions" + cur_time + ".json"
print("Writing analysis results to " + json_path + "\n\n")
# convert object to JSON and write to JSON file
with open(json_path, "w") as outfile:
    json.dump(all_results, outfile)
print("Analysis complete! Beginning sort process...\n\n")

############################## SORTING
format_result(src_path, json_path)
print("File sort successful! Process complete.")

View File

@@ -1,24 +0,0 @@
import sys, json

# Report probable / potential matches found in a predictions JSON file.
# The file path is taken from the first CLI argument; each record holds
# a source image path plus nested guess triples (id, label, score).
results_path = sys.argv[1]
with open(results_path) as handle:
    records = json.load(handle)

for record in records:
    for section in record['prediction']:
        for guess in section:
            score = float(guess[2])
            # scores at or below 0.3 are not reported at all
            if score <= 0.3:
                continue
            heading = "Probable match: " if score > 0.75 else "Potential match: "
            print(record['path'])
            print(heading + guess[1])
            print(guess)
            print("\n")