enabled file sorting based on subject and match strength

This commit is contained in:
Mikayla Dobson
2023-01-06 20:21:50 -06:00
parent 9510e36e45
commit 64358ce096
3 changed files with 88 additions and 42 deletions

47
formatresult.py Normal file
View File

@@ -0,0 +1,47 @@
import json, os, shutil


def format_result(src_path, json_path):
    """Sort analyzed images into ./predictions/<strength>/<label>/ folders.

    Reads prediction records from *json_path* — a JSON list of
    {"path": ..., "prediction": ...} entries — copies each source image
    into a directory named for its match strength ("strong", "moderate",
    "fair", "weak") and its top guessed label, then prints the weak-result
    count and a per-label file tally.

    src_path  -- directory holding the original images (with trailing "/")
    json_path -- path to the predictions JSON file
    """
    insertions_by_label = {}
    weak_results = 0
    total_count = 0
    with open(json_path) as results:
        contents = json.load(results)
    # Pre-create one bucket directory per confidence tier.
    for qualifier in ['strong', 'moderate', 'fair', 'weak']:
        if not os.path.exists("./predictions/" + qualifier):
            os.makedirs('./predictions/' + qualifier)
    for line in contents:
        img_path = src_path + line['path']
        prediction = line['prediction']
        for section in prediction:
            total_count += 1
            # section[0] is the top-ranked guess: (id, label, score-as-string).
            guess_label = section[0][1]
            score = float(section[0][2])  # hoisted: was re-parsed per branch
            if score > 0.9:
                match_strength = 'strong/'
            elif score > 0.75:
                match_strength = 'moderate/'
            elif score > 0.5:
                match_strength = 'fair/'
            else:
                match_strength = 'weak/'
                weak_results += 1
            if guess_label not in insertions_by_label:
                insertions_by_label[guess_label] = 0
            dest_dir = "./predictions/" + match_strength + guess_label
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)
            # BUG FIX: the skip-if-present check must test the destination
            # file name only; appending the full source path never matched,
            # so every run redid the copy.
            if not os.path.exists(dest_dir + '/' + os.path.basename(img_path)):
                shutil.copy(img_path, dest_dir)
            insertions_by_label[guess_label] = insertions_by_label[guess_label] + 1
    print(str(weak_results) + " weak result(s) of a total " + str(total_count) + " input(s)\n")
    print("By subject:\n\n")
    for k, v in insertions_by_label.items():
        print(k + ": " + str(v) + " file(s) found")

59
main.py
View File

@@ -3,46 +3,69 @@
# most of this application adapted from the following walkthrough:
# https://towardsdatascience.com/how-to-use-a-pre-trained-model-vgg-for-image-classification-8dd7c4a4a517
#
# Image-sorting driver: runs VGG16 predictions over every file in a
# directory (first CLI argument), writes the raw results to a timestamped
# JSON file, then hands off to format_result to sort the images into
# predictions/<strength>/<label>/ folders.
import sys, os, json
from time import time

from predict import predict
from formatresult import format_result
from keras.applications.vgg16 import VGG16

print("\n\nImage Sorting Utility\n")
print("Script by Mikayla Dobson\n")
print("Beginning setup...\n\n")

############################## SETUP
# create the target directory if it doesn't exist
if not os.path.exists("./predictions"):
    print("Did not find predictions directory, creating...\n\n")
    os.makedirs("./predictions")

# receive directory path as CLI argument and get a list of all files in path
src_path = sys.argv[1]
if src_path[-1] != "/":
    src_path += "/"
files = os.listdir(src_path)

# generate current time for use in identifying outfiles
cur_time = str(int(time()))

# store all results in one list
all_results = []

############################## ANALYSIS
# declare model to be used for each prediction
model = VGG16(weights='imagenet')
print("Running image analysis. This may take some time...\n\n")
# for each file in directory, append its prediction result to main list
for file in files:
    result = predict(model, src_path + file)
    if result is not None:
        all_results.append({ "path": file, "prediction": result })

json_path = "./predictions/predictions" + cur_time + ".json"
print("Writing analysis results to " + json_path + "\n\n")
# convert object to JSON and write to JSON file
with open(json_path, "w") as outfile:
    json.dump(all_results, outfile)
print("Analysis complete! Beginning sort process...\n\n")

############################## SORTING
format_result(src_path, json_path)
print("File sort successful! Process complete.")

View File

@@ -1,24 +0,0 @@
import sys, json

# Report probable / potential matches found in a predictions JSON file.
# The file path is taken from the first CLI argument; each record holds
# a source image path plus nested guess triples (id, label, score).
results_path = sys.argv[1]
with open(results_path) as handle:
    records = json.load(handle)

for record in records:
    for section in record['prediction']:
        for guess in section:
            score = float(guess[2])
            # scores at or below 0.3 are not reported at all
            if score <= 0.3:
                continue
            heading = "Probable match: " if score > 0.75 else "Potential match: "
            print(record['path'])
            print(heading + guess[1])
            print(guess)
            print("\n")