finished preprocessing

edc55ac2 · pjtka · 39b53e4c · edc55ac2 · edc55ac2
Commit edc55ac2 authored 4 years ago by pjtka
--- a/image_cropping.py
+++ b/image_cropping.py
@@ -9,6 +9,7 @@ import heapq
 import color_constancy as cc
 import os
 import time
+import pandas as pd

 plt.close('all')

@@ -17,7 +18,7 @@ time_zero = time.time()
 width = 600
 height = 450
 preserve_size = 600
-paths = [r'C:\Users\ptrkm\OneDrive\Dokumenter\TestFolder\\']
+paths = [r'C:\Users\ptrkm\OneDrive\Dokumenter\Bachelor deep learning\Data ISIC\ISIC_2019_Training_Input\\']
 return_folder = r'C:\Users\ptrkm\OneDrive\Dokumenter\TestFolder\return\\'
 # paths = [r'C:\Users\Bruger\OneDrive\DTU - General engineering\6. Semester\Bachelor\ISBI2016_ISIC_Part2B_Training_Data\TestRunImages\\']
 # return_folder = r'C:\Users\Bruger\OneDrive\DTU - General engineering\6. Semester\Bachelor\ISBI2016_ISIC_Part2B_Training_Data\TestRunImagesOutput\\'
@@ -27,7 +28,7 @@ margin = 0.1
 crop_black = True
 k = 200
 threshold = 0.7
-resize = False
+resize = True
 use_color_constancy = True
 write_to_png = False
 write = True
@@ -38,22 +39,28 @@ use_cropping = False
 errors = []
 area_threshold = 0.80
 for i, j in enumerate(os.listdir(paths[0])):
-    # if j == 'ISIC_0000006.jpg':
+     # if j == 'ISIC_0000031_downsampled.jpg':
+
+    if i > 2900:
+        if i == 2901:
+            t2 = time.time()
+            print("i have started"+ str(t2-time_zero))
+

-    if j!= 'return':
        try:
            image = cv2.imread(paths[0]+j)
-            print("yes man")
+
        except:
            print("File " + j + "Could not read :(")
            errors.append(j)
            continue
-        print("hej")
-        print(j)
+
+

        if crop_black:

            gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+            gray_image = gray_image ** 1.5
            threshold_level = threshold_otsu(gray_image)
            gray_image = ndimage.gaussian_filter(gray_image, sigma=np.sqrt(2))
            binary_image = gray_image < threshold_level
@@ -65,17 +72,18 @@ for i, j in enumerate(os.listdir(paths[0])):
            mean_bottom = np.mean(image[n-k:,m // 2 - m // 2:m // 2 + k // 2])
            mean_middle = np.mean(image[n // 2 - k:n // 2 + k,m // 2 - k:m // 2 + k])

-
            if mean_middle > np.max([mean_left,mean_top]):
                binary_image = gray_image > threshold_level
            # We now find features in the binarised blobs

            blob_labels = measure.label(binary_image)
            blob_features = measure.regionprops(blob_labels)
+
            if blob_features:
                largest_blob_idx = np.argmax(np.asarray([blob_features[i].area for i in range(len(blob_features))]))
                largest_blob = blob_features[largest_blob_idx]
                radius = np.mean([largest_blob.major_axis_length, largest_blob.minor_axis_length]) / 2
+                equivalent_diameter = largest_blob.equivalent_diameter

                x_min = (largest_blob.centroid[1] - radius + margin * radius).astype(int)
                x_max = (largest_blob.centroid[1] + radius - margin * radius).astype(int)
@@ -97,9 +105,7 @@ for i, j in enumerate(os.listdir(paths[0])):
                    y_max = (largest_blob.centroid[0] + rad - margin * rad).astype(int)

                    if x_min < 0 or x_max > image.shape[1] or y_min < 0 or y_max > image.shape[0]:
-
                        break
-
                    area_coefficient = np.sum(binary_image[(y_center-rad).astype(int):(y_center + rad).astype(int),
                                              (x_center-rad).astype(int):(x_center+rad).astype(int)])/largest_blob.area
                    if area_coefficient >= area_threshold:
@@ -141,10 +147,9 @@ for i, j in enumerate(os.listdir(paths[0])):
                if np.sum(binary_image)/(n*m)<0.05 or np.sum(binary_image)/(n*m)>0.95:
                    use_cropping = False

-
            if use_cropping:
                image = image[y_min:y_max, x_min:x_max, :]
-
+            if image.shape[0] > 0 and image.shape[1] > 0 and image.shape[2] > 0:
                if resize:
                    if preserve_ratio:
                        if image.shape[0] > image.shape[1]:
@@ -152,17 +157,26 @@ for i, j in enumerate(os.listdir(paths[0])):

                        if image.shape[1] != preserve_size:
                            ratio = preserve_size / image.shape[1]
-
-                        image = cv2.resize(image, dsize=[(round(image.shape[0] * ratio)).astype(int), preserve_size])
+                            try:
+                                image = cv2.resize(image, dsize=(round(image.shape[0] * ratio), preserve_size))
+                            except:
+                                print("resize problem on image" + j)
+                                errors.append(j)
+                                continue
                    else:
                        if image.shape[0] > image.shape[1]:
                            image = np.moveaxis(image, [0, 1, 2], [1, 0, 2])
                        if image.shape[0] != standard_size[0] or image.shape[1] != standard_size[1]:
-                        image = cv2.resize(image, dsize=[standard_size])
+                            image = cv2.resize(image, dsize=(standard_size[0], standard_size[1]))
                if use_color_constancy:
-
+                    try:
                        R, G, B, new_image = cc.general_color_constancy(image, 0, 6, 0)
                        new_image = np.uint8(new_image)
+                    except:
+                        print("resize problem on image" + j)
+                        errors.append(j)
+                        continue
+
                else:
                    new_image = image

@@ -174,9 +188,17 @@ for i, j in enumerate(os.listdir(paths[0])):
                        im = Image.fromarray(new_image.astype('uint8')).convert('RGB')

                        im.save(return_folder + j)
+            else:
+                errors.append(j)
+
+            if i % 100==0: print(i)
+

-            if i % 1000: print(i)

 time_one = time.time()
+errors_total = pd.DataFrame()
+
+errors_total['all_errors'] = errors

+errors_total.to_excel(r'C:\Users\ptrkm\OneDrive\Dokumenter\TestFolder\return\errors.xlsx')
 print(time_one-time_zero)
--- a/trash_pickup.py
+++ b/trash_pickup.py
+
+import numpy as np
+import matplotlib.pyplot as plt
+import cv2
+from PIL import Image
+from skimage.filters import threshold_otsu
+from skimage import measure
+from scipy import ndimage, signal
+import heapq
+import color_constancy as cc
+import os
+import time
+import pandas as pd
+
+
+
+# ---------------------------------------------------------------------------
+# Repair pass over the images stored in `return_folder`.
+#
+# Every file in `return_folder` is read back; if its height or width is below
+# `min_side` pixels, the file with the same name is reloaded from `paths[0]`,
+# resized (keeping the aspect ratio), passed through
+# `cc.general_color_constancy` and written over the small file.
+# ---------------------------------------------------------------------------
+
+# Fixed output size used for `standard_size`, stored as (height, width).
+width = 600
+height = 450
+# Target width in pixels when the aspect ratio is preserved.
+preserve_size = 600
+paths = [r'C:\Users\ptrkm\OneDrive\Dokumenter\Bachelor deep learning\Data ISIC\ISIC_2019_Training_Input\\']
+return_folder = r'C:\Users\ptrkm\OneDrive\Dokumenter\TestFolder\return\\'
+# paths = [r'C:\Users\Bruger\OneDrive\DTU - General engineering\6. Semester\Bachelor\ISBI2016_ISIC_Part2B_Training_Data\TestRunImages\\']
+# return_folder = r'C:\Users\Bruger\OneDrive\DTU - General engineering\6. Semester\Bachelor\ISBI2016_ISIC_Part2B_Training_Data\TestRunImagesOutput\\'
+standard_size = np.asarray([height, width])
+preserve_ratio = True
+# NOTE(review): the settings from `margin` down to `area_threshold` (except
+# `resize` and `errors`) are not referenced further down in this script;
+# presumably they are kept to mirror image_cropping.py - confirm before
+# removing them.
+margin = 0.1
+crop_black = True
+k = 200
+threshold = 0.7
+resize = True
+use_color_constancy = True
+write_to_png = False
+write = True
+ind = 1
+all_heights = 0
+all_width = 0
+use_cropping = False
+# Names of the files that could not be read or processed.
+errors = []
+area_threshold = 0.80
+# A stored image with a side shorter than this many pixels is rebuilt.
+min_side = 50
+
+full_data = os.listdir(paths[0])
+cropped_data = os.listdir(return_folder)
+
+# Input files that have no counterpart in the return folder. Computed for
+# inspection only; nothing below consumes it.
+unused_data = list(set(full_data)-set(cropped_data))
+
+
+for i, images in enumerate(cropped_data):
+    # Progress indicator; printed up front so it is not skipped by `continue`.
+    if i % 100 == 0:
+        print(i)
+
+    try:
+        image = cv2.imread(return_folder + images)
+        # cv2.imread does not raise on an unreadable or non-image file (for
+        # example a spreadsheet stored in the same folder); it returns None.
+        if image is None:
+            print(images)
+            errors.append(images)
+            continue
+
+        # Large enough already: leave the stored file untouched.
+        if image.shape[0] >= min_side and image.shape[1] >= min_side:
+            continue
+
+        # Start over from the original input image with the same file name.
+        image = cv2.imread(paths[0] + images)
+        if image is None:
+            print(images)
+            errors.append(images)
+            continue
+
+        if resize and preserve_ratio:
+            # Swap the two spatial axes of portrait images so that the longer
+            # side always lies along axis 1.
+            if image.shape[0] > image.shape[1]:
+                image = np.moveaxis(image, [0, 1, 2], [1, 0, 2])
+
+            if image.shape[1] != preserve_size:
+                ratio = preserve_size / image.shape[1]
+                try:
+                    # cv2.resize expects dsize as (width, height): the width
+                    # becomes `preserve_size` and the height is scaled by the
+                    # same factor so the aspect ratio is kept.
+                    image = cv2.resize(image, dsize=(preserve_size, round(image.shape[0] * ratio)))
+                except cv2.error:
+                    print("resize problem on image" + images)
+                    errors.append(images)
+                    continue
+
+        # Only the fourth return value (the corrected image) is used here.
+        _, _, _, new_image = cc.general_color_constancy(image, 0, 6, 0)
+        new_image = np.uint8(new_image)
+
+        # NOTE(review): cv2.imread yields BGR channel order while
+        # Image.fromarray interprets the array as RGB - confirm whether
+        # cc.general_color_constancy already reorders the channels.
+        im = Image.fromarray(new_image).convert('RGB')
+
+        im.save(return_folder + images)
+    except Exception as exc:
+        # Best effort: report the file together with the reason and move on.
+        # Unlike a bare `except:`, this lets KeyboardInterrupt stop the run.
+        print(images, repr(exc))
+        errors.append(images)
+        continue
+
+