Skip to content
Snippets Groups Projects
Commit edc55ac2 authored by pjtka's avatar pjtka
Browse files

finished preprocessing

parent 39b53e4c
No related branches found
No related tags found
1 merge request!2Prepocessing
......@@ -9,6 +9,7 @@ import heapq
import color_constancy as cc
import os
import time
import pandas as pd
plt.close('all')
......@@ -17,7 +18,7 @@ time_zero = time.time()
width = 600
height = 450
preserve_size = 600
paths = [r'C:\Users\ptrkm\OneDrive\Dokumenter\TestFolder\\']
paths = [r'C:\Users\ptrkm\OneDrive\Dokumenter\Bachelor deep learning\Data ISIC\ISIC_2019_Training_Input\\']
return_folder = r'C:\Users\ptrkm\OneDrive\Dokumenter\TestFolder\return\\'
# paths = [r'C:\Users\Bruger\OneDrive\DTU - General engineering\6. Semester\Bachelor\ISBI2016_ISIC_Part2B_Training_Data\TestRunImages\\']
# return_folder = r'C:\Users\Bruger\OneDrive\DTU - General engineering\6. Semester\Bachelor\ISBI2016_ISIC_Part2B_Training_Data\TestRunImagesOutput\\'
......@@ -27,7 +28,7 @@ margin = 0.1
crop_black = True
k = 200
threshold = 0.7
resize = False
resize = True
use_color_constancy = True
write_to_png = False
write = True
......@@ -38,22 +39,28 @@ use_cropping = False
errors = []
area_threshold = 0.80
for i, j in enumerate(os.listdir(paths[0])):
# if j == 'ISIC_0000006.jpg':
# if j == 'ISIC_0000031_downsampled.jpg':
if i > 2900:
if i == 2901:
t2 = time.time()
print("i have started"+ str(t2-time_zero))
if j!= 'return':
try:
image = cv2.imread(paths[0]+j)
print("yes man")
except:
print("File " + j + "Could not read :(")
errors.append(j)
continue
print("hej")
print(j)
if crop_black:
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray_image = gray_image ** 1.5
threshold_level = threshold_otsu(gray_image)
gray_image = ndimage.gaussian_filter(gray_image, sigma=np.sqrt(2))
binary_image = gray_image < threshold_level
......@@ -65,17 +72,18 @@ for i, j in enumerate(os.listdir(paths[0])):
mean_bottom = np.mean(image[n-k:,m // 2 - m // 2:m // 2 + k // 2])
mean_middle = np.mean(image[n // 2 - k:n // 2 + k,m // 2 - k:m // 2 + k])
if mean_middle > np.max([mean_left,mean_top]):
binary_image = gray_image > threshold_level
# We now find features in the binarised blobs
blob_labels = measure.label(binary_image)
blob_features = measure.regionprops(blob_labels)
if blob_features:
largest_blob_idx = np.argmax(np.asarray([blob_features[i].area for i in range(len(blob_features))]))
largest_blob = blob_features[largest_blob_idx]
radius = np.mean([largest_blob.major_axis_length, largest_blob.minor_axis_length]) / 2
equivalent_diameter = largest_blob.equivalent_diameter
x_min = (largest_blob.centroid[1] - radius + margin * radius).astype(int)
x_max = (largest_blob.centroid[1] + radius - margin * radius).astype(int)
......@@ -97,9 +105,7 @@ for i, j in enumerate(os.listdir(paths[0])):
y_max = (largest_blob.centroid[0] + rad - margin * rad).astype(int)
if x_min < 0 or x_max > image.shape[1] or y_min < 0 or y_max > image.shape[0]:
break
area_coefficient = np.sum(binary_image[(y_center-rad).astype(int):(y_center + rad).astype(int),
(x_center-rad).astype(int):(x_center+rad).astype(int)])/largest_blob.area
if area_coefficient >= area_threshold:
......@@ -141,10 +147,9 @@ for i, j in enumerate(os.listdir(paths[0])):
if np.sum(binary_image)/(n*m)<0.05 or np.sum(binary_image)/(n*m)>0.95:
use_cropping = False
if use_cropping:
image = image[y_min:y_max, x_min:x_max, :]
if image.shape[0] > 0 and image.shape[1] > 0 and image.shape[2] > 0:
if resize:
if preserve_ratio:
if image.shape[0] > image.shape[1]:
......@@ -152,17 +157,26 @@ for i, j in enumerate(os.listdir(paths[0])):
if image.shape[1] != preserve_size:
ratio = preserve_size / image.shape[1]
image = cv2.resize(image, dsize=[(round(image.shape[0] * ratio)).astype(int), preserve_size])
try:
image = cv2.resize(image, dsize=(round(image.shape[0] * ratio), preserve_size))
except:
print("resize problem on image" + j)
errors.append(j)
continue
else:
if image.shape[0] > image.shape[1]:
image = np.moveaxis(image, [0, 1, 2], [1, 0, 2])
if image.shape[0] != standard_size[0] or image.shape[1] != standard_size[1]:
image = cv2.resize(image, dsize=[standard_size])
image = cv2.resize(image, dsize=(standard_size[0], standard_size[1]))
if use_color_constancy:
try:
R, G, B, new_image = cc.general_color_constancy(image, 0, 6, 0)
new_image = np.uint8(new_image)
except:
print("resize problem on image" + j)
errors.append(j)
continue
else:
new_image = image
......@@ -174,9 +188,17 @@ for i, j in enumerate(os.listdir(paths[0])):
im = Image.fromarray(new_image.astype('uint8')).convert('RGB')
im.save(return_folder + j)
else:
errors.append(j)
if i % 100==0: print(i)
if i % 1000: print(i)
time_one = time.time()
errors_total = pd.DataFrame()
errors_total['all_errors'] = errors
errors_total.to_excel(r'C:\Users\ptrkm\OneDrive\Dokumenter\TestFolder\return\errors.xlsx')
print(time_one-time_zero)
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from skimage.filters import threshold_otsu
from skimage import measure
from scipy import ndimage, signal
import heapq
import color_constancy as cc
import os
import time
import pandas as pd
# Second-pass clean-up script: walk the images already written to
# `return_folder` and, for any whose height or width is below 50 px, rebuild
# the image from the untouched original in `paths[0]` (resize + colour
# constancy, no cropping) and overwrite the file in `return_folder`.
#
# NOTE(review): the copy of this script under review had lost all leading
# indentation. The nesting below is reconstructed from syntax and intent; in
# particular the colour-constancy/save steps are assumed to belong inside the
# "image is too small" branch -- confirm against the original file.

# --- Configuration ---------------------------------------------------------
width = 600
height = 450
preserve_size = 600  # target width in pixels when preserve_ratio is True
paths = [r'C:\Users\ptrkm\OneDrive\Dokumenter\Bachelor deep learning\Data ISIC\ISIC_2019_Training_Input\\']
return_folder = r'C:\Users\ptrkm\OneDrive\Dokumenter\TestFolder\return\\'
# paths = [r'C:\Users\Bruger\OneDrive\DTU - General engineering\6. Semester\Bachelor\ISBI2016_ISIC_Part2B_Training_Data\TestRunImages\\']
# return_folder = r'C:\Users\Bruger\OneDrive\DTU - General engineering\6. Semester\Bachelor\ISBI2016_ISIC_Part2B_Training_Data\TestRunImagesOutput\\'
standard_size = np.asarray([height, width])
preserve_ratio = True
resize = True

# The settings below are not referenced by the loop in this script; they are
# kept so the module-level names stay available.
margin = 0.1
crop_black = True
k = 200
threshold = 0.7
use_color_constancy = True
write_to_png = False
write = True
ind = 1
all_heights = 0
all_width = 0
use_cropping = False
area_threshold = 0.80

# Names of every file for which re-processing failed.
errors = []

# --- Inputs ----------------------------------------------------------------
full_data = os.listdir(paths[0])
cropped_data = os.listdir(return_folder)
# Files present in the input folder but absent from the output folder
# (computed for inspection; not used by the loop below).
unused_data = list(set(full_data) - set(cropped_data))

# --- Re-process undersized outputs -----------------------------------------
for i, images in enumerate(cropped_data):
    try:
        image = cv2.imread(return_folder + images)
        if image is None:
            # cv2.imread reports an unreadable/non-image file by returning
            # None rather than raising, so check for it explicitly.
            print(images)
            errors.append(images)
            continue

        if image.shape[0] < 50 or image.shape[1] < 50:
            # Output is under 50 px in one dimension (presumably a failed
            # crop -- TODO confirm): start again from the original image.
            image = cv2.imread(paths[0] + images)
            if image is None:
                print(images)
                errors.append(images)
                continue

            if resize and preserve_ratio:
                if image.shape[0] > image.shape[1]:
                    # Swap rows and columns so the longer side is the width.
                    image = np.moveaxis(image, [0, 1, 2], [1, 0, 2])
                if image.shape[1] != preserve_size:
                    ratio = preserve_size / image.shape[1]
                    try:
                        # cv2.resize expects dsize as (width, height). The
                        # previous code passed (height, width), which
                        # distorted the aspect ratio instead of preserving it.
                        image = cv2.resize(
                            image,
                            dsize=(preserve_size, round(image.shape[0] * ratio)),
                        )
                    except cv2.error:
                        print("resize problem on image" + images)
                        errors.append(images)
                        continue

            # Only the corrected image (4th return value) is used here.
            _, _, _, new_image = cc.general_color_constancy(image, 0, 6, 0)
            new_image = np.uint8(new_image)
            # NOTE(review): cv2.imread yields BGR channel order while PIL
            # treats a 3-channel array as RGB -- verify which order
            # general_color_constancy returns before trusting saved colours.
            im = Image.fromarray(new_image).convert('RGB')
            im.save(return_folder + images)
    except Exception:
        # Best-effort batch job: note the failing file and keep going, but do
        # not swallow KeyboardInterrupt/SystemExit as a bare `except:` would.
        print(images)
        errors.append(images)
        continue
    if i % 100 == 0:
        print(i)

if errors:
    print("failed images: " + str(errors))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment