This code generates a bounding box and extracts data from a graph in an example image. Interestingly, this code project also extends my PhD research work from manual to automated extraction of data from images. Here, I show a basic example. The example graph:
The sampled extracted data are displayed together with the bounding box.
To scale the data, the minimum and maximum axis values are taken from the example image.
Finally, the extracted and scaled output data are plotted.
# 2020 Dr Tariq Javid, Hamdard University
# Render matplotlib figures inline. `get_ipython` is only injected by
# IPython/Jupyter, so this line raises NameError under a plain `python` run.
get_ipython().run_line_magic('matplotlib', 'inline')
import warnings
# Suppress every warning for the whole session. NOTE(review): this also hides
# NumPy RuntimeWarnings (e.g. divide-by-zero) raised by the code below.
warnings.filterwarnings('ignore')
import numpy as np
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.patches import Rectangle
def data_line_center(x):
    """Return the centre position of the zero-valued samples in *x*.

    The result is ``first_zero_index + zero_count / 2``. All zeros in the
    vector are counted, whether or not they are contiguous, so the value is
    the true centre only when the zeros form a single run.

    Parameters
    ----------
    x : sequence or 1-D array
        Samples to inspect; an entry counts when it compares equal to 0.

    Returns
    -------
    float
        Centre position, or ``0.0`` when *x* contains no zero at all.
    """
    zero_positions = np.flatnonzero(np.asarray(x) == 0)
    if zero_positions.size == 0:
        # No zero sample: same result as the original (start 0, count 0).
        return 0.
    return float(zero_positions[0] + zero_positions.size / 2.)
def bound_bx(arr, a, b, c, d):
    """Mark the extreme zero-valued pixels of a window of *arr*.

    Inside the window ``arr[a:c, b:d]`` up to four pixels are located:

    * top    -- leftmost zero in the first row that contains a zero,
    * left   -- topmost zero in the first column that contains a zero,
    * bottom -- leftmost zero in the last row that contains a zero,
    * right  -- topmost zero in the last column that contains a zero.

    Several of these may be the same pixel, so fewer than four distinct
    pixels can be marked.

    The previous implementation flipped the whole array for the bottom and
    right scans while reusing the ``a..c`` / ``b..d`` indices, so for any
    window other than the full array it searched a mirrored region. All four
    searches now use the same window.

    Parameters
    ----------
    arr : 2-D numpy array
        Image in which a value of exactly 0 marks a foreground pixel.
    a, b : int
        First row and first column of the window (expected to be >= 0).
    c, d : int
        One past the last row and one past the last column of the window.

    Returns
    -------
    numpy array
        Array shaped and typed like *arr*, holding 1 at each located pixel
        and 0 everywhere else (all zeros when the window has no zero pixel).
    """
    new_arr = np.zeros_like(arr)

    # argwhere yields (row, col) pairs relative to the window, in row-major order.
    hits = np.argwhere(arr[a:c, b:d] == 0.)
    if hits.shape[0] == 0:
        return new_arr

    rows = hits[:, 0] + a  # back to full-array coordinates
    cols = hits[:, 1] + b

    top_row, bottom_row = rows.min(), rows.max()
    left_col, right_col = cols.min(), cols.max()

    extremes = (
        (top_row, cols[rows == top_row].min()),
        (rows[cols == left_col].min(), left_col),
        (bottom_row, cols[rows == bottom_row].min()),
        (rows[cols == right_col].min(), right_col),
    )
    for row, col in extremes:
        new_arr[row, col] = 1.
    return new_arr
def point_dat(arr, a, b, c, d):  # binarization
    """Binarise the window ``arr[a:c, b:d]`` with a fixed 0.5 threshold.

    Parameters
    ----------
    arr : 2-D numpy array
        Input image.
    a, b : int
        First row and first column of the window.
    c, d : int
        One past the last row and one past the last column of the window.

    Returns
    -------
    numpy array
        Array shaped and typed like *arr*. Inside the window a pixel is 0
        where the input is below 0.5 and 1 otherwise; every pixel outside
        the window is 1.
    """
    new_arr = np.ones_like(arr)
    # Vectorised threshold instead of a per-pixel Python loop.
    new_arr[a:c, b:d] = np.where(arr[a:c, b:d] < 0.5, 0., 1.)
    return new_arr
def bound_bx_points(arr, nrows, ncols):
    """Collect the coordinates of the marked pixels into a 4x2 array.

    The region ``arr[:nrows, :ncols]`` is searched for pixels equal to 1 and
    their ``(row, col)`` coordinates are returned in row-major order.

    When fewer than four pixels are marked, the remaining rows repeat the
    last point found. They used to stay at ``(0, 0)``, which dragged any
    ``min()`` taken over a column down to 0 and so corrupted a bounding box
    derived from the result.

    Parameters
    ----------
    arr : 2-D numpy array
        Mask in which a value of exactly 1 marks a point of interest.
    nrows, ncols : int
        Number of rows and columns of *arr* to search.

    Returns
    -------
    numpy array of shape (4, 2), float
        Row index in column 0 and column index in column 1. All zeros when
        no pixel is marked.

    Raises
    ------
    IndexError
        If more than four pixels are marked (the result only has four rows).
    """
    corners = np.zeros((4, 2))
    hits = np.argwhere(np.asarray(arr)[:nrows, :ncols] == 1.)
    found = hits.shape[0]
    if found > 4:
        raise IndexError(
            "expected at most 4 marked pixels, found %d" % found)
    if found:
        corners[:found] = hits
        # Pad with a real point so the unused rows do not act as (0, 0).
        corners[found:] = hits[-1]
    return corners
def scale_dat(x, m, n, d, t, s):
    """Resample *x* to *m* points and rescale the values to ``[t, t + n]``.

    Sample ``i`` (``0 <= i < m``) is taken from ``x[round(i * d)]``. The
    sampled values are normalised to [0, 1], flipped (the smallest input
    maps to the largest output, and vice versa), multiplied by *n* and
    shifted by *t*.

    Parameters
    ----------
    x : 1-D array-like
        Source values.
    m : int
        Number of output samples.
    n : float
        Width of the output value range.
    d : float
        Step, in source samples, between consecutive output samples.
    t : float
        Lower bound of the output value range.
    s : number
        Label of the first output sample; sample ``i`` is labelled ``s + i``.

    Returns
    -------
    numpy array of shape (m, 3)
        Column 0: source index used, column 1: rescaled value, column 2:
        sample label.
    """
    arr = np.zeros([m, 3])
    if m == 0:
        # Nothing to sample; min()/max() on an empty column would raise.
        return arr

    samples = np.asarray(x)
    steps = np.arange(m)

    # np.rint rounds half to even, like the built-in round() used before.
    picks = np.rint(steps * d).astype(int)
    # round((m - 1) * d) can reach len(x) when d < 0.5; keep it in range.
    picks = np.minimum(picks, len(samples) - 1)

    arr[:, 0] = picks
    arr[:, 1] = samples[picks]
    arr[:, 2] = s + steps

    vals = arr[:, 1] - arr[:, 1].min()
    peak = vals.max()
    if peak > 0:
        vals = vals / peak  # data in [0, 1]
    # else: flat data -- leave the zeros instead of producing 0/0 = NaN.

    # Flip up/down, then apply the output range and offset.
    arr[:, 1] = (1.0 - vals) * n + t
    return arr
def main(image_path='example066_IP.png', dat_start=1851, dat_end=2000,
         tmp_min=17.40, tmp_max=18.77):
    """Extract the data line from a graph image and plot it in data units.

    Steps: read the image, convert it to a normalised grayscale array,
    binarise it, locate the extreme dark pixels, and draw the resulting box
    on top of the image. For every pixel column inside the box the
    dark-pixel centre is computed (every 10th one is drawn as a dot), then
    the centres are resampled and rescaled and plotted in a second figure.

    Parameters
    ----------
    image_path : str
        Image file to read. The default is the bundled example
        (Fig 6.6, 1851--2000).
    dat_start, dat_end : int
        Horizontal-axis values at the left and right edge of the box.
    tmp_min, tmp_max : float
        Vertical-axis minimum and maximum of the graph.

    Returns
    -------
    None
        The function only produces matplotlib figures.
    """
    img_in = mpimg.imread(image_path)
    img_gray = rgb2gray(img_in)
    # Stretch the gray levels to [0, 1] so the 0.5 threshold is meaningful.
    img_gray = (img_gray - np.min(img_gray)) / (np.max(img_gray) - np.min(img_gray))

    no_rows = img_gray.shape[0]  # 315 for the example image
    no_cols = img_gray.shape[1]  # 1050 for the example image

    arr2 = point_dat(img_gray, 0, 0, no_rows, no_cols)  # arr2 is binary
    arr3 = bound_bx(arr2, 0, 0, no_rows, no_cols)  # arr3 has bounding box data
    arr4 = bound_bx_points(arr3, no_rows, no_cols)  # arr4 has bounding box corner points

    # Column 0 of arr4 holds row indices, column 1 holds column indices.
    x_top_left = int(min(arr4[:, 0]))
    y_top_left = int(min(arr4[:, 1]))
    x_bottom_right = int(max(arr4[:, 0]))
    y_bottom_right = int(max(arr4[:, 1]))
    rect_width = y_bottom_right - y_top_left
    rect_height = x_bottom_right - x_top_left

    plt.figure(figsize=(20, 10))
    ax = plt.gca()
    # Rectangle takes (x, y) = (column, row), hence the swapped order.
    rect = Rectangle((y_top_left, x_top_left), rect_width, rect_height,
                     linewidth=1, edgecolor='b', facecolor='none')
    ax.add_patch(rect)

    # NOTE(review): dat_range is dat_end - dat_start, so scale_dat emits
    # labels dat_start .. dat_end - 1 (149 points for the default
    # 1851--2000) -- confirm whether dat_end itself should be included.
    dat_range = dat_end - dat_start
    tmp_range = tmp_max - tmp_min
    dat_adjust = float(rect_width) / float(dat_range)  # pixel columns per data step

    vect_dat = np.zeros(rect_width)
    plt.imshow(img_in)
    for n, col in enumerate(range(y_top_left, y_bottom_right), start=1):
        dat_val = data_line_center(arr2[:, col])
        vect_dat[n - 1] = dat_val
        if n % 10 == 0:
            # Show every 10th extracted point on top of the image.
            plt.scatter(col, dat_val, color='b')

    output_dat = scale_dat(vect_dat, dat_range, tmp_range, dat_adjust,
                           tmp_min, dat_start)
    y = output_dat[:, 1]  # rescaled values
    z = output_dat[:, 2]  # sample labels (years for the example)
    plt.figure(figsize=(20, 4))
    plt.plot(z, y, color='g')
# Run the extraction demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()