Saturday, August 1, 2020

Graph2Data in Python with Bounding Box and Extracted Data Scaling

This code generates a bounding box and extracts data from the graph in an example image. Interestingly, this code project also extends my PhD research work from manual to automated extraction of data from images. Here, I show a basic example. The example graph:


The input graph is manually generated.


The sampled extracted data is displayed with the bounding box.


To scale the data, the minimum and maximum axis ranges are taken from the example image.


Finally, the output plot of the extracted and scaled data.


# 2020 Dr Tariq Javid, Hamdard University

# Enable inline plotting when running under IPython/Jupyter. `get_ipython` is
# only defined there, so skip the magic when the file runs as a plain script
# or is imported from regular Python.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import warnings
warnings.filterwarnings('ignore')
import numpy as np
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.patches import Rectangle

def data_line_center(x):
    """Return the index of the first zero in *x* plus half the zero count.

    Every element equal to 0 is counted, wherever it sits in *x*; the result
    is ``first_zero_index + zero_count / 2``. When *x* contains no zero (or
    is empty) the result is ``0.0``.
    """
    zero_positions = [pos for pos, val in enumerate(x) if val == 0]
    first_zero = zero_positions[0] if zero_positions else 0
    return first_zero + len(zero_positions) / 2.

def bound_bx(arr, a, b, c, d):
    """Mark the extreme zero-valued pixels of a region of *arr*.

    The region is rows ``a..c-1`` and columns ``b..d-1``. Up to four pixels
    are set to 1 in the returned array (all other entries are 0):

    * top:    first zero in the topmost row that has one (leftmost in that row)
    * left:   first zero in the leftmost column that has one (topmost in it)
    * bottom: first zero in the bottommost row that has one (leftmost in it)
    * right:  first zero in the rightmost column that has one (topmost in it)

    Several of these can land on the same pixel, so fewer than four entries
    may be set. If the region holds no zero, the result is all zeros.

    Returns:
        Array with the same shape and dtype as *arr*.
    """
    rows = range(a, c)
    cols = range(b, d)
    new_arr = np.zeros_like(arr)

    def mark_first_in_rows(row_order):
        # Scan row by row in the given order, columns left to right.
        for idx in row_order:
            for idy in cols:
                if arr[idx, idy] == 0.:
                    new_arr[idx, idy] = 1.
                    return

    def mark_first_in_cols(col_order):
        # Scan column by column in the given order, rows top to bottom.
        for idy in col_order:
            for idx in rows:
                if arr[idx, idy] == 0.:
                    new_arr[idx, idy] = 1.
                    return

    # The bottom and right scans walk the same region backwards rather than
    # flipping the array: flipping while reusing (a, c) / (b, d) would search
    # the mirrored rows/columns whenever the region is not the whole array.
    mark_first_in_rows(rows)            # top
    mark_first_in_cols(cols)            # left
    mark_first_in_rows(reversed(rows))  # bottom
    mark_first_in_cols(reversed(cols))  # right
    return new_arr

def point_dat(arr, a, b, c, d):
    """Binarize a rectangular region of *arr* with a 0.5 threshold.

    Inside rows ``a..c-1`` and columns ``b..d-1``, values below 0.5 become 0
    and all other values become 1. Every entry outside that region is 1.

    Returns:
        A new array with the same shape and dtype as *arr*.
    """
    # Explicit index arrays (not slices) keep per-element indexing semantics.
    region = np.ix_(np.arange(a, c), np.arange(b, d))
    binary = np.ones_like(arr)
    binary[region] = np.where(arr[region] < 0.5, 0., 1.)
    return binary

def bound_bx_points(arr, nrows, ncols):
    """Collect the (row, column) indices of the entries of *arr* equal to 1.

    Only the first *nrows* rows and *ncols* columns are examined, and points
    are listed in row-major order.

    The result always has shape ``(4, 2)``. When fewer than four entries are
    marked, the remaining rows repeat the last point found instead of staying
    at ``(0, 0)``, so taking the min/max over a column yields the true extent
    of the marked pixels. When nothing is marked the result is all zeros.

    Raises:
        IndexError: if more than four entries equal 1.
    """
    found = np.argwhere(arr[:nrows, :ncols] == 1.)
    count = len(found)
    if count > 4:
        raise IndexError(
            "expected at most 4 marked points, found %d" % count)
    corners = np.zeros((4, 2))
    if count:
        corners[:count] = found
        # Pad with a real point so unused rows cannot act as a (0, 0) corner.
        corners[count:] = found[-1]
    return corners

def scale_dat(x, m, n, d, t, s):
    """Resample *x* to *m* points and rescale the samples to ``[t, t + n]``.

    Row ``i`` of the result holds:

    * column 0: the source index ``round(i * d)``
    * column 1: ``x`` at that index, min-max normalised, flipped (the largest
      sample maps to ``t``, the smallest to ``t + n``)
    * column 2: ``s + i``

    Returns:
        Float array of shape ``(m, 3)``.
    """
    out = np.zeros([m, 3])
    for row in range(m):
        src = round(row * d)
        out[row] = (src, x[src], s + row)

    shifted = out[:, 1] - min(out[:, 1])
    unit = shifted / max(shifted)          # samples now in [0, 1]
    flipped = abs(unit - 1)                # flip up/down
    out[:, 1] = flipped * n + t            # stretch to range n, offset by t
    return out

def main(image_path='example066_IP.png', dat_start=1851, dat_end=2000,
         tmp_min=17.40, tmp_max=18.77):
    """Extract the plotted curve from a graph image and re-plot it, scaled.

    Steps: read the image, convert it to a normalised grayscale array,
    binarize it, locate the bounding box of the dark (zero) pixels, take one
    value per pixel column inside the box with ``data_line_center``, then
    resample and rescale those values with ``scale_dat``.

    Two figures are produced: the input image overlaid with the bounding box
    and every tenth extracted sample, and the scaled data plotted against the
    horizontal-axis values.

    Args:
        image_path: image file to read. The default is the example graph
            (Fig 6.6, 1851--2000).
        dat_start: integer horizontal-axis value at the left of the box
            (start year).
        dat_end: integer horizontal-axis value at the right of the box
            (end year).
        tmp_min: minimum vertical-axis value (minimum temperature).
        tmp_max: maximum vertical-axis value (maximum temperature).
    """
    img_in = mpimg.imread(image_path)
    img_gray = rgb2gray(img_in)
    img_gray = (img_gray - np.min(img_gray)) / (np.max(img_gray) - np.min(img_gray))
    no_rows = img_gray.shape[0]
    no_cols = img_gray.shape[1]

    arr2 = point_dat(img_gray, 0, 0, no_rows, no_cols)  # binary image
    arr3 = bound_bx(arr2, 0, 0, no_rows, no_cols)       # extreme-pixel mask
    arr4 = bound_bx_points(arr3, no_rows, no_cols)      # (row, col) of extremes

    # Column 0 of arr4 holds row indices, column 1 holds column indices.
    top_row = int(min(arr4[:, 0]))
    left_col = int(min(arr4[:, 1]))
    bottom_row = int(max(arr4[:, 0]))
    right_col = int(max(arr4[:, 1]))
    rect_width = right_col - left_col
    rect_height = bottom_row - top_row

    plt.figure(figsize=(20, 10))
    ax = plt.gca()
    # Rectangle takes (x, y) = (column, row) in image coordinates.
    rect = Rectangle((left_col, top_row), rect_width, rect_height,
                     linewidth=1, edgecolor='b', facecolor='none')
    ax.add_patch(rect)

    dat_range = dat_end - dat_start
    tmp_range = tmp_max - tmp_min
    dat_adjust = float(rect_width) / dat_range  # pixel columns per data step

    vect_dat = np.zeros(rect_width)
    plt.imshow(img_in)
    for n, col in enumerate(range(left_col, right_col)):
        dat_val = data_line_center(arr2[:, col])
        vect_dat[n] = dat_val
        # Overlay every tenth extracted sample on the input image.
        if (n + 1) % 10 == 0:
            plt.scatter(col, dat_val, color='b')

    output_dat = scale_dat(vect_dat, dat_range, tmp_range, dat_adjust,
                           tmp_min, dat_start)
    scaled_values = output_dat[:, 1]
    axis_values = output_dat[:, 2]
    plt.figure(figsize=(20, 4))
    plt.plot(axis_values, scaled_values, color='g')

# Run the example only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()




No comments:

Post a Comment