# This "OpenCV + Python" code demonstrates various morphological operations that
# can be used to denoise and clean scanned images.  It is intended to serve as a
# pre-processing step to improve the image before OCR in Tesseract.
# Ref: stackoverflow.com/questions/increase-image-brightness-without-overflow
#------= ------= -----= -----= -----= -----= -----= -----= -----= -----= -----=-
import cv2
import numpy as np
# --- Tunable parameters -------------------------------------------------------
# Cut-off value handed to cv2.threshold() for the binary threshold of the
# cleaned image.
thresh = 100
# Side lengths (in pixels) of the square all-ones kernels used for erosion (ki)
# and dilation (kd).
ki, kd = 5, 5

# blockSize for cv2.adaptiveThreshold(): the side of the square neighbourhood
# from which each pixel's threshold is derived (the mean or the
# Gaussian-weighted sum of the neighbourhood values, minus the constant C).
# The same value is also passed to cv2.medianBlur() as its aperture size.
# It must be an ODD number - an even value makes OpenCV raise an error.
kb = 81
# Constant C subtracted by the adaptive thresholds.
c = 5

# Load the scan as a single-channel 8-bit image (IMREAD_GRAYSCALE == 0).
imgGray = cv2.imread('Sample.jpg', cv2.IMREAD_GRAYSCALE)
# cv2.imread() reports failure by returning None instead of raising, which
# would otherwise surface later as an obscure error inside another OpenCV call.
if imgGray is None:
    raise OSError("Cannot read 'Sample.jpg': file is missing or is not a "
                  "decodable image")
# Keep a copy of the grayscale input for comparison with the cleaned outputs.
cv2.imwrite('imgGray.jpg', imgGray)

#------= ------= -----=-SHARPENING KERNEL-----= -----= -----= -----= -----= ----
# 3x3 convolution kernels for use with cv2.filter2D().  Sharpening is not very
# effective on text whose strokes are only about 3 to 5 pixels thick.

# Sharpening kernels: the first weights all 8 neighbours, the second only the
# 4 edge-adjacent neighbours.
kernSharpen1 = np.array([
    [-1, -1, -1],
    [-1,  9, -1],
    [-1, -1, -1],
])
kernSharpen2 = np.array([
    [ 0, -1,  0],
    [-1,  5, -1],
    [ 0, -1,  0],
])

# Laplacian (second-order derivative) kernel with the diagonal terms.
kernSharpen3 = np.array([
    [1,  1, 1],
    [1, -8, 1],
    [1,  1, 1],
])

# Laplacian-style kernel, described as omitting the diagonal terms.
# NOTE(review): the top-right coefficient is 1, so this is NOT the symmetric
# 4-neighbour Laplacian [[0, 1, 0], [1, -4, 1], [0, 1, 0]]; its coefficients
# sum to 1 instead of 0.  The value is kept as-is because the filter2D() calls
# further down use this kernel and their results depend on it - confirm
# whether the extra term is intentional before changing it.
kernSharpen4 = np.array([
    [0,  1, 1],
    [1, -4, 1],
    [0,  1, 0],
])
#------= ------= -----= -----= -----= -----= -----= -----= -----= -----= -----=-

# Erosion: one pass with a ki x ki all-ones structuring element (each output
# pixel becomes the minimum of its neighbourhood).
# NOTE: imgEroded is not consumed by any later step shown in this file.
kernErode = np.ones(shape=(ki, ki), dtype=np.uint8)
imgEroded = cv2.erode(imgGray, kernErode, iterations=1)
#cv2.imshow('Image after Erosion Operation', imgEroded)

# Dilation: one pass with a kd x kd all-ones structuring element (each output
# pixel becomes the maximum of its neighbourhood).
kernDilate = np.ones(shape=(kd, kd), dtype=np.uint8)
imgDilated = cv2.dilate(imgGray, kernDilate, iterations=1)
#cv2.imshow('Image after Dilation Operation', imgDilated)

# Convolve the dilated image with kernSharpen4; ddepth=-1 keeps the output at
# the same depth as the input.
imgDilateSharp = cv2.filter2D(src=imgDilated, ddepth=-1, kernel=kernSharpen4)
#cv2.imshow('Image sharpened', imgDilateSharp)

# Median-filter the result with a kb x kb aperture.
imgBlurred = cv2.medianBlur(src=imgDilateSharp, ksize=kb)
#cv2.imshow('Image after Dilation and Blurring Operations', imgBlurred)

# Invert the absolute difference between the input and the blurred image, so
# pixels that match the blurred image come out white (255).  cv2.absdiff() is
# used instead of the plain 'imgGray - imgBlurred', which would wrap around
# on 8-bit data wherever the blurred value is the larger one.
imgCleaned = 255 - cv2.absdiff(src1=imgGray, src2=imgBlurred)
#cv2.imshow('Cleaned Image', imgCleaned)

# Normalisation settings shared by both cv2.normalize() calls below:
# min-max scaling into a single-channel 8-bit result.
a1 = cv2.NORM_MINMAX
a2 = cv2.CV_8UC1

# Plain binary threshold of the cleaned image at `thresh`.
# NOTE: imgTh1 is not consumed by any later step shown in this file.
ret, imgTh1 = cv2.threshold(imgCleaned, thresh, 255, cv2.THRESH_BINARY)
#cv2.imshow('Cleaned Image after Threshold', imgTh1)

# Stretch the cleaned image so that its values span the full 0..255 range.
imgN1 = cv2.normalize(imgCleaned, None, alpha=0, beta=255,
                      norm_type=a1, dtype=a2)
#cv2.imshow('Normalized Image', imgN1)

# Truncating threshold: every value above 200 is capped at 200 and values at
# or below 200 are left untouched (the maxval argument, 0 here, is ignored by
# THRESH_TRUNC).  The cut-off needs trial-and-error tuning.  The result is then
# stretched back to 0..255.
ret, imgTh3 = cv2.threshold(imgN1, 200, 0, cv2.THRESH_TRUNC)
imgN2 = cv2.normalize(imgTh3, None, alpha=0, beta=255,
                      norm_type=a1, dtype=a2)
#cv2.imshow('Normalized Image', imgN2)

# Save image: both JPG and PNG formats result in almost the same file size.
cv2.imwrite('imgCleaned.jpg', imgN2)

#------= ------= -----= -----= -----= -----= -----= -----= -----= -----= ------
# Adaptive binary thresholds of the grayscale input, using a kb x kb
# neighbourhood and the constant c.  Only the Gaussian variant is written to
# disk; imgThMean is computed but not saved here.
imgThMean = cv2.adaptiveThreshold(
    imgGray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, kb, c
)
imgThGaus = cv2.adaptiveThreshold(
    imgGray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, kb, c
)
cv2.imwrite('imgThreshGaus.jpg', imgThGaus)
     
#------= ------= -----=-APPLY SHARPENING KERNEL------= ------= -----=----------
# Convolve the normalised, cleaned image with kernSharpen4; ddepth=-1 keeps
# the output at the same depth as the input.
imgSharp = cv2.filter2D(imgN2, -1, kernSharpen4)
#cv2.imwrite('imgSharp.jpg', imgSharp)

# Add the filtered image to the image it was computed from (to obtain the
# sharpened image).  This may not work on text - it works on images with
# discrete shapes.
# cv2.add() saturates at 255.  The previous 'imgSharp + imgN2' was a NumPy
# addition of two 8-bit arrays, which wraps around modulo 256 *before*
# np.clip() sees the result, so bright pixels turned dark instead of clipping.
imgSharpened = cv2.add(imgSharp, imgN2)
#cv2.imwrite('imgSharpClipped.jpg', imgSharpened)