"""Convert a colour PDF into a black-and-white PDF.

Ghostscript renders the requested page range to PNG files, OpenCV reads
each page as grayscale and thresholds it to pure black/white, and the
resulting images are merged into a single PDF with Pillow and PyMuPDF.

The threshold has to be tuned for each kind of PDF, because the quality
of the colours varies from document to document.

Syntax:
    py pdf-To-BW-Pdf.py Input.pdf 127 1 234

where 127 is the threshold value, 1 is the start page and 234 is the last
page. Page numbers start at 1.
"""
import glob
import os
import shutil
import subprocess
import sys

import cv2
import fitz
import PIL
from PIL import Image

USAGE = "usage: py pdf-To-BW-Pdf.py Input.pdf THRESHOLD FIRST_PAGE LAST_PAGE"

RENDER_DPI = 100   # resolution Ghostscript renders the pages at
PDF_DPI = 100.0    # resolution recorded when a page image is saved as PDF
PAGE_WIDTH = 842   # every page image is scaled to this width in pixels

OUTPUT_DIR = "PDF2PNG"          # working directory for the rendered pages
MERGED_PDF = "PDF-Merged.pdf"   # final output
TEMP_PDF = "PDF-Tempor.pdf"     # single-page PDF, rewritten for every page
BW_PNG = "BW.png"               # thresholded page, rewritten for every page


def _find_ghostscript():
    """Return the Ghostscript executable to run.

    The first candidate found on PATH wins. When none is found, the name
    the script has always used is returned so that the subsequent launch
    attempt decides whether it really is missing.
    """
    # sys.platform is 'win32' on every Windows build, 32-bit or 64-bit, so
    # it cannot be used to choose between the 32- and 64-bit executables.
    if sys.platform == "win32":
        candidates = ("gswin64c", "gswin64", "gswin32c")
        fallback = "gswin64"
    else:
        candidates = ("gs",)
        fallback = "gs"
    for name in candidates:
        found = shutil.which(name)
        if found:
            return found
    return fallback


def _render_pages(pdf_name, first_page, last_page):
    """Render the page range of *pdf_name* to PNG files in OUTPUT_DIR.

    Returns the PNG paths found in OUTPUT_DIR, sorted by name so that the
    zero-padded page counter in the file names yields page order. Exits
    with an error message when Ghostscript cannot be started or when no
    page image was produced.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    stem = os.path.basename(pdf_name).split(".")[0]
    png_name = OUTPUT_DIR + "/" + stem + "-%04d.png"
    command = [
        _find_ghostscript(),
        "-dBATCH",
        "-dNOPAUSE",
        "-sDEVICE=png16m",
        f"-r{RENDER_DPI}",
        f"-dFirstPage={first_page}",
        f"-dLastPage={last_page}",
        f"-sOutputFile={png_name}",
        f"{pdf_name}",
    ]
    try:
        # stdout is captured only to keep Ghostscript's output off the console.
        result = subprocess.run(command, stdout=subprocess.PIPE)
    except FileNotFoundError:
        sys.exit(f"Ghostscript executable not found: {command[0]}")

    # Lower-case pattern: this is the extension given to Ghostscript above,
    # and an upper-case pattern matches nothing on case-sensitive systems.
    pages = sorted(glob.glob(OUTPUT_DIR + "/" + "*.png"))
    if not pages:
        sys.exit(
            f"No pages were rendered from {pdf_name} "
            f"(Ghostscript exit code {result.returncode})."
        )
    return pages


def _append_bw_page(png_path, threshold, merged):
    """Threshold one rendered page and append it to the *merged* PDF."""
    # Flag 0 makes OpenCV load the image as single-channel grayscale.
    gray = cv2.imread(png_path, 0)
    if gray is None:
        raise OSError(f"Could not read rendered page: {png_path}")

    # Pixels brighter than the threshold become white (255), the rest black.
    _, bw_pixels = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    cv2.imwrite(BW_PNG, bw_pixels)

    with Image.open(BW_PNG) as page:
        # Height is derived per page so that every page keeps its own
        # aspect ratio when scaled to the common width.
        scale = PAGE_WIDTH / float(page.size[0])
        height = int(float(page.size[1]) * scale)
        # resize() returns a new image; the result must be kept and saved.
        resized = page.resize((PAGE_WIDTH, height), Image.Resampling.LANCZOS)
        resized.save(TEMP_PDF, "PDF", resolution=PDF_DPI)

    single = fitz.open(TEMP_PDF)
    try:
        # Newer PyMuPDF releases name this insert_pdf; older ones insertPDF.
        insert = getattr(merged, "insert_pdf", None) or merged.insertPDF
        insert(single)
    finally:
        single.close()


def _cleanup():
    """Delete the temporary files and the page-image directory."""
    for temp in (TEMP_PDF, BW_PNG):
        try:
            os.remove(temp)
        except FileNotFoundError:
            pass  # never created, e.g. the run stopped before the first page

    if not os.path.isdir(OUTPUT_DIR):
        return
    for png in glob.glob(OUTPUT_DIR + "/" + "*.png"):
        os.remove(png)
    try:
        os.rmdir(OUTPUT_DIR)
    except OSError as err:
        # Typically the directory holds files this script did not create.
        print(f"Could not remove {OUTPUT_DIR}: {err}", file=sys.stderr)


def main(argv):
    """Run the conversion described in the module docstring.

    *argv* is the full argument vector: script name, input PDF, threshold,
    first page and last page.
    """
    if len(argv) < 5:
        sys.exit(USAGE)
    pdf_name = str(argv[1])
    try:
        threshold = int(argv[2])
        first_page = int(argv[3])
        last_page = int(argv[4])
    except ValueError:
        sys.exit(USAGE)

    try:
        pages = _render_pages(pdf_name, first_page, last_page)
        merged = fitz.open()
        try:
            for png_path in pages:
                _append_bw_page(png_path, threshold, merged)
            merged.save(MERGED_PDF)
        finally:
            merged.close()
    finally:
        # Temporary files are removed even when the conversion fails.
        _cleanup()


if __name__ == "__main__":
    main(sys.argv)