#! /usr/bin/python3
# ===================================================================
# create combined pdf file from html files
#
# Based on: www.geeksforgeeks.org/python-convert-html-pdf/
# Documentation: pdfkit.org/docs/guide.pdf
#
# Based on: stackoverflow.com/questions/3444645/merge-pdf-files
#
# look at...
# www.programcreek.com/python/example/100586/pdfkit.from_file
# ===================================================================

import os
import sys

import pdfkit

try:
    # Newer PyPDF2 releases expose the merger as PdfMerger and reject
    # the legacy class name; keep the old name bound so the rest of the
    # file is unchanged.
    from PyPDF2 import PdfMerger as PdfFileMerger
except ImportError:
    from PyPDF2 import PdfFileMerger

# ---- options passed to pdfkit for every html -> pdf conversion
options = {
    'page-size': 'Letter',
    'margin-top': '0.5in',
    'margin-right': '0.5in',
    'margin-bottom': '0.5in',
    'margin-left': '0.5in',
    'encoding': "UTF-8",
    'custom-header': [
        ('Accept-Encoding', 'gzip')
    ],
    'cookie': [
        ('cookie-name1', 'cookie-value1'),
        ('cookie-name2', 'cookie-value2'),
    ],
    'no-outline': None
}

# ---- html input files, in the order they appear in the merged pdf
htmlfls = [
    './test/index.html',
    './test/project_001.html',
    './test/project_002.html',
    './test/project_003.html',
    './test/project_004.html',
    './test/project_005.html',
    './test/project_006.html',
    './test/project_008.html',
    './test/project_010.html',
    './test/project_011.html',
    './test/project_015.html',
    './test/project_016.html',
    './test/project_017.html',
    './test/project_018.html',
    './test/project_019a.html',
    './test/project_019b.html',
    './test/project_019.html',
    './test/project_020.html',
    './test/project_021.html',
    './test/project_022.html',
    './test/project_100.html',
    './test/project_507.html'
]


# -------------------------------------------------------------------
# ---- function: file exists
# -------------------------------------------------------------------

def file_exists(f):
    """Return True if path f exists; otherwise print a message and
    return False."""
    if os.path.exists(f):
        return True
    print(f'The file {f} does not exist')
    return False


# -------------------------------------------------------------------
# ---- function: delete files
# -------------------------------------------------------------------

def delete_files(lst):
    """Delete every file named in lst.

    Returns a tuple (ok, cnt): ok is False if at least one file was
    missing, cnt is the number of files actually removed.  A missing
    file is reported once and the remaining files are still deleted.
    """
    cnt = 0         # deleted file count
    ok = True       # becomes False when any file is missing

    for f in lst:
        try:
            os.remove(f)
        except FileNotFoundError:
            print(f'The file {f} does not exist')
            ok = False
        else:
            cnt += 1

    return (ok, cnt)


# -------------------------------------------------------------------
# ---- function: merge pdf files into a single file
# -------------------------------------------------------------------

def merge_pdf_files(pdfs, outfile='merged.pdf'):
    """Append the pdf files in pdfs, in order, and write the result to
    outfile.  Returns the number of files appended."""
    cnt = 0         # pdf file count
    merger = PdfFileMerger()

    try:
        for f in pdfs:
            merger.append(f)
            cnt += 1
        merger.write(outfile)
    finally:
        # release the merger's open file handles even if a step failed
        merger.close()

    return cnt


# -------------------------------------------------------------------
# ---- function: create pdf files from html files
# -------------------------------------------------------------------

def create_pdf_from_html(lst):
    """Convert each html file in lst to ./file_NNN.pdf.

    Returns (pdfs, cnt) where pdfs lists the pdf files that were
    created and cnt is how many there are.  If an input file is
    missing, conversion stops and (pdfs, False) is returned with the
    pdf files created so far.
    """
    pdfs = []       # pdf file names

    for idx, f in enumerate(lst, start=1):

        print(f'[{idx:03}] {f}')        # display count and input file

        if not file_exists(f):          # file exists?
            return (pdfs, False)

        ff = f'./file_{idx:03}.pdf'     # output file

        # ---- convert html file to pdf file
        # ---- pdfkit can raise even though the output file was
        # ---- written, so an error is reported but does not stop
        # ---- the run (Ctrl-C and sys.exit still propagate)
        try:
            pdfkit.from_file(f, ff, options=options)
        except Exception as err:
            print(f'warning: pdfkit reported an error for {f}: {err}')

        # ---- only keep output files that really exist, otherwise
        # ---- the merge step would fail on a missing file
        if os.path.exists(ff):
            pdfs.append(ff)
        else:
            print(f'warning: no pdf file was created for {f}')

    return (pdfs, len(pdfs))


# -------------------------------------------------------------------
# ---- function: get list files in directory
# -------------------------------------------------------------------

def get_list_files_in_dir(directory):
    """Return the names of the entries in directory (os.listdir)."""
    return os.listdir(directory)


# -------------------------------------------------------------------
# ---- function: display list files in directory
# -------------------------------------------------------------------

def display_files_in_dir(directory):
    """Print the names of the entries in directory, one per line."""
    files = get_list_files_in_dir(directory)
    print()
    print('---files-in-dir----------------------------------')
    for f in files:
        print(f)
    print('-------------------------------------------------')


# -------------------------------------------------------------------
# ---- main
# -------------------------------------------------------------------

def main():
    """Convert the html files to pdf, merge them, then delete the
    intermediate pdf files."""
    print()
    print('---- create initial pdf files from html files')
    (pdfs, cnt) = create_pdf_from_html(htmlfls)
    if cnt is False:
        # an input file was missing; do not build an incomplete pdf
        print('aborting: not all html files were found')
        delete_files(pdfs)
        sys.exit(1)
    print(f'created {cnt} pdf files')

    print()
    print('---- merge pdf files into a single pdf file')
    cnt = merge_pdf_files(pdfs)
    print(f'merged {cnt} pdf files')

    print()
    print('---- delete initial pdf files')
    (tf, cnt) = delete_files(pdfs)
    print(f'deleted {cnt} initial pdf files')

    # ---- exit program
    print()


if __name__ == '__main__':
    main()