#!/usr/bin/python3
# ===================================================================
# Build a HTML file (index.html) containing links to selected
# files in a directory
# -------------------------------------------------------------------
# This script assumes that only a single directory will be
# searched/processed. Therefore, there will be no duplicate
# file names. If the script is modified to search/process
# more than one directory, duplicate file names are possible.
# -------------------------------------------------------------------
# The web server may try to execute the file pointed to by the link
# created by this script. (For example, python.) This script can
# copy and rename a file so it can be displayed as text.
# For example: xxxx.py -> xxxx_PY
# ===================================================================

import datetime
import html
import os
import re
import shutil

# -------------------------------------------------------------------
# global variables, constants, etc.
# -------------------------------------------------------------------

AUTHOR = 'Tom Wolfe'            # HTML file author
CSSFILE = 'xxxx.css'            # HTML CSS file
DIRECTORY = './'                # directory to be searched/processed
                                # ending '/' character required

# file name patterns - match regular expressions
FILEMATCHPATS = [
    r'\.html$',
    r'\.pdf$',
    r'\.png$',
    r'\.txt$',
    r'\.css$',
]

# file name patterns - rename regular expressions
FILERENAMEPATS = [
    r'\.py$',
    r'\.bat$',
]

# file name patterns - skip regular expressions
# (the dot is escaped so only the literal name 'index.html' is skipped)
FILESKIPPATS = [r'^index\.html$']

# file name patterns - rename match and replace strings
REPLACEMENTSTRS = [
    (r'\.py$', '_PY'),
    (r'\.bat$', '_BAT'),
]

OUTFILE = './index.html'        # output file
VERBOSE = True


# -------------------------------------------------------------------
# output start web page
# -------------------------------------------------------------------

def start_web_page(ofile, dir, author=None, stylesheet=None):
    """Write the HTML prologue (head, header, opening div) to ofile.

    ofile       writable text file object
    dir         directory name shown in the page header
    author      optional value for the "author" meta tag
    stylesheet  optional href for a stylesheet link

    All interpolated values are HTML-escaped before being written.
    """
    ofile.write('<!DOCTYPE html>\n')
    ofile.write('<html>\n')
    ofile.write('<head>\n')
    ofile.write('<meta charset="utf-8" />\n')
    if author is not None:
        ofile.write('<meta name="author" content="{}" />\n'.
                    format(html.escape(author)))
    if stylesheet is not None:
        ofile.write('<link rel="stylesheet" href="{}" />\n'.
                    format(html.escape(stylesheet)))
    ofile.write('</head>\n')
    ofile.write('<body>\n')
    ofile.write('<header>\n')
    ofile.write('<center>Dir: {}</center>\n'.format(html.escape(dir)))
    ofile.write('</header>\n')
    ofile.write('<div class="indent12">\n')


# -------------------------------------------------------------------
# output end of web page
# -------------------------------------------------------------------

def end_web_page(ofile):
    """Write the HTML epilogue (closing div, footer, closing tags).

    The footer shows the current month and year as the
    "Last Modified" date.
    """
    stamp = datetime.datetime.now().strftime('%B %Y')
    ofile.write('</div>\n')
    ofile.write('<footer>\n')
    ofile.write('<modate>Last Modified: {}</modate>\n'.format(stamp))
    ofile.write('</footer>\n')
    ofile.write('</body>\n')
    ofile.write('</html>')


# -------------------------------------------------------------------
# test if a string matches one of a list of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python's usage of the same character for the same purpose
# in string literals. The solution is to use Python's raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# r"\n" is a two-character string containing '\' and 'n'.
#
# For example to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------

def string_match_pattern(patterns, str):
    """Return True if str matches any regular expression in patterns.

    patterns    iterable of regular expression strings
    str         string to test (parameter name kept for compatibility)

    Matching uses re.search and ignores case. An empty pattern list
    never matches.
    """
    return any(re.search(p, str, re.IGNORECASE) for p in patterns)


# -------------------------------------------------------------------
# return a list (dictionary) of selected file names
#
# dir    directory to search/process
# spat   list of file skip regular expressions
# mpat   list of file match regular expressions
# rpat   list of file rename regular expressions
# -------------------------------------------------------------------

def get_list_of_files(dir, spat, mpat, rpat):
    """Scan dir and return a tuple (mdct, rdct) of selected files.

    mdct    files whose names match one of the mpat patterns
    rdct    files whose names match one of the rpat patterns
            (only tested when no mpat pattern matched)

    In both dictionaries the key is the file name and the value is
    dir + file name, so dir must end with '/'.

    Hidden files (name starts with '.'), links, directories and
    names matching a spat pattern are ignored.
    """
    mdct = {}                   # files to link to directly
    rdct = {}                   # files to copy/rename before linking

    for f in os.listdir(dir):

        # ---- file path and name
        ff = dir + f

        # ---- skip hidden files (file name starts with a '.')
        if f.startswith('.'):
            continue

        # ---- skip links and directories
        if os.path.islink(ff) or os.path.isdir(ff):
            continue

        # ---- skip the file name?
        if string_match_pattern(spat, f):
            continue

        # ---- match pattern takes precedence over rename pattern
        if string_match_pattern(mpat, f):
            mdct[f] = ff
        elif string_match_pattern(rpat, f):
            rdct[f] = ff

    # ---- return the dictionaries (lists of files)
    return (mdct, rdct)


# -------------------------------------------------------------------
# copy and rename a file into the same directory
#
# dir    directory path (must end with '/')
# orgf   original file name
# newf   new file name
# ----
# note: if you want to just rename the file
#       os.rename(dir+orgf, dir+newf)
# -------------------------------------------------------------------

def _copy_and_rename_file(dir, orgf, newf):
    """Copy dir+orgf to dir+newf; return False if both are the same."""
    o = dir + orgf              # original file (path + name)
    n = dir + newf              # new file (path + name)
    if o == n:
        print('Error: original and new files have the ' +
              'same name ({})\n'.format(o))
        return False
    shutil.copy(o, n)
    return True


# -------------------------------------------------------------------
# write regular links to the output file
#
# mdct:
#     dictionary key   = file name
#     dictionary value = path + file name
# -------------------------------------------------------------------

def _create_regular_links(ofile, dir, mdct):
    """Write one paragraph of links, sorted by file name."""
    ofile.write('<p>\n')
    for c, k in enumerate(sorted(mdct)):
        if c != 0:
            ofile.write('<br>\n')       # separator between links
        ofile.write('<a href="{}">{}</a>\n'.format(
            html.escape(mdct[k]), html.escape(k)))
    ofile.write('</p>\n')
    return True


# -------------------------------------------------------------------
# create new file name
#
# fn     file name
# rstrs  replacement strings list (list entries are tuples)
#        [0] regexp (string to replace)
#        [1] replacement string
#
# returns a tuple
#        [0] found a match - True/False
#        [1] new file name ('' when no match was found)
# -------------------------------------------------------------------

def _create_new_file_name(fn, rstrs):
    """Apply the first matching replacement in rstrs to fn."""
    for pat, repl in rstrs:
        nn, hits = re.subn(pat, repl, fn, flags=re.IGNORECASE)
        if hits:
            return (True, nn)
    return (False, '')


# -------------------------------------------------------------------
# write rename links to the output file
#
# rdct:
#     dictionary key   = file name
#     dictionary value = path + file name
# -------------------------------------------------------------------

def _create_renamed_links(ofile, dir, rdct, rstrs):
    """Copy each file in rdct to its new name and write a link to it.

    A file with no matching replacement string, or whose new name
    equals its old name, is skipped. The paragraph is always closed
    so the generated HTML stays balanced.
    """
    ofile.write('<h2>Rename these files</h2>\n')
    ofile.write('<p>\n')
    c = 0                       # link count
    for k in sorted(rdct):

        # ---- new file name?
        (found, nn) = _create_new_file_name(k, rstrs)
        if not found:
            continue

        # ---- copy original file to new file
        if not _copy_and_rename_file(dir, k, nn):
            continue

        # ---- write link to output file
        if c != 0:
            ofile.write('<br>\n')
        ofile.write('<a href="{}">{}</a>\n'.format(
            html.escape(dir + nn), html.escape(nn)))
        c += 1                  # increment link count
    ofile.write('</p>\n')


# -------------------------------------------------------------------
# add links to the output web page
#
# ofile  output file
# dir    directory path (must end with '/')
# mdct   is a dictionary containing matched file names
#           dictionary key   = file name
#           dictionary value = path + file name
# rdct   is a dictionary containing rename file names
#           dictionary key   = file name
#           dictionary value = path + file name
# rstrs  is a list of file name replacement strings
#        (list entries are tuples)
#           [0] regexp pattern (string to replace)
#           [1] replacement string
# -------------------------------------------------------------------

def create_web_page_links(ofile, dir, mdct, rdct, rstrs):
    """Write the regular links, then the copied/renamed links.

    Returns True on success. Returns False (after printing an error)
    when there are files to rename but rstrs is None or empty; the
    regular links have already been written in that case.
    """
    # ---- create regular links?
    if mdct:
        _create_regular_links(ofile, dir, mdct)

    # ---- nothing to rename, so replacement strings are not needed
    if not rdct:
        return True

    # ---- any replacement strings defined?
    if not rstrs:
        print('Error: no file name replacement strings\n')
        return False

    # ---- create replacement links
    _create_renamed_links(ofile, dir, rdct, rstrs)
    return True


# -------------------------------------------------------------------
# verbose runtime
# -------------------------------------------------------------------

def verbose_runtime(directory=None):
    """Print the runtime configuration.

    directory   directory to report; defaults to DIRECTORY
    """
    if directory is None:
        directory = DIRECTORY
    print('DIRECTORY          : {}'.format(directory))
    print('OUTPUT FILE        : {}'.format(OUTFILE))
    for p in FILESKIPPATS:
        print('FILE SKIP PATTERN  : {}'.format(p))
    for p in FILEMATCHPATS:
        print('FILE MATCH PATTERN : {}'.format(p))
    for p in FILERENAMEPATS:
        print('FILE RENAME PATTERN: {}'.format(p))
    for r in REPLACEMENTSTRS:
        print('REPLACEMENT STRS   : {}'.format(r))


# ===================================================================
# main
# ===================================================================

def main():
    """Build OUTFILE from the files found in DIRECTORY."""
    directory = DIRECTORY

    # ---- does the directory to search/process exist?
    if not os.path.isdir(directory):
        print()
        print('No directory found')
        print('Output file NOT created or modified')
        print('DIRECTORY          : {}'.format(directory))
        print('OUTPUT FILE        : {}'.format(OUTFILE))
        print()
        return

    # ---- fix the directory name string (if we need to)
    # ---- it must end in '/' because paths are built by concatenation
    # ---- (belt and suspenders - double check)
    if directory and not directory.endswith('/'):
        directory = directory + '/'

    # ---- get lists (dictionaries) of selected files
    (mdct, rdct) = get_list_of_files(directory, FILESKIPPATS,
                                     FILEMATCHPATS, FILERENAMEPATS)

    # ---- any files found to process?
    if not mdct and not rdct:
        print()
        print('No files found in directory to process')
        print('Output file NOT created or modified')
        if VERBOSE:
            verbose_runtime(directory)
        print()
        return

    # ---- display verbose messages?
    if VERBOSE:
        verbose_runtime(directory)

    # ---- create output file (closed even if writing fails)
    with open(OUTFILE, "w", encoding="utf-8") as ofile:
        start_web_page(ofile, directory, AUTHOR, CSSFILE)
        create_web_page_links(ofile, directory, mdct, rdct,
                              REPLACEMENTSTRS)
        end_web_page(ofile)

    print()
    print('{:3} match links written to output file'.format(len(mdct)))
    print('{:3} renamed links written to output file'.format(len(rdct)))
    print()


if __name__ == '__main__':
    main()