#!/usr/bin/python3
# ===================================================================
# Build a HTML file (index.html) containing links to selected
# files in a directory. A simple HTML link is created.
# -------------------------------------------------------------------
# This script assumes that only a single directory will be
# searched/processed. Therefore, there will be no duplicate
# file names. If the script is modified to search/process
# more than one directory, duplicate file names are possible.
# -------------------------------------------------------------------
# The web server may try to execute the file pointed to by the
# link created by this script. This simple script does nothing
# about this problem.
# ===================================================================

import datetime
import html
import os
import re
import urllib.parse

# -------------------------------------------------------------------
# global variables, constants, etc.
# -------------------------------------------------------------------

AUTHOR = 'Tom Wolfe'        # HTML file author
CSSFILE = 'xxxx.css'        # HTML CSS file
DIRECTORY = './'            # directory to be searched/processed
                            # (a missing ending '/' is added as needed)
FILEMATCHPATS = [           # file name patterns - match regular expressions
    r'\.html$',
    r'\.pdf$',
    r'\.png$',
    r'\.py$',
    r'\.txt$',
    r'\.css$',
    r'\.bat$',
]
FILESKIPPATS = [r'^index\.html$']   # file name patterns - skip regular expressions
OUTFILE = './index.html'    # output file


# -------------------------------------------------------------------
# output start web page
# -------------------------------------------------------------------

def start_web_page(ofile, dir, author=None, stylesheet=None):
    """Write the HTML prologue (doctype, head, page header) to ofile.

    ofile       writable text file object
    dir         directory name shown in the page header
    author      optional author name for the <meta name="author"> tag
    stylesheet  optional CSS file for the <link rel="stylesheet"> tag

    All caller-supplied text is HTML-escaped so characters such as
    '<', '&' or '"' cannot break the generated markup.
    """
    ofile.write('<!DOCTYPE html>\n')
    ofile.write('<html>\n')
    ofile.write('<head>\n')
    ofile.write('<meta charset="utf-8" />\n')
    if author is not None:
        ofile.write('<meta name="author" content="{}" />\n'.
                    format(html.escape(author)))
    if stylesheet is not None:
        ofile.write('<link rel="stylesheet" href="{}" />\n'.
                    format(html.escape(stylesheet)))
    ofile.write('</head>\n')
    ofile.write('<body>\n')
    ofile.write('<header>\n')
    ofile.write('<center>Dir: {}</center>\n'.format(html.escape(dir)))
    ofile.write('</header>\n')
    ofile.write('<div class="indent12">\n')


# -------------------------------------------------------------------
# output end of web page
# -------------------------------------------------------------------

def end_web_page(ofile):
    """Write the HTML epilogue to ofile.

    The footer carries the current month and year (local time) as the
    "Last Modified" stamp.
    """
    stamp = datetime.datetime.now().strftime('%B %Y')
    ofile.write('</div>\n')
    ofile.write('<footer>\n')
    ofile.write('<modate>Last Modified: {}</modate>\n'.format(stamp))
    ofile.write('</footer>\n')
    ofile.write('</body>\n')
    ofile.write('</html>')


# -------------------------------------------------------------------
# test if a string matches one of a list of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python's usage of the same character for the same purpose
# in string literals. The solution is to use Python's raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# r"\n" is a two-character string containing '\' and 'n'.
#
# For example to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------

def string_match_pattern(patterns, str):
    """Return True if str matches any regular expression in patterns.

    patterns    iterable of regular expression strings
    str         the string to test

    Matching uses re.search (un-anchored unless the pattern anchors
    itself) and ignores case.  An empty pattern list returns False.
    """
    return any(re.search(p, str, re.IGNORECASE) for p in patterns)


# -------------------------------------------------------------------
# return a list (dictionary) of selected file names
#
# dir   directory to search/process
# mpat  list of file match regular expressions
# spat  list of file skip regular expressions
# -------------------------------------------------------------------

def get_list_of_files(dir, mpat, spat):
    """Return a dictionary of the selected files found in dir.

    dir     directory to search/process; an empty string means the
            current directory, and a missing ending '/' is tolerated
    mpat    list of file match regular expressions
    spat    list of file skip regular expressions

    Returns a dictionary where key = file name and
    value = path + file name.  Hidden files (leading '.'), links and
    directories are never selected.  Skip patterns win over match
    patterns.  os.listdir raises OSError if dir cannot be read.
    """
    # ---- path prefix always ends in '/' (or is empty) so that
    # ---- prefix + file name is a usable path and link target
    if dir and not dir.endswith('/'):
        prefix = dir + '/'
    else:
        prefix = dir

    dct = {}                    # file dictionary (list of file)

    # ---- os.listdir('') fails, so an empty dir lists '.'
    for f in os.listdir(dir or '.'):

        # ---- file path and name
        ff = prefix + f

        # ---- skip hidden files (file name starts with a '.')
        if f.startswith('.'):
            continue

        # ---- skip links and directories
        if os.path.islink(ff) or os.path.isdir(ff):
            continue

        # ---- skip the file name?
        if string_match_pattern(spat, f):
            continue

        # ---- match the file name?
        if not string_match_pattern(mpat, f):
            continue

        # ---- save the selected file name in a dictionary
        # ---- dictionary key   = file name
        # ---- dictionary value = path + file name
        dct[f] = ff

    # ---- return the dictionary (list of file)
    return dct


# -------------------------------------------------------------------
# add links to the output web page
#
# ofile  output file
# dct    is a dictionary containing selected file names
#        dictionary key   = file name
#        dictionary value = path + file name
# -------------------------------------------------------------------

def create_web_page_links(ofile, dct):
    """Write one <a> link per dictionary entry, sorted by file name.

    ofile   writable text file object
    dct     dictionary: key = file name, value = path + file name

    Links are separated by <br> and wrapped in a single <p> element.
    The link target is percent-encoded and the visible text is
    HTML-escaped, so file names containing spaces, '&', '<', '"',
    etc. still produce valid, working links.
    """
    ofile.write('<p>\n')
    for c, k in enumerate(sorted(dct)):
        if c != 0:
            ofile.write('<br>\n')
        href = html.escape(urllib.parse.quote(dct[k]))
        text = html.escape(k)
        ofile.write('<a href="{}">{}</a>\n'.format(href, text))
    ofile.write('</p>\n')


# ===================================================================
# main
# ===================================================================

def main():
    """Build OUTFILE with links to the selected files in DIRECTORY.

    Nothing is written (and the script ends normally) when DIRECTORY
    does not exist or contains no selected files.
    """
    directory = DIRECTORY

    # ---- does the directory to search/process exist?
    if not os.path.isdir(directory):
        print()
        print('No directory found')
        print('Output file NOT created or modified')
        print('DIRECTORY : {}'.format(directory))
        print('OUTPUT FILE : {}'.format(OUTFILE))
        print()
        return

    # ---- fix the directory name string (if we need to)
    # ---- it must end in '/' or be empty
    # ---- (belt and suspenders - double check)
    if directory and not directory.endswith('/'):
        directory = directory + '/'

    # ---- get a dictionary of selected files
    dct = get_list_of_files(directory, FILEMATCHPATS, FILESKIPPATS)

    # ---- any files found to process?
    if not dct:
        print()
        print('No files found in directory to process')
        print('Output file NOT created or modified')
        print('DIRECTORY : {}'.format(directory))
        print('OUTPUT FILE : {}'.format(OUTFILE))
        for p in FILESKIPPATS:
            print('FILE SKIP PATTERN: {}'.format(p))
        for p in FILEMATCHPATS:
            print('FILE MATCH PATTERN: {}'.format(p))
        print()
        return

    # ---- create output file (closed even if a write fails)
    with open(OUTFILE, 'w', encoding='utf-8') as ofile:
        start_web_page(ofile, directory, AUTHOR, CSSFILE)
        create_web_page_links(ofile, dct)
        end_web_page(ofile)

    print()
    print('{} links written to file'.format(len(dct)))
    print()


if __name__ == '__main__':
    main()