# build_index_html_file_new_PY

#!/usr/bin/python3
# ===================================================================
# Build a HTML file (index.html) containing links to selected
# files in a directory
# -------------------------------------------------------------------
# This script assumes that only a single directory will be
# searched/processed. Therefore, there will be no duplicate 
# file names. If the script is modified to search/process
# more than one directory, duplicate file names are possible.
# -------------------------------------------------------------------
# The web server may try to execute the file pointed to by the link
# created by this script. (For example, python.) This script can
# copy and rename a file so it can be displayed as text.
# For example:  xxxx.py -> xxxx_PY
# ===================================================================

import datetime
import html
import os
import re
import shutil


# -------------------------------------------------------------------
# global variables, constants, etc.
# -------------------------------------------------------------------

AUTHOR          = 'Tom Wolfe'      # HTML file author

CSSFILE         = 'xxxx.css'       # HTML CSS file

DIRECTORY       = './'             # directory to be searched/processed
                                   # ending '/' character required

# ---- all patterns below are regular expressions; they are applied
# ---- to the bare file name (no path) and matched case-insensitively

FILEMATCHPATS   = [ r'\.html$',    # file name patterns - match regular expressions
                    r'\.pdf$',
                    r'\.png$',
                    r'\.txt$',
                    r'\.css$' ]

FILERENAMEPATS  = [ r'\.py$',      # file name patterns - rename regular expressions
                    r'\.bat$' ]    # (keep in sync with REPLACEMENTSTRS)

# ---- the '.' must be escaped, otherwise it matches any character
# ---- and names such as 'indexXhtml' would be skipped as well

FILESKIPPATS    = [ r'^index\.html$' ] # file name patterns - skip regular expressions

REPLACEMENTSTRS = [                # file name patterns - rename match and replace strings
                  (r'\.py$','_PY'),
                  (r'\.bat$','_BAT') ]

OUTFILE         = './index.html'   # output file

VERBOSE         = True             # print the runtime configuration

# -------------------------------------------------------------------
# output start web page
# -------------------------------------------------------------------

def start_web_page(ofile,dir,author=None,stylesheet=None):
    """Write the opening part of the web page to ``ofile``.

    ofile       writable text file object
    dir         directory name shown in the page header
    author      optional value for the author <meta> tag
    stylesheet  optional href for the stylesheet <link> tag

    The page is left open inside a <div class="indent12"> element.
    """

    # ---- document start and <head> content

    lines = [ '<!DOCTYPE html>\n',
              '<html>\n',
              '<head>\n',
              '<meta charset="utf-8" />\n' ]

    # ---- optional <head> entries

    if author is not None:
        lines.append('<meta name="author" content="{}" />\n'.format(author))

    if stylesheet is not None:
        lines.append('<link rel="stylesheet" href="{}" />\n'.format(stylesheet))

    # ---- end of <head>, page header, start of the link area

    lines.extend([ '</head>\n',
                   '<body>\n',
                   '<header>\n',
                   '<center>Dir: {}</center>\n'.format(dir),
                   '</header>\n',
                   '<div class="indent12">\n' ])

    for text in lines:
        ofile.write(text)


# -------------------------------------------------------------------
# output end of web page
# -------------------------------------------------------------------

def end_web_page(ofile):
    """Write the closing part of the web page to ``ofile``.

    Closes the <div> opened at the start of the page and adds a
    footer holding the current month and year (for example
    'March 2024'). No newline is written after the final </html>.
    """

    # ---- footer time stamp (full month name and 4 digit year)

    stamp = datetime.datetime.now().strftime('%B %Y')

    closing = ( '</div>\n',
                '<footer>\n',
                '<modate>Last Modified: {}</modate>\n'.format(stamp),
                '</footer>\n',
                '</body>\n',
                '</html>' )

    for text in closing:
        ofile.write(text)


# -------------------------------------------------------------------
# test if a string matches one of a list of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python’s usage of the same character for the same purpose
# in string literals. The solution is to use Python’s raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# For example, r"\n" is a two-character string containing '\' and 'n',
# while "\n" is a one-character string containing a newline.
#
# For example, to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------

def string_match_pattern(patterns,str):
    """Return True if ``str`` matches at least one regular expression.

    patterns  iterable of regular expression strings
    str       string to test

    Matching uses re.search (a match anywhere in the string) and
    ignores case. An empty pattern list returns False.
    """

    # ---- any() stops at the first pattern that matches

    return any(re.search(pat,str,re.IGNORECASE) is not None
               for pat in patterns)


# -------------------------------------------------------------------
# return two dictionaries of selected file names
# (files to link directly, files to copy/rename before linking)
#
# dir  directory to search/process
# spat  list of file skip  regular expressions
# mpat  list of file match regular expressions
# rpat  list of file rename regular expressions
# -------------------------------------------------------------------

def get_list_of_files(dir,spat,mpat,rpat):
    """Select the files in a directory that will get a link.

    dir   directory to search/process (must end with '/'; the
          file name is appended to it without a separator)
    spat  list of file skip   regular expressions
    mpat  list of file match  regular expressions
    rpat  list of file rename regular expressions

    Returns a tuple (mdct,rdct) of dictionaries
      mdct  files matching mpat
      rdct  files matching rpat (and not mpat)
    In both dictionaries
      key   = file name
      value = path + file name

    Hidden files (name starts with '.'), symbolic links,
    directories, and names matching spat are never selected.
    os.listdir raises OSError if the directory can not be read.
    """

    mdct = {}                  # match  files: name -> path + name
    rdct = {}                  # rename files: name -> path + name

    for f in os.listdir(dir):

        # ---- skip hidden files (file name starts with a '.')
        # ---- (plain string test - no regular expression needed)

        if f.startswith('.'):
            continue

        # ---- file path and name

        ff = dir + f

        # ---- skip links and directories

        if os.path.islink(ff) or os.path.isdir(ff):
            continue

        # ---- skip the file name?

        if string_match_pattern(spat,f):
            continue

        # ---- a match pattern wins over a rename pattern

        if string_match_pattern(mpat,f):
            mdct[f] = ff
        elif string_match_pattern(rpat,f):
            rdct[f] = ff

    # ---- return the dictionaries

    return (mdct,rdct)


# -------------------------------------------------------------------
# add links to the output web page
#
# ofile  output file
# dir    directory path (must end with '/')
# mdct   is a dictionary containing matched file names
#        dictionary key   = file name
#        dictionary value = path + file name
# rdct   is a dictionary containing rename file names
#        dictionary key   = file name
#        dictionary value = path + file name
# rstrs  is a list of file name replacement strings
#        (list entries are tuples)
#          [0] regexp pattern (string to replace)
#          [1] replacement string
# -------------------------------------------------------------------

def create_web_page_links(ofile,dir,mdct,rdct,rstrs):
    """Write the link sections of the web page to ``ofile``.

    ofile  writable text file object
    dir    directory path (must end with '/'); renamed copies
           are created in this directory
    mdct   files to link directly
           dictionary key   = file name
           dictionary value = path + file name
    rdct   files that are copied to a new name and linked
           under that new name (same key/value layout as mdct)
    rstrs  list of (regexp,replacement) tuples used to build the
           new file names; only needed when rdct is not empty

    Returns True on success and False when files need renaming
    but no replacement strings were supplied. shutil.copy errors
    are not caught here.

    File names are HTML escaped before they are written, so names
    containing characters such as '&' or '"' do not break the page.
    """

    # ---------------------------------------------------------------
    # copy a file to a new name in the same directory
    # dpath  directory path
    # orgf   original file name
    # newf   new file name
    # returns True if the copy was made
    # ----
    # note: if you want to just rename the file
    # os.rename(dpath+orgf, dpath+newf)
    # ---------------------------------------------------------------

    def copy_and_rename_file(dpath,orgf,newf):

        o = dpath + orgf          # original file (path + name)
        n = dpath + newf          # new file      (path + name)

        if o == n:
            print('Error: original and new files have the ' +
                  'same name ({})\n'.format(o))
            return False

        shutil.copy(o,n)

        return True

    # ---------------------------------------------------------------
    # build one HTML link line (href and text are HTML escaped)
    # ---------------------------------------------------------------

    def link_line(href,text):
        return '<a href="{}">{}</a>\n'.format(html.escape(href),
                                              html.escape(text))

    # ---------------------------------------------------------------
    # write regular links to the output file (sorted by file name,
    # separated by <br>)
    # ---------------------------------------------------------------

    def create_regular_links(ofile,mdct):

        links = [ link_line(mdct[k],k) for k in sorted(mdct) ]

        ofile.write('<p>\n')
        ofile.write('<br>\n'.join(links))
        ofile.write('</p>\n')

    # ---------------------------------------------------------------
    # create new file name
    # fn     file name
    # rstrs  replacement strings list (list entries are tuples)
    #        [0]   regexp (string to replace)
    #        [1]   replacement string
    # ---------------------------------------------------------------
    # returns a tuple
    # [0]   found a match - True/False
    # [1]   new file name ('' when nothing matched)
    # ---------------------------------------------------------------

    def create_new_file_name(fn,rstrs):

        # ---- the first pattern that matches wins

        for (pat,repl) in rstrs:
            (nn,count) = re.subn(pat,repl,fn,flags=re.IGNORECASE)
            if count > 0:
                return (True,nn)
        return (False,'')

    # ---------------------------------------------------------------
    # copy each rename file to its new name and write a link to
    # the copy (sorted by original file name, separated by <br>)
    # ---------------------------------------------------------------

    def create_renamed_links(ofile,dpath,rdct,rstrs):

        links = []

        for k in sorted(rdct):

            # ---- new file name? (skip only this file if there is
            # ---- none, so the remaining files are still processed
            # ---- and the <p> element is always closed)

            (found,nn) = create_new_file_name(k,rstrs)

            if not found:
                print('Warning: no replacement string matches ' +
                      'file ({})\n'.format(k))
                continue

            # ---- copy original file to new file
            # ---- (no link is written if the copy was not made)

            if not copy_and_rename_file(dpath,k,nn):
                continue

            links.append(link_line(dpath+nn,nn))

        ofile.write('<h2>Rename these files</h2>\n')
        ofile.write('<p>\n')
        ofile.write('<br>\n'.join(links))
        ofile.write('</p>\n')

    # ---------------------------------------------------------------
    # ---- function's main code -------------------------------------
    # ---------------------------------------------------------------

    # ---- create regular links?

    if len(mdct) > 0:
        create_regular_links(ofile,mdct)

    # ---- nothing to rename? then we are done

    if len(rdct) == 0:
        return True

    # ---- any replacement strings defined?

    if rstrs is None or len(rstrs) < 1:
        print('Error: no file name replacement strings\n')
        return False

    # ---- create replacement links

    create_renamed_links(ofile,dir,rdct,rstrs)

    return True


# -------------------------------------------------------------------
# verbose runtime
# -------------------------------------------------------------------

def verbose_runtime():
    """Print the runtime configuration (module constants) to stdout.

    The directory and output file are printed first, followed by
    one line for every skip, match, and rename pattern and every
    replacement string tuple.
    """

    print('DIRECTORY          : {}'.format(DIRECTORY))
    print('OUTPUT FILE        : {}'.format(OUTFILE))

    # ---- (line template, list of values) in output order

    sections = ( ('FILE SKIP  PATTERN : {}',FILESKIPPATS),
                 ('FILE MATCH PATTERN : {}',FILEMATCHPATS),
                 ('FILE RENAME PATTERN: {}',FILERENAMEPATS),
                 ('REPLACEMENT STRS   : {}',REPLACEMENTSTRS) )

    for (template,values) in sections:
        for value in values:
            print(template.format(value))


# ===================================================================
# main
# ===================================================================

if __name__ == '__main__':

    # ---------------------------------------------------------------
    # script entry point: select the files in DIRECTORY and write
    # OUTFILE with links to them. The output file is not touched
    # if the directory is missing or no files were selected.
    # ---------------------------------------------------------------

    # ---- does the directory to search/process exist?
    # ---- (SystemExit is raised directly; quit() is a helper added
    # ---- by the site module and is meant for interactive use.
    # ---- the exit status is unchanged.)

    if not os.path.isdir(DIRECTORY):
        print()
        print('No directory found')
        print('Output file NOT created or modified')
        print('DIRECTORY   : {}'.format(DIRECTORY))
        print('OUTPUT FILE : {}'.format(OUTFILE))
        print()
        raise SystemExit

    # ---- fix the directory name string (if we need to)
    # ---- it must end in '/' or be empty
    # ---- (belt and suspenders - double check)

    if len(DIRECTORY) > 0 and not DIRECTORY.endswith('/'):
        DIRECTORY = DIRECTORY + '/'

    # ---- get lists (dictionaries) of selected files

    (mdct,rdct) = get_list_of_files(DIRECTORY,FILESKIPPATS,
                                    FILEMATCHPATS,FILERENAMEPATS)

    # ---- any files found to process?

    if len(mdct) == 0 and len(rdct) == 0:
        print()
        print('No files found in directory to process')
        print('Output file NOT created or modified')
        if VERBOSE:
            verbose_runtime()
        print()
        raise SystemExit

    # ---- display verbose messages?

    if VERBOSE:
        verbose_runtime()

    # ---- create output file
    # ---- ('with' closes the file even if writing the page fails)

    with open(OUTFILE,"w",encoding="utf-8") as ofile:

        start_web_page(ofile,DIRECTORY,AUTHOR,CSSFILE)

        create_web_page_links(ofile,DIRECTORY,mdct,rdct,REPLACEMENTSTRS)

        end_web_page(ofile)

    # ---- report the number of files selected for each kind of link

    print()
    print('{:3} match   links written to output file'.format(len(mdct)))
    print('{:3} renamed links written to output file'.format(len(rdct)))
    print()