# zz_file_dir_operations.py

#! /usr/bin/python3
# ==================================================================
#
# ==================================================================

import os
import re


# -------------------------------------------------------------------
# test if a string matches one of a list/tuple of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python's usage of the same character for the same purpose
# in string literals. The solution is to use Python's raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# (r"\n" is a two-character string.)
#
# For example to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------

def StringMatchPattern(patterns,str,ignorecase=True):
    '''
    Test whether a string matches at least one regular expression.

    Arguments:

      patterns    - list or tuple of RegExp patterns; each one is
                    tried in order with re.search (so a pattern may
                    match anywhere in the string unless it is anchored)

      str         - string to be tested
                    (the name shadows the builtin, but it is kept
                    because callers may pass it by keyword)

      ignorecase  - when true, matching is case-insensitive

    Returns True as soon as one pattern matches, otherwise False
    (an empty pattern collection therefore returns False).
    '''

    # ---- pick the search flags once instead of branching per pattern

    search_flags = re.IGNORECASE if ignorecase else 0

    return any(re.search(regex,str,search_flags) for regex in patterns)


# -------------------------------------------------------------------
# create a directory if it does not exist
# -------------------------------------------------------------------

def CreateDirectory(path):
    '''
    Make sure a directory exists at the given path.

    Arguments:

      path  - directory to create (missing parent directories are
              created as well, because os.makedirs is used)

    Returns True when the directory already existed or was created,
    and False when the path exists but is not a directory.
    Errors raised by os.makedirs are not caught here.
    '''

    # ---- nothing there yet: create it (including parents)

    if not os.path.exists(path):
        os.makedirs(path)
        return True

    # ---- something is already there: succeed only for a directory

    return os.path.isdir(path)


# -------------------------------------------------------------------
# get a list of regular files (not directories or links)
# that match a list of regular expressions
# -------------------------------------------------------------------

def GetListOfFiles(ifiles,sfiles,rdir,filelist,verbose=False,debug=False):
    '''
    Get a list of regular files that match a regular expression.
    (not directories or links)

    Only the directory itself is searched; sub-directories are not
    entered.

    Arguments:

      ifiles    - list or tuple of files to be included
                  They are RegExp patterns.

      sfiles    - list or tuple of files to be skipped
                  They are RegExp patterns. Skipping wins over
                  including because it is tested first.

      rdir      - directory to be searched
                  (a trailing '/' is added if it is missing)

      filelist  - list that matched files (path + file name) are
                  appended to, in the order os.listdir returns them

      verbose   - accepted for a uniform call signature;
                  not used by this function

      debug     - print debug information

    Patterns are tested against the bare file name (not the path)
    using StringMatchPattern.

    Returns True. Errors raised by os.listdir (for example a missing
    directory) are not caught here.
    '''

    if debug:
        print("GetListOfFiles({})".format(rdir))

    # --- dir must end in a '/' character
    # --- (plain string test: no regular expression or escape
    # --- sequence is needed for a fixed one-character suffix)

    if not rdir.endswith('/'):
        rdir = rdir + '/'

    # --- look at every entry in the directory

    for f in os.listdir(rdir):

        # ---- skip hidden files and directories
        # ---- note: they start with a period '.'

        if f.startswith('.'):
            continue

        ff = rdir + f

        # ---- skip links and directories
        # ---- (links are tested first so a link to a file is skipped)

        if os.path.islink(ff) or os.path.isdir(ff):
            continue

        # ---- skip the file?

        if StringMatchPattern(sfiles,f):
            continue

        # ---- is it one of the files to include?

        if not StringMatchPattern(ifiles,f):
            continue

        # ---- save file (path + name)

        filelist.append(ff)

    return True


# -------------------------------------------------------------------
# get a list of sub-directories in a directory (not files or links)
# that match a list of regular expressions
# -------------------------------------------------------------------

def GetListOfDirs(sdirs,rdir,dirlist,verbose=False,debug=False):
    '''
    Get a list of the sub-directories of a directory.
    (not files or links)

    Only the directory itself is searched; the search is not
    recursive.

    Arguments:

      sdirs     - list or tuple of directories to be skipped
                  They are RegExp patterns.

      rdir      - directory to be searched (rootdir)
                  (a trailing '/' is added if it is missing)

      dirlist   - list that the sub-directories (path + directory
                  name) are appended to, in the order os.listdir
                  returns them

      verbose   - accepted for a uniform call signature;
                  not used by this function

      debug     - print debug information

    Patterns are tested against the bare directory name (not the
    path) using StringMatchPattern.

    Returns True. Errors raised by os.listdir (for example a missing
    directory) are not caught here.
    '''

    if debug:
        print("GetListOfDirs({})".format(rdir))

    # --- dir must end in a '/' character
    # --- (plain string test: no regular expression or escape
    # --- sequence is needed for a fixed one-character suffix)

    if not rdir.endswith('/'):
        rdir = rdir + '/'

    # --- look at every entry in the directory

    for d in os.listdir(rdir):

        # ---- skip hidden files and directories
        # ---- note: they start with a period '.'

        if d.startswith('.'):
            continue

        dd = rdir + d

        # ---- skip links
        # ---- os.path.isdir follows symbolic links, so a link to a
        # ---- directory must be rejected explicitly; otherwise a
        # ---- link cycle makes a recursive tree walk never end

        if os.path.islink(dd):
            continue

        # ---- is it a directory?

        if not os.path.isdir(dd):
            continue

        # ---- skip directory?

        if StringMatchPattern(sdirs,d):
            continue

        # ---- save directory (path + name)

        dirlist.append(dd)

    return True


# ==================================================================
# main - testing
# ==================================================================

if __name__ == '__main__':

    # ---- simple self test: list the matching files and the
    # ---- sub-directories found directly inside ROOTDIR

    # ---- global variables

    DEBUG        = False
    INCLUDEFILES = [ '\\.html$', r'\.py$' ]   # same escaping, both notations
    ROOTDIR      = '/hodgepodge/'       # should end in a '/' character
    SKIPDIRS     = []
    SKIPFILES    = [ r'^\.', '^my_', '^tk_' ]
    VERBOSE      = False

    # ---- root directory exists?
    # ---- (SystemExit is raised directly: quit() is a helper installed
    # ---- by the site module, and the non-zero status reports the error)

    if not os.path.isdir(ROOTDIR):
        print("Root directory does no exist ({})".format(ROOTDIR))
        raise SystemExit(1)

    # ---- get and display a list of files

    print('---- files --------------------------------------------')

    filelist = []

    if not GetListOfFiles(INCLUDEFILES,SKIPFILES,ROOTDIR,
                          filelist,VERBOSE,DEBUG):
        print("GetListOfFiles failed")
    elif not filelist:
        # ---- report the directory that was searched (ROOTDIR)
        print("No files found in directory ({})".format(ROOTDIR))
    else:
        filelist.sort()

        for f in filelist:
            print(f)
        print()
        print("{} files found".format(len(filelist)))
        print()

    # ---- get and display a list of sub-directories

    dirlist = []

    print('---- sub-directories-----------------------------------')

    if not GetListOfDirs(SKIPDIRS,ROOTDIR,dirlist,VERBOSE,DEBUG):
        print("GetListOfDirs failed")
    elif not dirlist:
        # ---- report the directory that was searched (ROOTDIR)
        print("No sub-directories found in directory ({})".format(ROOTDIR))
    else:
        dirlist.sort()

        for d in dirlist:
            print(d)
        print()
        print("{} sub-directories found".format(len(dirlist)))
        print()

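# -------------------------------------------------------------------
# walk (process) a tree or sub-tree of directories and files
#
# RootDir  - root directory of a tree or sub-tree
# DeltaDir - handed on unchanged to every recursive call;
#            not otherwise used by this function
# IncFiles - list/tuple of RegExp patterns of files to include
# SkpFiles - list/tuple of RegExp patterns of files to skip
# SkpDirs  - list/tuple of RegExp patterns of directories to skip
# DirFunc  - function to call on every directory under the
#            root directory
# FileFunc - function to call on every file under the
#            root directory
# DirFirst - call DirFunc before FileFunc (True,False)
# TopDown  - process the entries of a directory before (True) or
#            after (False) walking its sub-directories
# TestMode - handed on to DirFunc and FileFunc
# Verbose  - print verbose messages
# Debug    - print debug messages
# -------------------------------------------------------------------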

def WalkTheTree(RootDir,DeltaDir,IncFiles,SkpFiles,SkpDirs,DirFunc,
                FileFunc,DirFirst=True,TopDown=True,TestMode=False,
                Verbose=False,Debug=False):
    '''
    Recursively walk a directory tree, calling a function on every
    sub-directory and on every matching file.

    Arguments:

      RootDir   - root directory of the tree or sub-tree

      DeltaDir  - handed on unchanged to every recursive call;
                  not otherwise used by this function

      IncFiles  - RegExp patterns of files to include
                  (passed to GetListOfFiles)

      SkpFiles  - RegExp patterns of files to skip
                  (passed to GetListOfFiles)

      SkpDirs   - RegExp patterns of directories to skip
                  (passed to GetListOfDirs)

      DirFunc   - called as DirFunc(path,TestMode,Verbose,Debug) for
                  every sub-directory; a false result stops the walk

      FileFunc  - called as FileFunc(path,TestMode,Verbose,Debug) for
                  every matching file; a false result stops the walk

      DirFirst  - within one directory, call DirFunc on all of its
                  sub-directories before FileFunc on its files (True)
                  or the other way around (False)

      TopDown   - process the entries of a directory before (True)
                  or after (False) walking its sub-directories

      TestMode, Verbose, Debug
                - handed on to DirFunc and FileFunc

    Returns True when the whole tree was processed, and False as soon
    as a listing helper, DirFunc, FileFunc or a recursive walk
    reports failure.
    '''

    # ---- get a list of sub-directories in the root directory

    dirlist = []

    if not GetListOfDirs(SkpDirs,RootDir,dirlist,Verbose,False):
        print("GetListOfDirs failed ({})".format(RootDir))
        return False

    # ---- get a list of files in the root directory

    filelist = []

    if not GetListOfFiles(IncFiles,SkpFiles,RootDir,filelist,
                          Verbose,False):
        print("GetListOfFiles failed ({})".format(RootDir))
        return False

    # ---- call DirFunc/FileFunc on the entries of this directory,
    # ---- in the order selected by DirFirst

    def ProcessThisLevel():
        dirstep  = (DirFunc,dirlist)
        filestep = (FileFunc,filelist)
        if DirFirst:
            steps = (dirstep,filestep)
        else:
            steps = (filestep,dirstep)
        for func,entries in steps:
            for entry in entries:
                if not func(entry,TestMode,Verbose,Debug):
                    return False
        return True

    # ---- walk every sub-directory
    # ---- the options are passed by keyword so that DirFirst and
    # ---- TopDown cannot be swapped by their position in the call

    def ProcessSubTrees():
        for d in dirlist:
            if not WalkTheTree(d,DeltaDir,IncFiles,SkpFiles,
                               SkpDirs,DirFunc,FileFunc,
                               DirFirst=DirFirst,TopDown=TopDown,
                               TestMode=TestMode,Verbose=Verbose,
                               Debug=Debug):
                return False
        return True

    # ---- process top down or bottom up
    # ---- ('and' stops at the first step that fails)

    if TopDown:
        return ProcessThisLevel() and ProcessSubTrees()

    return ProcessSubTrees() and ProcessThisLevel()