#! /usr/bin/python3
# ==================================================================
# Helpers for listing files / sub-directories that match regular
# expressions and for walking a directory tree with callbacks.
# ==================================================================

import os
import re


# -------------------------------------------------------------------
# test if a string matches one of a list/tuple of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python's usage of the same character for the same purpose
# in string literals. The solution is to use Python's raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# (r"\n" is a two-character string.)
#
# For example to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------
def StringMatchPattern(patterns, str, ignorecase=True):
    """Return True if *str* matches at least one pattern in *patterns*.

    Attributes:
      patterns   - list or tuple of regular expressions (searched with
                   re.search, so they match anywhere unless anchored)
      str        - the string to test (the name shadows the builtin but
                   is kept so keyword callers continue to work)
      ignorecase - match case-insensitively when True (the default)

    An empty *patterns* sequence never matches and yields False.
    """
    flags = re.IGNORECASE if ignorecase else 0
    return any(re.search(p, str, flags) for p in patterns)


# -------------------------------------------------------------------
# create a directory if it does not exist
# -------------------------------------------------------------------
def CreateDirectory(path):
    """Create directory *path* (and missing parents) if it does not exist.

    Returns True when the directory exists on return, and False when
    *path* already exists but is not a directory.  Errors raised by
    os.makedirs (e.g. permission problems) propagate to the caller.
    """
    if os.path.exists(path):
        return os.path.isdir(path)
    # exist_ok covers the case where the directory appears between the
    # existence check above and the creation here.
    os.makedirs(path, exist_ok=True)
    return True


# -------------------------------------------------------------------
# get a list of files (not directories or links)
# that match a list of regular expressions
# -------------------------------------------------------------------
def GetListOfFiles(ifiles, sfiles, rdir, filelist, verbose=False, debug=False):
    """Collect the files in *rdir* whose names match the given patterns.

    Hidden entries (names starting with '.'), symbolic links and
    directories are always skipped.  The search is not recursive.

    Attributes:
      ifiles   - list or tuple of files to be included
                 They are RegExp patterns.
      sfiles   - list or tuple of files to be skipped
                 They are RegExp patterns.  Skipping wins over including.
      rdir     - directory to be searched
      filelist - list that matched files (path + file name) are
                 appended to; this is the function's real output
      verbose  - accepted for interface compatibility; currently unused
      debug    - print debug information

    Always returns True; a missing or unreadable *rdir* raises OSError
    from os.listdir.
    """
    if debug:
        print("GetListOfFiles({})".format(rdir))

    # --- dir must end in a '/' character
    if not rdir.endswith('/'):
        rdir = rdir + '/'

    # --- entries are sorted so the result order is deterministic
    for name in sorted(os.listdir(rdir)):
        # ---- skip hidden files and directories (leading '.')
        if name.startswith('.'):
            continue
        full = rdir + name
        # ---- skip links and directories
        if os.path.islink(full) or os.path.isdir(full):
            continue
        # ---- explicitly skipped?
        if StringMatchPattern(sfiles, name):
            continue
        # ---- not one of the included patterns?
        if not StringMatchPattern(ifiles, name):
            continue
        # ---- save file (path + name)
        filelist.append(full)
    return True


# -------------------------------------------------------------------
# get a list of sub-directories in a directory (not files or links)
# that do not match a list of regular expressions
# -------------------------------------------------------------------
def GetListOfDirs(sdirs, rdir, dirlist, verbose=False, debug=False):
    """Collect the sub-directories of *rdir* that are not skipped.

    Hidden entries (names starting with '.'), symbolic links and
    non-directories are always skipped.  The search is not recursive.

    Attributes:
      sdirs   - list or tuple of directories to be skipped
                They are RegExp patterns.
      rdir    - directory to be searched (rootdir)
      dirlist - list that matched directories (path + name) are
                appended to; this is the function's real output
      verbose - accepted for interface compatibility; currently unused
      debug   - print debug information

    Always returns True; a missing or unreadable *rdir* raises OSError
    from os.listdir.
    """
    if debug:
        print("GetListOfDirs({})".format(rdir))

    # --- dir must end in a '/' character
    if not rdir.endswith('/'):
        rdir = rdir + '/'

    # --- entries are sorted so the result order is deterministic
    for name in sorted(os.listdir(rdir)):
        # ---- skip hidden files and directories (leading '.')
        if name.startswith('.'):
            continue
        full = rdir + name
        # ---- skip links: following a symlinked directory could make a
        # ---- tree walk loop forever, and links are skipped for files too
        if os.path.islink(full):
            continue
        # ---- is it a directory?
        if not os.path.isdir(full):
            continue
        # ---- skip directory?
        if StringMatchPattern(sdirs, name):
            continue
        # ---- save directory (path + name)
        dirlist.append(full)
    return True


# -------------------------------------------------------------------
# walk (process) a tree or sub-tree of directories and files
# -------------------------------------------------------------------
def WalkTheTree(RootDir, DeltaDir, IncFiles, SkpFiles, SkpDirs, DirFunc,
                FileFunc, DirFirst=True, TopDown=True, TestMode=False,
                Verbose=False, Debug=False):
    """Call DirFunc / FileFunc on everything below *RootDir*.

    Attributes:
      RootDir  - root directory of a tree or sub-tree
      DeltaDir - not used by this function; passed unchanged to the
                 recursive calls
      IncFiles - RegExp patterns of files to include (see GetListOfFiles)
      SkpFiles - RegExp patterns of files to skip (see GetListOfFiles)
      SkpDirs  - RegExp patterns of directories to skip (see GetListOfDirs)
      DirFunc  - function called on every sub-directory under the root
                 directory (the root itself is not passed to it)
      FileFunc - function called on every file under the root directory
      DirFirst - within one directory, call DirFunc on its
                 sub-directories before FileFunc on its files (True)
                 or after (False)
      TopDown  - process a directory's own entries before descending
                 into its sub-directories (True) or after (False)
      TestMode - passed through to DirFunc / FileFunc
      Verbose  - print verbose messages; passed through to the callbacks
      Debug    - print debug messages; passed through to the callbacks

    Both callbacks are invoked as func(path, TestMode, Verbose, Debug)
    and must return a true value to continue.  The walk stops and
    returns False as soon as a callback or a listing step fails;
    otherwise it returns True.
    """
    # ---- get a list of sub-directories in the root directory
    dirlist = []
    if not GetListOfDirs(SkpDirs, RootDir, dirlist, Verbose, False):
        print("GetListOfDirs failed ({})".format(RootDir))
        return False

    # ---- get a list of files in the root directory
    filelist = []
    if not GetListOfFiles(IncFiles, SkpFiles, RootDir, filelist,
                          Verbose, False):
        print("GetListOfFiles failed ({})".format(RootDir))
        return False

    def process_this_level():
        # run the callbacks on this directory's own entries
        steps = [(DirFunc, dirlist), (FileFunc, filelist)]
        if not DirFirst:
            steps.reverse()
        for func, paths in steps:
            for path in paths:
                if not func(path, TestMode, Verbose, Debug):
                    return False
        return True

    def descend():
        # recurse into every sub-directory; the flags are passed by
        # keyword so DirFirst and TopDown cannot be swapped
        for subdir in dirlist:
            if not WalkTheTree(subdir, DeltaDir, IncFiles, SkpFiles,
                               SkpDirs, DirFunc, FileFunc,
                               DirFirst=DirFirst, TopDown=TopDown,
                               TestMode=TestMode, Verbose=Verbose,
                               Debug=Debug):
                return False
        return True

    # ---- process top down or bottom up
    if TopDown:
        return process_this_level() and descend()
    return descend() and process_this_level()


# ==================================================================
# main - testing
# ==================================================================
def _main():
    """List the matching files and sub-directories of a fixed test directory."""
    # ---- test configuration
    DEBUG = False
    INCLUDEFILES = ['\\.html$', r'\.py$']
    ROOTDIR = '/hodgepodge/'          # should end in a '/' character
    SKIPDIRS = []
    SKIPFILES = [r'^\.', '^my_', '^tk_']
    VERBOSE = False

    # ---- root directory exists?
    if not os.path.isdir(ROOTDIR):
        print("Root directory does no exist ({})".format(ROOTDIR))
        # non-zero status: this is an error exit
        raise SystemExit(1)

    # ---- get and display a list of files
    print('---- files --------------------------------------------')
    filelist = []
    if not GetListOfFiles(INCLUDEFILES, SKIPFILES, ROOTDIR,
                          filelist, VERBOSE, DEBUG):
        print("GetListOfFiles failed")
    elif len(filelist) < 1:
        print("No files found in directory ({})".format(ROOTDIR))
    else:
        filelist.sort()
        for f in filelist:
            print(f)
        print()
        print("{} files found".format(len(filelist)))
        print()

    # ---- get and display a list of sub-directories
    dirlist = []
    print('---- sub-directories-----------------------------------')
    if not GetListOfDirs(SKIPDIRS, ROOTDIR, dirlist, VERBOSE, DEBUG):
        print("GetListOfDirs failed")
    elif len(dirlist) < 1:
        print("No sub-directories found in directory ({})".format(ROOTDIR))
    else:
        dirlist.sort()
        for d in dirlist:
            print(d)
        print()
        print("{} sub-directories found".format(len(dirlist)))
        print()


if __name__ == '__main__':
    _main()