#! /usr/bin/python3
# ===================================================================
# get a list of directories in a directory tree
# ===================================================================

import re
import os


# -------------------------------------------------------------------
# test if a string matches one of a list/tuple of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python's usage of the same character for the same purpose
# in string literals. The solution is to use Python's raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# r"\n" is a two-character string containing '\' and 'n'.
#
# For example to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------
def StringMatchPattern(patterns,str):
    """
    Test whether a string matches at least one regular expression.

    Arguments:
       patterns - list or tuple of RegExp patterns
       str      - the string to test (the name shadows the builtin;
                  it is kept so keyword callers keep working)

    Returns True if re.search() finds any of the patterns in the
    string (case is ignored), otherwise False. An empty pattern
    list never matches.
    """
    return any(re.search(p, str, re.IGNORECASE) for p in patterns)


# -------------------------------------------------------------------
# get a list of directories
# -------------------------------------------------------------------
def GetListOfDirs(skipdirs,treedir,dirlist,level=None,verbose=False):
    """
    Create a list of directories in all or part of a directory tree.
    (no regular files or links)

    Arguments:
       skipdirs - list or tuple of the directories to not capture.
                  They are RegExp patterns and are tested against the
                  directory name only (not the full path).
       treedir  - current directory in the directory tree being
                  processed
       dirlist  - the list of captured directories (path + directory
                  name). It is modified in place.
       level    - The number of directory levels to capture
                  level = None -- capture all levels of directories
                  level < 0    -- capture nothing and return
                  level = 0    -- capture this directory
                  level > 0    -- decrement level and keep going
       verbose  - print messages describing what the code is doing

    Returns True. Errors raised by os.listdir() (for example a
    missing or unreadable directory) are not caught here and
    propagate to the caller.
    """
    if verbose:
        print("GetListOfDirs({},level={},verbose={})".
              format(treedir,level,verbose))

    # --- have we captured enough directory levels?
    if level is not None:
        if level < 0:
            return True
        level = level - 1

    # --- treedir must end in '/'
    # --- (plain string test; no regular expression escapes needed)
    if not treedir.endswith('/'):
        treedir = treedir + '/'

    if verbose:
        print("searching dir {}".format(treedir))

    # --- get the entries in the directory and
    # --- add the directories to the list
    for d in os.listdir(treedir):

        # ---- skip hidden files and directories
        # ---- note: they start with a period '.'
        if d.startswith('.'):
            continue

        if verbose:
            print("testing dir entry {}".format(d))

        dd = treedir + d

        # ---- skip links
        # ---- (tested first because isdir() follows links)
        if os.path.islink(dd):
            continue

        # ---- skip non-directories
        if not os.path.isdir(dd):
            continue

        # ---- skip the directory?
        if StringMatchPattern(skipdirs,d):
            if verbose:
                print("skipping dir {}".format(d))
            continue

        # ---- add the directory to the list
        dirlist.append(dd)

        # ---- search sub-directory
        GetListOfDirs(skipdirs,dd,dirlist,level,verbose)

    return True


# ===================================================================
# main - testing
#
# skipdirs - a list or a tuple of RegEx search patterns
# treeroot - root of a directory tree to search
# ===================================================================
if __name__ == '__main__':

    skipdirs = [ "^wiki$", "^x$" ]

    treeroot = '/var/www/html'

    # --- the root itself is always part of the result
    treedirs = [treeroot]

    print('---- directories -----------------------------------')

    if not GetListOfDirs(skipdirs,treeroot,treedirs):
        print("GetListOfDirs failed")
    else:
        if not treedirs:
            print('No directories found in tree ({})'.format(treeroot))
        else:
            treedirs.sort()
            for d in treedirs:
                print(d)
            print()
            print("{} directories found".format(len(treedirs)))

    print()