#! /usr/bin/python3
# ==================================================================
# get a list of specific files in a directory
# ==================================================================

import re
import os

# -------------------------------------------------------------------
# test if a string matches one of a list/tuple of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python's usage of the same character for the same purpose
# in string literals. The solution is to use Python's raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# r"\n" is a two-character string containing '\' and 'n'.
#
# For example to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------

def StringMatchPattern(patterns,str):
    '''
    Test whether a string matches at least one regular expression.

    Attributes:
      patterns - list or tuple of RegExp patterns
      str      - the string to be tested

    Returns True if any pattern is found anywhere in the string
    (re.search, case-insensitive), otherwise False. An empty list
    of patterns never matches.
    '''

    # NOTE: the parameter name 'str' shadows the builtin inside this
    # function; it is kept so existing keyword callers keep working.
    return any(re.search(p,str,re.IGNORECASE) for p in patterns)

# -------------------------------------------------------------------
# get a list of regular files (not directories or links)
# that match a given regular expression
# -------------------------------------------------------------------

def GetListOfFiles(includefiles,dir,filelist,verbose=False):
    '''
    Get a list of regular files that match a regular expression.
    (not directories or links)

    Attributes:
      includefiles - list or tuple of files to be captured
                     They are RegExp patterns.
      dir          - directory to be captured
      filelist     - list of captured files (path + file name);
                     matching entries are appended to this list
      verbose      - print messages describing what the code is doing

    Returns True. Errors raised by os.listdir (for example a missing
    directory) are not caught here and propagate to the caller.
    '''

    if verbose:
        print("GetListOfFiles({})".format(dir))

    # --- dir must end in a '/'
    # --- (add one only when it is missing, so that dir + file name
    # ---  always forms a valid path)

    if not dir.endswith('/'):
        dir = dir + '/'

    if verbose:
        print("Searching dir {}".format(dir))

    # --- test every entry in the directory

    for f in os.listdir(dir):

        # ---- skip hidden files and directories
        # ---- note: they start with a period '.'

        if f.startswith('.'):
            continue

        if verbose:
            print("testing file {}".format(f))

        ff = dir + f

        # ---- skip links and directories

        if os.path.islink(ff) or os.path.isdir(ff):
            continue

        # ---- skip the file?

        if not StringMatchPattern(includefiles,f):
            if verbose:
                print('skipping file {}'.format(f))
            continue

        filelist.append(ff)

    return True

# ==================================================================
# main - testing
#
# includefiles - a list or a tuple of RegEx search patterns
# searchdir    - the directory to search for files
# ==================================================================

if __name__ == '__main__':

    includefiles = [ r"\.html$", r"\.py$" ]

    searchdir = '/var/www/html'

    dirfiles = []

    print('---- files -----------------------------------------')

    if not GetListOfFiles(includefiles,searchdir,dirfiles):
        print("GetListOfFiles failed")
    else:
        if not dirfiles:
            print("No files found in directory ({})".format(searchdir))
        else:
            dirfiles.sort()
            for f in dirfiles:
                print(f)
            print()
            print("{} files found".format(len(dirfiles)))
    print()