#! /usr/bin/python3
# ==================================================================
# get a list of specific files in a directory
# ==================================================================
import re
import os
# -------------------------------------------------------------------
# test if a string matches one of a list/tuple of regular expressions
#
# Regular expressions use the backslash character ('\') to
# indicate special forms or to allow special characters to
# be used without invoking their special meaning. This collides
# with Python's usage of the same character for the same purpose
# in string literals. The solution is to use Python's raw string
# notation for regular expression patterns; backslashes are not
# handled in any special way in a string literal prefixed with 'r'.
# r"\n" is a two-character string containing '\' and 'n'.
#
# For example to match html files: r'\.html$' or '\\.html$'
# -------------------------------------------------------------------
def StringMatchPattern(patterns,str):
    '''
    Report whether a string matches at least one regular expression.
    Attributes:
       patterns - list or tuple of RegExp patterns to try
       str      - the string to be tested
    Returns True if any pattern is found anywhere in the string
    (case is ignored), otherwise False.
    '''
    # --- any() stops at the first pattern that matches
    return any(re.search(regex, str, re.IGNORECASE) for regex in patterns)
# -------------------------------------------------------------------
# get a list of regular files (not directories or links)
# that match a given regular expression
# -------------------------------------------------------------------
def GetListOfFiles(includefiles,dir,filelist,verbose=False):
    '''
    Collect the regular files in a directory whose names match
    a regular expression.
    Hidden entries (names starting with a period), links and
    directories are skipped. Sub-directories are not searched.
    Attributes:
       includefiles - list or tuple of files to be captured
                      They are RegExp patterns; a file is captured
                      when its name matches any one of them
                      (case is ignored).
       dir          - directory to be searched, with or without
                      a trailing path separator
       filelist     - list the captured files are appended to
                      (path + file name)
       verbose      - print messages describing what the code is doing
    Returns True after the directory has been scanned.
    Errors raised by os.listdir() (for example when the directory
    does not exist) are passed on to the caller.
    '''
    if verbose:
        print("GetListOfFiles({})".format(dir))
    # --- dir must end in a path separator
    # --- note: joining with an empty component appends the separator
    # ---       only when it is missing (an empty dir stays empty)
    dir = os.path.join(dir, '')
    if verbose:
        print("Searching dir {}".format(dir))
    # --- get a list of entries in the directory
    entries = os.listdir(dir)
    # --- compile the regular expressions once, not once per file
    matchers = [re.compile(p, re.IGNORECASE) for p in includefiles]
    # --- add matching files to the list
    for f in entries:
        # ---- skip hidden files and directories
        # ---- note: they start with a period '.'
        if f.startswith('.'):
            continue
        if verbose:
            print("testing file {}".format(f))
        ff = os.path.join(dir, f)
        # ---- skip links and directories
        if os.path.islink(ff) or os.path.isdir(ff):
            continue
        # ---- skip the file?
        if not any(m.search(f) for m in matchers):
            if verbose:
                print('skipping file {}'.format(f))
            continue
        filelist.append(ff)
    return True
# ==================================================================
# main - testing
#
# includefiles - a list or a tuple of RegEx search patterns
# dir - the directory to search for files
# ==================================================================
if __name__ == '__main__':
    # --- patterns select html and python files (either escaping style works)
    includefiles = [ "\\.html$", r"\.py$" ]
    dir = '/var/www/html'
    dirfiles = []
    print('---- files -----------------------------------------')
    succeeded = GetListOfFiles(includefiles,dir,dirfiles)
    if not succeeded:
        print("GetListOfFiles failed")
    elif not dirfiles:
        print("No files found in directory ({})".format(dir))
    else:
        # --- show the captured files in sorted order, one per line,
        # --- followed by a blank line and the total count
        dirfiles.sort()
        print(*dirfiles, sep="\n")
        print()
        print("{} files found".format(len(dirfiles)))
    print()