ignore typical temporary files when listing directories

Operating systems love to clutter the file system with all kinds of
cruft. This adds a gitignore-like config to skip those files when
listing files.
This commit is contained in:
Andreas Gohr
2024-03-13 12:09:34 +01:00
parent 5a77ba3af8
commit d6a45e5c9f
3 changed files with 122 additions and 0 deletions

View File

@@ -13,6 +13,9 @@ class Crawler
/** @var bool */
protected $sortreverse = false;
/** @var string[] patterns to ignore */
protected $ignore = [];
/**
* Initializes the crawler
*
@@ -24,6 +27,8 @@ class Crawler
$this->ext = array_map('trim', $this->ext);
$this->ext = array_map('preg_quote_cb', $this->ext);
$this->ext = implode('|', $this->ext);
$this->ignore = $this->loadIgnores();
}
public function setSortBy($sortby)
@@ -67,6 +72,9 @@ class Crawler
if (!is_dir($filepath) && !$this->isExtensionAllowed($file)) {
continue;
}
if ($this->isFileIgnored($file)) {
continue;
}
// get title file
$filename = $file;
@@ -141,6 +149,35 @@ class Crawler
return preg_match('/(' . $this->ext . ')$/i', $file);
}
/**
 * Tell whether the given file matches one of the loaded ignore patterns
 *
 * Every entry of $this->ignore is tried in order via $this->fnmatch();
 * the search stops at the first pattern that matches.
 *
 * @param string $file the name to test against the ignore patterns
 * @return bool true if any pattern matched, false otherwise
 */
protected function isFileIgnored($file)
{
    $ignored = false;
    foreach ($this->ignore as $glob) {
        if ($this->fnmatch($glob, $file)) {
            $ignored = true;
            break;
        }
    }
    return $ignored;
}
/**
 * Load the ignore patterns from the conf/ignore.txt file
 *
 * Each line of the file holds one pattern. Everything from a '#' to the end
 * of the line (plus any whitespace in front of it) is treated as a comment
 * and removed. Lines that are empty after that are dropped.
 *
 * @return string[] list of patterns; empty when the file cannot be read
 */
protected function loadIgnores()
{
    $file = __DIR__ . '/conf/ignore.txt';

    // file() raises a warning and returns false for a missing or unreadable
    // file; feeding that into array functions fails on PHP 8. A missing
    // ignore file simply means "ignore nothing".
    if (!is_readable($file)) {
        return [];
    }
    $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) {
        return [];
    }

    $ignore = [];
    foreach ($lines as $line) {
        // strip a trailing comment and surrounding whitespace;
        // preg_replace() yields null on a PCRE error, hence the cast
        $pattern = trim((string) preg_replace('/\s*#.*$/', '', $line));

        // compare against '' explicitly so a literal "0" pattern is kept
        if ($pattern !== '') {
            $ignore[] = $pattern;
        }
    }
    return $ignore;
}
/**
* Replacement for fnmatch() for windows systems.

View File

85
conf/ignore.txt Normal file
View File

@@ -0,0 +1,85 @@
# This is a gitignore style file to ignore typical temporary files and directories
### Linux ###
*~
# temporary files which can be created if a process still holds an open handle to a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# iCloud generated files
*.icloud
### MicrosoftOffice ###
*.tmp
# Word temporary
~$*.doc*
# Word Auto Backup File
Backup of *.doc*
# Excel temporary
~$*.xls*
# Excel Backup File
*.xlk
# PowerPoint temporary
~$*.ppt*
# Visio autosave temporary files
*.~vsd*
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN
# Windows shortcuts
*.lnk