ignore typical temporary files when listing directories
Operating systems love to clutter the file system with all kinds of cruft. This adds a gitignore-like config to skip those files when listing files.
This commit is contained in:
37
Crawler.php
37
Crawler.php
@@ -13,6 +13,9 @@ class Crawler
|
||||
/** @var bool */
|
||||
protected $sortreverse = false;
|
||||
|
||||
/** @var string[] patterns to ignore */
|
||||
protected $ignore = [];
|
||||
|
||||
/**
|
||||
* Initializes the crawler
|
||||
*
|
||||
@@ -24,6 +27,8 @@ class Crawler
|
||||
$this->ext = array_map('trim', $this->ext);
|
||||
$this->ext = array_map('preg_quote_cb', $this->ext);
|
||||
$this->ext = implode('|', $this->ext);
|
||||
|
||||
$this->ignore = $this->loadIgnores();
|
||||
}
|
||||
|
||||
public function setSortBy($sortby)
|
||||
@@ -67,6 +72,9 @@ class Crawler
|
||||
if (!is_dir($filepath) && !$this->isExtensionAllowed($file)) {
|
||||
continue;
|
||||
}
|
||||
if ($this->isFileIgnored($file)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// get title file
|
||||
$filename = $file;
|
||||
@@ -141,6 +149,35 @@ class Crawler
|
||||
return preg_match('/(' . $this->ext . ')$/i', $file);
|
||||
}
|
||||
|
||||
/**
 * Tell whether a file name matches at least one of the loaded ignore patterns
 *
 * Every pattern in $this->ignore is tried in order using this class' own
 * fnmatch() method; checking stops at the first pattern that matches.
 *
 * @param string $file the file name to test
 * @return bool true if any pattern matched, false if none did
 */
protected function isFileIgnored($file)
{
    $ignored = false;
    foreach ($this->ignore as $glob) {
        if ($this->fnmatch($glob, $file)) {
            // one hit is enough, no need to look at the remaining patterns
            $ignored = true;
            break;
        }
    }
    return $ignored;
}
|
||||
|
||||
/**
 * Load the ignore patterns from the conf/ignore.txt file
 *
 * The file holds one pattern per line. Everything from a '#' to the end of
 * a line (including whitespace in front of the '#') is treated as a comment
 * and removed. Lines that are empty after stripping are skipped.
 *
 * If the file is missing or cannot be read, an empty list is returned so
 * that no file is ignored instead of the crawler failing.
 *
 * @return string[] the patterns, indexed sequentially from 0
 */
protected function loadIgnores()
{
    $file = __DIR__ . '/conf/ignore.txt';

    // file() returns false (and emits a warning) for unreadable files, guard against both
    if (!is_readable($file)) return [];
    $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) return [];

    $ignore = [];
    foreach ($lines as $line) {
        // strip trailing comments, then surrounding whitespace
        $pattern = trim((string) preg_replace('/\s*#.*$/', '', $line));

        // compare against '' explicitly: a plain truthiness check would also drop the pattern "0"
        if ($pattern !== '') $ignore[] = $pattern;
    }
    return $ignore;
}
|
||||
|
||||
/**
|
||||
* Replacement for fnmatch() for windows systems.
|
||||
|
||||
0
_test/filelistdata/~$ignoreme.docx
Normal file
0
_test/filelistdata/~$ignoreme.docx
Normal file
85
conf/ignore.txt
Normal file
85
conf/ignore.txt
Normal file
@@ -0,0 +1,85 @@
|
||||
# This is a gitignore-style file to ignore typical temporary files and directories
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
|
||||
# temporary files which can be created if a process still holds an open handle to a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### macOS ###
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
# iCloud generated files
|
||||
*.icloud
|
||||
|
||||
### MicrosoftOffice ###
|
||||
*.tmp
|
||||
|
||||
# Word temporary
|
||||
~$*.doc*
|
||||
|
||||
# Word Auto Backup File
|
||||
Backup of *.doc*
|
||||
|
||||
# Excel temporary
|
||||
~$*.xls*
|
||||
|
||||
# Excel Backup File
|
||||
*.xlk
|
||||
|
||||
# PowerPoint temporary
|
||||
~$*.ppt*
|
||||
|
||||
# Visio autosave temporary files
|
||||
*.~vsd*
|
||||
|
||||
### Windows ###
|
||||
# Windows thumbnail cache files
|
||||
Thumbs.db
|
||||
Thumbs.db:encryptable
|
||||
ehthumbs.db
|
||||
ehthumbs_vista.db
|
||||
|
||||
# Dump file
|
||||
*.stackdump
|
||||
|
||||
# Folder config file
|
||||
[Dd]esktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN
|
||||
|
||||
# Windows shortcuts
|
||||
*.lnk
|
||||
Reference in New Issue
Block a user