Refactor project structure
This commit is contained in:
336
src/Crawler.php
Normal file
336
src/Crawler.php
Normal file
@@ -0,0 +1,336 @@
|
||||
<?php
|
||||
|
||||
namespace dokuwiki\plugin\luxtools;
|
||||
|
||||
/**
 * Lists files and directories below a root path.
 *
 * Entries are filtered by an extension allow-list, by the ignore patterns
 * loaded from conf/ignore.txt and (for crawl()) by a glob-like pattern, and
 * are returned sorted according to the configured sort settings.
 */
class Crawler
{
    /** @var string regexp alternation of the allowed extensions ('' means no restriction) */
    protected $ext;

    /** @var string sort criterion; resolved to a compare<Sortby>() method of this class */
    protected $sortby = 'name';

    /** @var bool whether the sort order is reversed */
    protected $sortreverse = false;

    /** @var bool whether directories are grouped before files */
    protected $foldersFirst = false;

    /** @var string[] patterns to ignore */
    protected $ignore = [];

    /**
     * Initializes the crawler
     *
     * @param string $extensions The extensions to allow (comma separated list)
     */
    public function __construct($extensions)
    {
        $parts = array_map('trim', explode(',', (string)$extensions));

        // An empty entry (e.g. from a trailing comma) would add an empty
        // alternative to the regexp, which matches every file name.
        $parts = array_filter($parts, static fn($part) => $part !== '');

        // Quote for the '/' delimiter used in isExtensionAllowed().
        $parts = array_map(static fn($part) => preg_quote($part, '/'), $parts);
        $this->ext = implode('|', $parts);

        $this->ignore = $this->loadIgnores();
    }

    /**
     * Set the sort criterion.
     *
     * @param string $sortby suffix of one of the compare*() methods (e.g. 'name', 'mtime');
     *                       unknown values leave the items unsorted
     */
    public function setSortBy($sortby)
    {
        $this->sortby = $sortby;
    }

    /**
     * Enable or disable reverse sorting.
     *
     * @param bool $sortreverse
     */
    public function setSortReverse($sortreverse)
    {
        $this->sortreverse = (bool)$sortreverse;
    }

    /**
     * Enable or disable grouping of directories before files.
     *
     * @param bool $foldersFirst
     */
    public function setFoldersFirst($foldersFirst)
    {
        $this->foldersFirst = (bool)$foldersFirst;
    }

    /**
     * Does a (recursive) crawl for finding files based on a given pattern.
     * Based on a safe glob reimplementation using fnmatch and opendir.
     *
     * Directories are only reported (and descended into) when $recursive is set.
     *
     * @param string $root the base path; $root . $local is the directory that is read
     * @param string $local the path relative to $root, used to build each entry's 'local' value
     * @param string $pattern the pattern to match file names against
     * @param bool $recursive whether to search recursively
     * @param string $titlefile the name of the title file
     * @return array a sorted, hierarchical file list; empty if nothing could be found
     *
     * @see http://www.php.net/manual/en/function.glob.php#71083
     */
    public function crawl($root, $local, $pattern, $recursive, $titlefile)
    {
        $path = $root . $local;

        // do not descend into wiki or data directories
        if (Path::isWikiControlled($path)) return [];

        // opendir() raises a warning for missing paths and plain files
        if (!is_dir($path)) return [];
        if (($dir = opendir($path)) === false) return [];

        $result = [];
        while (($file = readdir($dir)) !== false) {
            if ($this->isHiddenOrTitleFile($file, $titlefile)) continue;

            $self = $local . '/' . $file;
            $filepath = $path . '/' . $file;
            if (!is_readable($filepath)) continue;

            $isDir = is_dir($filepath);

            // files have to match the pattern; directories pass when recursing
            if (!$this->fnmatch($pattern, $file) && !($isDir && $recursive)) continue;
            if (!$isDir && !$this->isExtensionAllowed($file)) continue;
            if ($this->isFileIgnored($file)) continue;

            // directories are never reported by a flat crawl
            if ($isDir && !$recursive) continue;

            $children = false;
            $treesize = 1;
            if ($isDir) {
                $children = $this->crawl($root, $self, $pattern, $recursive, $titlefile);
                // a directory's tree size is the sum of its children's tree sizes
                $treesize = 0;
                foreach ($children as $child) {
                    $treesize += $child['treesize'];
                }
            }

            $result[] = [
                'name' => $isDir ? $this->readTitle($filepath, $titlefile, $file) : $file,
                'local' => $self,
                'path' => $filepath,
                'mtime' => filemtime($filepath),
                'ctime' => filectime($filepath),
                'size' => filesize($filepath),
                'children' => $children,
                'treesize' => $treesize,
            ];
        }
        closedir($dir);

        return $this->sortItems($result);
    }

    /**
     * List the direct children (files and directories) of a given local path.
     *
     * Unlike crawl(), this includes directories even when not recursing.
     *
     * @param string $root the base path; $root . $local is the directory that is read
     * @param string $local the path relative to $root, used to build each entry's 'local' value
     * @param string $titlefile the name of the title file
     * @return array a sorted, flat list of entries; empty if the directory cannot be listed
     */
    public function listDirectory($root, $local, $titlefile)
    {
        $path = rtrim($root . $local, '/');

        // do not list wiki or data directories
        if (Path::isWikiControlled($path)) return [];

        // opendir() raises a warning for missing paths and plain files
        if (!is_dir($path)) return [];
        if (($dir = opendir($path)) === false) return [];

        // build local paths consistent with crawl() (leading slash for root)
        $localPrefix = rtrim($local, '/') . '/';

        $result = [];
        while (($file = readdir($dir)) !== false) {
            if ($this->isHiddenOrTitleFile($file, $titlefile)) continue;

            $filepath = $path . '/' . $file;
            if (!is_readable($filepath)) continue;

            $isDir = is_dir($filepath);
            if (!$isDir && !$this->isExtensionAllowed($file)) continue;
            if ($this->isFileIgnored($file)) continue;

            $result[] = [
                'name' => $isDir ? $this->readTitle($filepath, $titlefile, $file) : $file,
                'local' => $localPrefix . $file,
                'path' => $filepath,
                'mtime' => filemtime($filepath),
                'ctime' => filectime($filepath),
                'size' => $isDir ? 0 : filesize($filepath),
                'children' => false,
                'treesize' => 1,
                'isdir' => $isDir,
            ];
        }
        closedir($dir);

        return $this->sortItems($result);
    }

    /**
     * Check whether a directory entry is skipped unconditionally.
     *
     * @param string $file the entry name as returned by readdir()
     * @param string $titlefile the name of the title file
     * @return bool true for hidden/system entries (leading dot) and the title file itself
     */
    private function isHiddenOrTitleFile($file, $titlefile)
    {
        return $file[0] === '.' || $file === $titlefile;
    }

    /**
     * Read the display name of a directory from its title file.
     *
     * @param string $dirpath the path of the directory
     * @param string $titlefile the name of the title file
     * @param string $fallback the name to use when there is no readable title file
     * @return string the content of the title file or $fallback
     */
    private function readTitle($dirpath, $titlefile, $fallback)
    {
        $title = $dirpath . '/' . $titlefile;
        // is_file() keeps an empty $titlefile from resolving to the directory itself
        if (is_file($title) && is_readable($title)) {
            return io_readFile($title, false);
        }
        return $fallback;
    }

    /**
     * Sort the given items by the current sortby and sortreverse settings
     *
     * @param array $items
     * @return array the sorted items; unchanged if no compare method exists for the sort criterion
     */
    protected function sortItems($items)
    {
        $callback = [$this, 'compare' . ucfirst($this->sortby)];
        if (!is_callable($callback)) return $items;

        if (!$this->foldersFirst) {
            usort($items, $callback);
            return $this->sortreverse ? array_reverse($items) : $items;
        }

        // Optional grouping: keep directories before files.
        // Reverse ordering is implemented by inverting the comparison instead of
        // array_reverse(), so the directory-first grouping stays intact.
        usort($items, function ($a, $b) use ($callback) {
            $aIsDir = $this->isDirectoryItem($a);
            $bIsDir = $this->isDirectoryItem($b);
            if ($aIsDir !== $bIsDir) {
                return $aIsDir ? -1 : 1;
            }

            $cmp = call_user_func($callback, $a, $b);
            return $this->sortreverse ? -$cmp : $cmp;
        });

        return $items;
    }

    /**
     * Detect whether an item represents a directory.
     * Supports both crawl() results (children tree) and listDirectory() results (isdir).
     *
     * @param mixed $item
     * @return bool
     */
    protected function isDirectoryItem($item)
    {
        if (!is_array($item)) return false;
        if (!empty($item['isdir'])) return true;
        return array_key_exists('children', $item) && $item['children'] !== false;
    }

    /**
     * Check if a file is allowed by the configured extensions
     *
     * The check is a case-insensitive match of the configured extensions
     * against the end of the file name.
     *
     * @param string $file
     * @return bool
     */
    protected function isExtensionAllowed($file)
    {
        if ($this->ext === '') return true; // no restriction
        return preg_match('/(' . $this->ext . ')$/i', $file) === 1;
    }

    /**
     * Check if a file is ignored by the ignore patterns
     *
     * @param string $file
     * @return bool
     */
    protected function isFileIgnored($file)
    {
        foreach ($this->ignore as $pattern) {
            if ($this->fnmatch($pattern, $file)) return true;
        }
        return false;
    }

    /**
     * Load the ignore patterns from the ignore.txt file
     *
     * Everything from a '#' to the end of a line is treated as a comment.
     * A missing or unreadable file yields no patterns.
     *
     * @return string[]
     */
    protected function loadIgnores()
    {
        $file = __DIR__ . '/../conf/ignore.txt';
        if (!is_readable($file)) return [];

        $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) return [];

        $patterns = [];
        foreach ($lines as $line) {
            $pattern = trim((string)preg_replace('/\s*#.*$/', '', $line));
            // compare against '' explicitly so that a pattern of "0" is kept
            if ($pattern !== '') $patterns[] = $pattern;
        }
        return $patterns;
    }

    /**
     * Replacement for fnmatch() for windows systems.
     *
     * Supports '*', '?' and bracket expressions including ranges ([a-z]) and
     * negation ([!a-z]). Matching is case-insensitive.
     *
     * @author jk at ricochetsolutions dot com
     * @link http://www.php.net/manual/en/function.fnmatch.php#71725
     *
     * @param string $pattern the shell wildcard pattern
     * @param string $string the string to test
     * @return bool true if the whole string matches the pattern
     */
    protected function fnmatch($pattern, $string)
    {
        // preg_quote() escapes every wildcard character; translate the escaped
        // forms back into their regexp equivalents.
        $regex = strtr(
            preg_quote($pattern, '#'),
            [
                '\*' => '.*',
                '\?' => '.',
                '\[\!' => '[^',
                '\[' => '[',
                '\]' => ']',
                // preg_quote() escapes '-', which would turn the range [a-z]
                // into the three literal characters 'a', '-' and 'z'
                '\-' => '-',
            ]
        );

        return preg_match('#^' . $regex . '$#i', $string) === 1;
    }

    /**
     * Compare two items by name (case-sensitive).
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareName($a, $b)
    {
        return strcmp($a['name'], $b['name']);
    }

    /**
     * Compare two items by name, ignoring case.
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareIname($a, $b)
    {
        return strcmp(strtolower($a['name']), strtolower($b['name']));
    }

    /**
     * Compare two items by their 'ctime' value.
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareCtime($a, $b)
    {
        return $a['ctime'] <=> $b['ctime'];
    }

    /**
     * Compare two items by their 'mtime' value.
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareMtime($a, $b)
    {
        return $a['mtime'] <=> $b['mtime'];
    }

    /**
     * Compare two items by their 'size' value.
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareSize($a, $b)
    {
        return $a['size'] <=> $b['size'];
    }
}
|
||||
Reference in New Issue
Block a user