summaryrefslogtreecommitdiff
path: root/platform/www/bin/wantedpages.php
blob: 6b3f78743346b354bc2ef3e51df3a89412985272 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env php
<?php

use splitbrain\phpcli\CLI;
use splitbrain\phpcli\Options;

// Unless the caller already defined it, point DOKU_INC at the directory one level
// above this script's directory (resolved to a real path, with a trailing slash).
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}

// Must be defined before init.php is loaded; presumably tells the bootstrap not to
// start a session for this command line run -- confirm in inc/init.php.
define('NOSESSION', 1);

require_once DOKU_INC . 'inc/init.php';

/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory, parses every
 * page and collects all internal links whose target page does not exist. The collected
 * pairs of wanted page and origin page are printed to standard output.
 */
class WantedPagesCLI extends CLI {

    /** dirFilter() verdict: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a sub directory that should be descended into */
    const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file (*.txt) */
    const DIR_PAGE = 3;

    /** @var bool|mixed value of the 'skip' option; when truthy only the first dimension is printed */
    private $skip = false;
    /** @var string|mixed value of the 'sort' option; 'origin' groups by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array first dimension (wanted or origin page id) => list of ids of the second dimension */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * Declares the optional 'namespace' argument and the 'sort' and 'skip' options.
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects the missing
     * link targets of all pages below the start directory, then prints the result
     * sorted by its first dimension.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options) {
        $args = $options->getArgs();
        // The directory of a dummy page ('xxx') inside the requested namespace is used
        // as the directory to start the search in.
        if($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        $this->sort = $options->getOpt('sort');

        $this->info("searching $startdir");

        foreach($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }
        ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                print "$main\n";
            } else {
                // the same link may occur several times on one page
                $subs = array_unique($subs);
                sort($subs);
                foreach($subs as $sub) {
                    printf("%-40s %s\n", $main, $sub);
                }
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * Classifies a single directory entry: sub directories not starting with an
     * underscore are namespaces, files ending in .txt are pages, everything else
     * (including '.' and '..') is skipped.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory the entry was read from
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            // directories with a leading underscore are not searched
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * Each returned page is an array with the keys 'id' (page id derived from the
     * file path) and 'file' (path of the page file).
     *
     * @param string $dir directory to search; assumed to be located below $conf['datadir']
     * @return array list of page arrays
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir) {
        // Number of leading characters to strip from a file path before turning it
        // into a page id: the data directory plus the one separator following it.
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        $dh = opendir($dir);
        // is_dir() does not guarantee the directory can be opened (e.g. missing
        // permissions); fail explicitly instead of handing false to readdir()
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dirFilter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            } else if($status === self::DIR_NS) {
                $pages = array_merge($pages, $this->getPages($dir . '/' . $entry));
            } else {
                $page = array(
                    'id' => pathID(substr($dir . '/' . $entry, $trunclen)),
                    'file' => $dir . '/' . $entry,
                );
                $pages[] = $page;
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the origin page
     * ('origin') or by the wanted page (any other value).
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // the file vanished or is unreadable; report it and go on with the next page
            $this->error('Unable to read page file ' . $page['file']);
            return;
        }

        $instructions = p_get_instructions($text);
        $cns = getNS($page['id']);
        $exists = false;
        $pid = $page['id'];
        foreach($instructions as $ins) {
            // camel case links only count when the wiki has them enabled
            if($ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink')) {
                $mid = $ins[1][0];
                // $mid and $exists are read back afterwards; resolve_pageid() is
                // expected to update both by reference
                resolve_pageid($cns, $mid, $exists);
                if(!$exists) {
                    list($mid) = explode('#', $mid); //record pages without hashes

                    if($this->sort === 'origin') {
                        $this->result[$pid][] = $mid;
                    } else {
                        $this->result[$mid][] = $pid;
                    }
                }
            }
        }
    }
}

// Main: create the command line tool and hand control to run(), which is not defined
// in WantedPagesCLI itself and therefore comes from the CLI base class
$cli = new WantedPagesCLI();
$cli->run();