Blame
Date:
Fri Dec 24 05:00:30 2021 UTC
Message:
Daily backup
01
2021-12-17
jrmu
<?php if (!defined('PmWiki')) exit();
02
2021-12-17
jrmu
/* Copyright 2005-2017 Patrick R. Michaud (pmichaud@pobox.com)
03
2021-12-17
jrmu
This file is part of PmWiki; you can redistribute it and/or modify
04
2021-12-17
jrmu
it under the terms of the GNU General Public License as published
05
2021-12-17
jrmu
by the Free Software Foundation; either version 2 of the License, or
06
2021-12-17
jrmu
(at your option) any later version. See pmwiki.php for full details.
07
2021-12-17
jrmu
08
2021-12-17
jrmu
This file provides various features to allow PmWiki to control
09
2021-12-17
jrmu
what web crawlers (robots) see when they visit the site. Of course
10
2021-12-17
jrmu
it's still possible to control robots at the webserver level
11
2021-12-17
jrmu
and via robots.txt, but this script provides some finer level
12
2021-12-17
jrmu
of control.
13
2021-12-17
jrmu
14
2021-12-17
jrmu
The $MetaRobots variable controls generation of the
15
2021-12-17
jrmu
<meta name='robots' ... /> tag in the head of the HTML document.
16
2021-12-17
jrmu
By default $MetaRobots is set so that robots do not index pages in
17
2021-12-17
jrmu
the Site, SiteAdmin, and PmWiki groups.
18
2021-12-17
jrmu
19
2021-12-17
jrmu
The $RobotPattern variable is used to determine if the user agent
20
2021-12-17
jrmu
accessing the site is a robot, and $IsRobotAgent is set accordingly.
21
2021-12-17
jrmu
By default this pattern identifies Googlebot, Yahoo! Slurp, msnbot,
22
2021-12-17
jrmu
BecomeBot, and HTTrack as robots.
23
2021-12-17
jrmu
24
2021-12-17
jrmu
If the agent is deemed a robot, then the $RobotActions array is
25
2021-12-17
jrmu
checked to see if robots are allowed to perform the given action,
26
2021-12-17
jrmu
and if not the robot is immediately sent an HTTP 403 Forbidden
27
2021-12-17
jrmu
response (or an HTTP 404 Not Found response if the requested page
does not exist).
28
2021-12-17
jrmu
29
2021-12-17
jrmu
If $EnableRobotCloakActions is set, then a pattern is added to
30
2021-12-17
jrmu
$FmtP to hide any "?action=" url parameters in page urls
31
2021-12-17
jrmu
generated by PmWiki for actions that robots aren't allowed to
32
2021-12-17
jrmu
access. This can greatly reduce the load on the server by
33
2021-12-17
jrmu
not providing the robot with links to pages that it will be
34
2021-12-17
jrmu
forbidden to index anyway.
35
2021-12-17
jrmu
36
2021-12-17
jrmu
Script maintained by Petko YOTOV www.pmwiki.org/petko
37
2021-12-17
jrmu
*/
38
2021-12-17
jrmu
39
2021-12-17
jrmu
## $MetaRobots provides the value for the <meta name='robots' ...> tag.
40
2021-12-17
jrmu
## A page is offered as 'index,follow' only when all of these hold:
## the action is 'browse', the page exists, and the page name is not in
## the PmWiki group (PmWiki.PmWiki itself excepted), the Site group or
## the SiteAdmin group.  Anything else is 'noindex,nofollow'.
SDV($MetaRobots,
  ($action == 'browse' && PageExists($pagename)
    && !preg_match('#^PmWiki[./](?!PmWiki$)|^Site(Admin)?[./]#', $pagename))
  ? 'index,follow' : 'noindex,nofollow');
## A false/empty $MetaRobots means no robots <meta> tag is registered.
if ($MetaRobots) {
  $HTMLHeaderFmt['robots'] = " <meta name='robots' content='\$MetaRobots' />\n";
}
47
2021-12-17
jrmu
48
2021-12-17
jrmu
## $RobotPattern is used to identify robots.
49
2021-12-17
jrmu
## The pattern covers any "<word>bot", "<word>spider" or "<word>crawler"
## token (optionally joined by '-', '_' or a space), followed by a list of
## agent names that do not follow that naming convention.
SDV($RobotPattern,'\\w+[-_ ]?(bot|spider|crawler)'
  .'|Slurp|Teoma|ia_archiver|HTTrack|XML Sitemaps|Jabse|Yandex|PageAnalyzer|Yeti|Riddler|Aboundex|ADmantX|WikiDo'
  .'|Pinterest|Qwantify|worldwebheritage|coccoc|HostWallker|Add Catalog|idmarch|MegaIndex|heritrix|SEOdiver');
## The User-Agent header is matched case-insensitively against the pattern.
## The header may be missing, in which case @$_SERVER[...] yields null;
## strval() turns that into '' because passing null as the preg_match()
## subject is deprecated as of PHP 8.1.
SDV($IsRobotAgent,
  $RobotPattern && preg_match("!$RobotPattern!i", strval(@$_SERVER['HTTP_USER_AGENT'])));
## The rest of this script only concerns robots.
if (!$IsRobotAgent) return;
55
2021-12-17
jrmu
56
2021-12-17
jrmu
## $RobotActions indicates which actions a robot is allowed to perform.
57
2021-12-17
jrmu
SDVA($RobotActions, array('browse' => 1, 'rss' => 1, 'dc' => 1));
## A robot asking for an action without a true entry in $RobotActions is
## turned away at once: 404 when the resolved page does not exist,
## 403 when it does.  Either way the request ends here.
if (!@$RobotActions[$action]) {
  $pagename = ResolvePageName($pagename);
  if (PageExists($pagename)) {
    header("HTTP/1.1 403 Forbidden");
    print("<h1>Forbidden</h1>");
  } else {
    header("HTTP/1.1 404 Not Found");
    print("<h1>Not Found</h1>");
  }
  exit();
}
69
2021-12-17
jrmu
70
2021-12-17
jrmu
## The following removes any ?action= parameters that robots aren't
71
2021-12-17
jrmu
## allowed to access.
72
2021-12-17
jrmu
## Callback for array_filter(): returns true when $a is truthy, false otherwise.
## Uses the canonical (bool) cast; the (boolean) alias is deprecated in PHP 8.5.
function cb_bool($a) { return (bool)$a; }
73
2021-12-17
jrmu
## When $EnableRobotCloakActions is enabled, register a $FmtP pattern that
## drops "?action=xxx" from page urls unless xxx is an action that robots
## are allowed to perform.
if (IsEnabled($EnableRobotCloakActions, 0)) {
  ## Alternation of the action names whose $RobotActions entry is true.
  $p = join('|', array_keys(array_filter($RobotActions, 'cb_bool')));
  ## $PageUrl is redefined so that its value starts with a literal
  ## "$ScriptUrl" -- presumably so the pattern below, which looks for that
  ## literal text, can match urls built from it (confirm against FmtPageName).
  $FmtPV['$PageUrl'] =
    'PUE(($EnablePathInfo)
         ? "\\$ScriptUrl/$group/$name"
         : "\\$ScriptUrl?n=$group.$name")';
  ## The lookahead has to see a *complete* allowed name: the inner (?!\w)
  ## stops an allowed "rss" from also sheltering e.g. "rssfoo", and makes an
  ## empty allow-list cloak every action (a bare (?!) would cloak none).
  $FmtP["/(\\\$ScriptUrl[^#\"'\\s<>]+)\\?action=(?!(?:$p)(?!\\w))\\w+/"] = '$1';
}
81
2021-12-17
jrmu
IRCNow