2
0
mirror of https://github.com/boostorg/website.git synced 2026-02-02 09:22:19 +00:00
Files
website/common/code/boost_archive.php
Daniel James abde12a9b8 Bundle the 'init' and 'content' methods into classes.
I'm not sure exactly where I'm going here, but I'm trying to create
something more composable, which is why I used the name 'filter'.

[SVN r59885]
2010-02-25 08:41:22 +00:00

609 lines
20 KiB
PHP

<?php
/*
Copyright 2005-2008 Redshift Software, Inc.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
*/
require_once(dirname(__FILE__) . '/boost.php');
/*
 * Locates a single file inside a versioned zip archive and prepares it for
 * display.
 *
 * The constructor splits the virtual path into a version and a key, picks an
 * extractor ("filter") for the key from a table of filename patterns, and
 * then either streams the file straight to the client (raw) or extracts it
 * into $content_ so a filter object can render it later via content().
 */
class boost_archive
{
    var $version_ = NULL;            // First capture group of $pattern.
    var $key_ = NULL;                // Second capture group of $pattern.
    var $file_ = NULL;               // Path of the file inside the zip.
    var $archive_ = NULL;            // Path of the zip file on disk.
    var $extractor_ = NULL;          // Extractor name, e.g. 'text', 'raw', 'h404'.
    var $extractor_instance_ = NULL; // Instance of "<extractor>_filter".
    var $type_ = NULL;               // MIME type sent for raw output.
    var $preprocess_ = NULL;         // Optional callback applied to the content.
    var $title_ = NULL;              // Page title; set by the filter's init().
    var $charset_ = NULL;            // Page charset; set by the filter's init().
    var $content_ = NULL;            // Extracted file content (non-raw only).

    /*
     * $pattern             Regex applied to $vpath; group 1 is the version,
     *                      group 2 is the key (file path).
     * $vpath               The requested virtual path.
     * $content_map         Extra handler entries, consulted before the
     *                      built-in ones. Each entry is
     *                      array(pattern, key-pattern, extractor, mime-type
     *                      [, preprocess callback]). Only the key-pattern
     *                      (index 1) is matched here; index 0 is unused in
     *                      this class.
     * $get_as_raw          Stream the file unprocessed whatever its type.
     * $archive_subdir      Whether files live under "<version>/" in the zip.
     * $archive_dir         Directory containing "<version>.zip".
     * $archive_file_prefix Prefix prepended to the path inside the zip.
     */
    function boost_archive(
        $pattern,
        $vpath,
        $content_map = array(),
        $get_as_raw = false,
        $archive_subdir = true,
        $archive_dir = ARCHIVE_DIR,
        $archive_file_prefix = ARCHIVE_FILE_PREFIX)
    {
        $path_parts = array();
        preg_match($pattern, $vpath, $path_parts);

        $info_map = array_merge($content_map, array(
            array('@.*@','@[.](txt|py|rst|jam|v2|bat|sh|xml|qbk)$@i','text','text/plain'),
            array('@.*@','@[.](c|h|cpp|hpp)$@i','cpp','text/plain'),
            array('@.*@','@[.]png$@i','raw','image/png'),
            array('@.*@','@[.]gif$@i','raw','image/gif'),
            array('@.*@','@[.](jpg|jpeg|jpe)$@i','raw','image/jpeg'),
            array('@.*@','@[.]css$@i','raw','text/css'),
            array('@.*@','@[.]js$@i','raw','application/x-javascript'),
            array('@.*@','@[.]pdf$@i','raw','application/pdf'),
            array('@.*@','@[.](html|htm)$@i','raw','text/html'),
            array('@.*@','@[^.](Jamroot|Jamfile|ChangeLog)$@i','text','text/plain'),
            array('@.*@','@[.]dtd$@i','raw','application/xml-dtd'),
            ));

        // If the pattern did not match, leave both parts NULL instead of
        // reading undefined offsets; no handler will match and we fall
        // through to the 404 filter below.
        $this->version_ = isset($path_parts[1]) ? $path_parts[1] : NULL;
        $this->key_ = isset($path_parts[2]) ? $path_parts[2] : NULL;

        if ($archive_subdir)
        {
            $this->file_ = $archive_file_prefix . $this->version_ . '/' . $this->key_;
        }
        else
        {
            $this->file_ = $archive_file_prefix . $this->key_;
        }
        $this->archive_ = str_replace('\\','/', $archive_dir . '/' . $this->version_ . '.zip');

        // The first handler whose key pattern matches wins.
        foreach ($info_map as $i)
        {
            if (preg_match($i[1],$this->key_))
            {
                $this->extractor_ = $i[2];
                $this->type_ = $i[3];
                $this->preprocess_ = isset($i[4]) ? $i[4] : NULL;
                break;
            }
        }

        $unzip =
            UNZIP
            .' -p '.escapeshellarg($this->archive_)
            .' '.escapeshellarg($this->file_);

        if (! $this->extractor_)
        {
            # File doesn't exist, or we don't know how to handle it.
            $this->extractor_ = 'h404';
            $this->extractor_instance_ = new h404_filter;
            $this->extractor_instance_->init($this);
        }
        else if ($get_as_raw || $this->extractor_ == 'raw')
        {
            $this->_extract_raw($unzip);
            $this->extractor_instance_ = new raw_filter;
            //~ print "--- $unzip";
        }
        else
        {
            /* We pre-extract so we can get things like meta tag information
               before we have to print it out. */
            $this->content_ = $this->_extract_string($unzip);

            // _extract_string() switches extractor_ to 'h404' on failure, so
            // the filter class must be chosen after the extraction.
            $extractor_name = $this->extractor_.'_filter';
            $this->extractor_instance_ = new $extractor_name;
            $this->extractor_instance_->init($this);

            if($this->preprocess_) {
                $this->content_ = call_user_func($this->preprocess_, $this->content_);
            }

            if ($this->extractor_ == 'simple')
            {
                $this->extractor_instance_->content($this);
            }
        }
    }

    /* Prints the Content-Type meta tag and the title for the page head. */
    function content_head()
    {
        $charset = $this->charset_ ? $this->charset_ : 'us-ascii';
        $title = $this->title_ ? 'Boost C++ Libraries - '.$this->title_ : 'Boost C++ Libraries';

        print '<meta http-equiv="Content-Type" content="text/html; charset='.$charset.'" />'
            ."\n"
            .'<title>'.$title.'</title>';
    }

    /* True when the selected extractor is 'basic'. */
    function is_basic()
    {
        return $this->extractor_ == 'basic';
    }

    /* True when the selected extractor is 'raw' or 'simple'. */
    function is_raw()
    {
        return $this->extractor_ == 'raw' || $this->extractor_ == 'simple';
    }

    /*
     * Runs $unzip and returns everything it writes to stdout.
     *
     * On failure (the command cannot be started, or it exits non-zero) the
     * extractor is switched to 'h404' so that h404_filter renders the page.
     * The return value is then a readable error message when the host name
     * contains 'beta', and an empty string otherwise.
     */
    function _extract_string($unzip)
    {
        $file_handle = popen($unzip,'r');

        if ($file_handle) {
            $text = '';
            while (!feof($file_handle)) {
                $text .= fread($file_handle,8*1024);
            }
            $exit_status = pclose($file_handle);
        }
        else {
            // popen() failed: there is no handle to read or close.
            $exit_status = -1;
        }

        if($exit_status == 0) {
            return $text;
        }

        // Must be 'h404' (not '404'): the constructor instantiates
        // "<extractor>_filter", and the 404 filter is h404_filter.
        $this->extractor_ = 'h404';

        $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
        return strstr($host, 'beta')
            ? unzip_error($exit_status) : '';
    }

    /*
     * Sends the Content-type header for $this->type_ and streams the output
     * of $unzip directly to the client.
     */
    function _extract_raw($unzip)
    {
        header('Content-type: '.$this->type_);
        ## header('Content-Disposition: attachment; filename="downloaded.pdf"');

        $file_handle = popen($unzip,'rb');
        if (!$file_handle) {
            // Nothing to stream or close if the command could not be started.
            echo 'Error extracting file: unable to run unzip.';
            return;
        }

        fpassthru($file_handle);
        $exit_status = pclose($file_handle);

        // Don't display errors for a corrupt zip file, as we seemed to
        // be getting them for legitimate files.
        if($exit_status > 3)
            echo 'Error extracting file: '.unzip_error($exit_status);
    }

    /* Delegates rendering to the filter instance chosen by the constructor. */
    function content()
    {
        if ($this->extractor_instance_)
        {
            $this->extractor_instance_->content($this);
        }
    }
}
// Placeholder filter instantiated by boost_archive for raw output. The file
// has already been streamed to the client by the time this object is
// created, so it has nothing to initialise or render.
class raw_filter
{
// No methods, since they shouldn't be called at the moment.
}
/* Renders a plain text file as an escaped <pre> block under a heading. */
class text_filter
{
    /* The page title is the HTML-escaped archive key. */
    function init($archive)
    {
        $escaped_key = htmlentities($archive->key_);
        $archive->title_ = $escaped_key;
    }

    /* Prints the key as an <h3> followed by the escaped file content. */
    function content($archive)
    {
        $heading = htmlentities($archive->key_);
        $body = htmlentities($archive->content_);

        print "<h3>" . $heading . "</h3>\n"
            . "<pre>\n"
            . $body
            . "</pre>\n";
    }
}
/*
 * Renders a C/C++ source file as an escaped <pre> block and turns
 * #include directives that name a boost header into hyperlinks.
 */
class cpp_filter
{
    /* The page title is the HTML-escaped archive key. */
    function init($archive)
    {
        $escaped_key = htmlentities($archive->key_);
        $archive->title_ = $escaped_key;
    }

    /* Prints the heading and the escaped source with linked includes. */
    function content($archive)
    {
        $source = htmlentities($archive->content_);
        $heading = "<h3>" . htmlentities($archive->key_) . "</h3>\n";

        // Replace every "dir/" component of the key with "../" and drop the
        // file name, giving a relative path back up from the file's directory.
        $climbed = preg_replace('@([^/]+/)@', '../', $archive->key_);
        $root = dirname($climbed);
        $link = '${1}<a href="' . $root . '/${2}">${2}</a>';

        // The source is already escaped, so the include delimiters appear
        // as entities: &lt; for <...> includes and &quot; for "..." includes.
        $include_patterns = array(
            '@(#[ ]*include[ ]+&lt;)(boost[^&]+)@Ssm',
            '@(#[ ]*include[ ]+&quot;)(boost[^&]+)@Ssm',
        );
        foreach ($include_patterns as $include_pattern) {
            $source = preg_replace($include_pattern, $link, $source);
        }

        print $heading;
        print "<pre>\n";
        print $source;
        print "</pre>\n";
    }
}
/*
 * Shared base for the HTML filters: pulls the charset and title out of the
 * document and rewrites links so they work when served from this site.
 */
class html_base
{
    /*
     * Copies the charset (from a "text/html; charset=..." declaration) and
     * the <title> text from the content onto the archive, when present.
     */
    function init($archive)
    {
        preg_match('@text/html; charset=([^\s"\']+)@i',$archive->content_,$charset);
        if (isset($charset[1]))
        {
            $archive->charset_ = $charset[1];
        }

        preg_match('@<title>([^<]+)</title>@i',$archive->content_,$title);
        if (isset($title[1]))
        {
            $archive->title_ = $title[1];
        }
    }

    /*
     * Returns the archive content with its links rewritten. Nothing is
     * printed here; subclasses post-process and print the result.
     */
    function content($archive)
    {
        // Applied in order; each rewrite sees the output of the previous one.
        // The dots in the host names are escaped so that only the literal
        // boost.org hosts are rewritten (an unescaped '.' matches any
        // character).
        $rewrites = array(
            // Absolute links to www.boost.org become site-relative.
            array('@href="?http://www\.boost\.org/?([^"\s]*)"?@i',
                'href="/${1}"'),
            // Absolute links to boost.org become site-relative.
            array('@href="?http://boost\.org/?([^"\s]*)"?@i',
                'href="/${1}"'),
            // Relative "people/*.htm" links go to /users/people/*.html
            // (the replacement appends the trailing 'l').
            array('@href="?(?:\.\./)+people/(.*\.htm)"?@i',
                'href="/users/people/${1}l"'),
            // Relative links to LICENSE_*.txt go to the site root.
            array('@href="?(?:\.\./)+(LICENSE_[^"\s]*\.txt)"?@i',
                'href="/${1}"'),
            // Remaining http/mailto anchors are marked external; an
            // existing class attribute before href is replaced.
            array('@<a\s+(class="[^"]+")?\s*href="?(http|mailto)(:[^"\s]*)"?@i',
                '<a class="external" href="${2}${3}"'),
        );

        $text = $archive->content_;
        foreach ($rewrites as $rewrite) {
            $text = preg_replace($rewrite[0], $rewrite[1], $text);
        }
        return $text;
    }
}
/*
 * Filter for BoostBook-generated pages: keeps the part of the page from the
 * "spirit-nav" div up to </body>, strips presentational markup and swaps the
 * navigation/admonition images for CSS-styled placeholders.
 */
class boost_book_filter extends html_base
{
    function init($archive)
    {
        parent::init($archive);
    }

    /* Prints the cleaned-up page fragment. */
    function content($archive)
    {
        $text = parent::content($archive);

        // Drop everything before the navigation bar, if there is one.
        $nav_start = strpos($text,'<div class="spirit-nav">');
        if ($nav_start !== FALSE) {
            $text = substr($text,$nav_start);
        }

        // Drop </body> and everything after it. strpos() returns FALSE when
        // the tag is missing; passing that to substr() as a length would
        // discard the whole page, so only truncate when the tag was found.
        $body_end = strpos($text,'</body>');
        if ($body_end !== FALSE) {
            $text = substr($text,0,$body_end);
        }

        $text = str_replace('<hr>','',$text);
        $text = str_replace('<table width="100%">','<table class="footer-table">',$text);
        $text = str_replace('<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%">','<table class="footer-table">',$text);

        // Remove presentational attributes (unquoted-to-whitespace values).
        $text = preg_replace(
            '@[\s]+(border|cellpadding|cellspacing|width|height|valign|frame|rules|naturalsizeflag|background)=[^\s>]+@i',
            '',
            $text );

        /* */
        // Replace the known images with a spacer carrying a "<name>_image"
        // class. The replacement is applied eight times, as in the original.
        for ($i = 0; $i < 8; $i++) {
            $text = preg_replace(
                '@<img src="[\./a-z]*images/(prev|up|home|next|tip|note|warning|important|caution|sidebar|hint|alert)\.png" alt="([^"]+)"([ /]*)>@Ssm',
                '<img src="/gfx/space.png" alt="${2}" class="${1}_image" />',
                $text );
        }
        /* */

        print $text;
    }
}
// Filter for hand-written library documentation pages. It extracts the body
// of the page, drops the legacy boost.png header table, removes presentational
// markup, and maps old theme images onto CSS classes before printing.
class boost_libs_filter extends html_base
{
function init($archive)
{
parent::init($archive);
}
// Prints the cleaned-up body of the page. Prints nothing at all when no
// starting point for the body can be found.
function content($archive)
{
// Start from the link-rewritten HTML produced by html_base::content().
$text = parent::content($archive);
// Locate the opening and closing body tags (match text plus byte offset).
preg_match('@<body[^>]*>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
preg_match('@</body>@i',$text,$body_end,PREG_OFFSET_CAPTURE);
if (!isset($body_begin[0]))
{
//~ Attempt to recover some content from illegal HTML that is missing the
//~ body tag: start after </head> instead.
preg_match('@</head>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
}
if (!isset($body_begin[0]))
{
//~ Still nothing: start after the opening <html> tag.
preg_match('@<html[^>]*>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
}
if (!isset($body_begin[0]))
{
//~ Last resort: start after the first common content tag.
preg_match('@<(hr|div|img|p|h1|h2|h3|h4)[^>]*>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
}
if (!isset($body_begin[0]))
{
// No usable starting point; give up without printing anything.
return;
}
else if (!isset($body_end[0]))
{
// No </body>: keep everything after the starting tag.
$text = substr($text,
$body_begin[0][1]+strlen($body_begin[0][0]));
}
else
{
// Keep only what lies between the starting tag and </body>.
$text = substr($text,
$body_begin[0][1]+strlen($body_begin[0][0]),
$body_end[0][1]-($body_begin[0][1]+strlen($body_begin[0][0])) );
}
# nasty code, because (?!fubar) causes an ICE...
// Look at the first table in the body. If it contains the boost.png logo it
// is treated as the old page header: everything up to and including that
// table is replaced by the contents of its first cell that holds an
// <h1>, <h2> or <p> (or by nothing if no such cell is found).
preg_match('@<table[^<>]*>?@i',$text,$table_begin,PREG_OFFSET_CAPTURE);
preg_match('@</table>@i',$text,$table_end,PREG_OFFSET_CAPTURE);
if (isset($table_begin[0]) && isset($table_end[0])) {
$table_contents_start = $table_begin[0][1] + strlen($table_begin[0][0]);
$table_contents = substr($text, $table_contents_start,
$table_end[0][1] - $table_contents_start);
if(strpos($table_contents, 'boost.png') !== FALSE) {
preg_match('@<td[^<>]*>?([^<]*<(h[12]|p).*?)</td>@is', $table_contents,
$table_contents_header, PREG_OFFSET_CAPTURE);
// The + 8 skips over the closing tag itself: strlen('</table>') is 8.
$text = (isset($table_contents_header[1]) ? $table_contents_header[1][0] : '').
substr($text, $table_end[0][1] + 8);
}
}
#else
#{
# $text = substr($text,$h1_begin[0][1]);
#}
#if (isset($title[1]))
#{
# $text = "<h1>${title[1]}</h1>\n" . $text;
#}
// Remove any remaining boost.png image, together with a wrapping link.
$text = preg_replace(
'@(<a[^>]+>[\s]*)?<img.*boost\.png[^>]*>([\s]*</a>)?@i',
'',
$text );
// Replace align="right" / align="absmiddle" on images with CSS classes.
// Attributes that follow the align attribute in the tag are dropped.
$text = preg_replace(
'@<img(.*)align="?right"?[^>]*>@i',
'<img${1} class="right-inset" />',
$text );
$text = preg_replace(
'@<img(.*)align="?absmiddle"?[^>]*>@i',
'<img${1} class="inline" />',
$text );
/* Remove certain attributes */
$text = preg_replace(
'@[\s]+(border|cellpadding|cellspacing|width|height|valign|align|frame|rules|naturalsizeflag|background)=("[^"]*"?|\'[^\']*\'?|[^\s/>]+)@i',
'',
$text );
// Remove a valueless "border" attribute directly after "<table".
$text = preg_replace(
'@<table[\s]+(border)[^\s>]*@i',
'<table',
$text );
// Remove <font> and <hr> tags (opening and closing forms).
$text = preg_replace(
'@<[/]?(font|hr)[^>]*>@i',
'',
$text );
// Tidy tags left with trailing whitespace before '>', e.g. "<table >".
$text = preg_replace(
'@<([^\s]+)[\s]+>@i',
'<${1}>',
$text );
// _preg_replace_bounds() is not defined in this file. NOTE(review): from
// these calls it appears to take an opening pattern, a closing pattern and
// a replacement for each, rewriting matched begin/end pairs - confirm
// against its definition. The calls below target <blockquote> wrappers
// around <pre>, <p>, <table>, <li> and <h2>.
$text = _preg_replace_bounds(
'@<blockquote>[\s]*(<pre>)@i','@(</pre>)[\s]*</blockquote>@i',
'${1}','${1}',
$text );
$text = _preg_replace_bounds(
'@<blockquote>[\s]*(<p>)@i','@(</p>)[\s]*</blockquote>@i',
'${1}','${1}',
$text );
$text = _preg_replace_bounds(
'@<blockquote>[\s]*(<table>)@i','@(</table>)[\s]*</blockquote>@i',
'${1}','${1}',
$text );
$text = _preg_replace_bounds(
'@<blockquote>[\s]*<li>@i','@</li>[\s]*</blockquote>@i',
'<ul><li>','</li></ul>',
$text );
$text = _preg_replace_bounds(
'@(?:<blockquote>[\s]*)+<h2>@i','@</h2>(?:[\s]*</blockquote>)+@i',
'<h2>','</h2>',
$text );
// Insert </a> after a named anchor that is followed (after optional
// whitespace) by another tag whose first character is not 'a'.
$text = preg_replace(
'@(<a name=[^\s>]+[\s]*>)[\s]*(</?[^a])@i',
'${1}</a>${2}',
$text );
// Tag specific table layouts with CSS classes: a single-row, three-cell
// table whose first cell mentions "_arr" (the arrow navigation images),
// note boxes, and tables whose first cell has class "table_title".
$text = preg_replace(
'@<table>([\s]+<tr>[\s]+<td>.*_arr.*</td>[\s]+<td>.*</td>[\s]+<td>.*</td>[\s]+</tr>[\s]+)</table>@i',
'<table class="pyste-nav">${1}</table>',
$text );
$text = preg_replace(
'@<table>([\s]+<tr>[\s]+<td)[\s]+class="note_box">@i',
'<table class="note_box">${1}>',
$text );
$text = preg_replace(
'@<table>([\s]+<tr>[\s]+<td[\s]+class="table_title">)@i',
'<table class="toc">${1}',
$text );
// Replace references to the old theme/*.gif images with the spacer image
// plus a class naming the image.
$text = preg_replace(
'@src=".*theme/u_arr\.gif"@i',
'src="/gfx/space.png" class="up_image"',
$text );
$text = preg_replace(
'@src=".*theme/l_arr\.gif"@i',
'src="/gfx/space.png" class="prev_image"',
$text );
$text = preg_replace(
'@src=".*theme/r_arr\.gif"@i',
'src="/gfx/space.png" class="next_image"',
$text );
$text = preg_replace(
'@src=".*theme/u_arr_disabled\.gif"@i',
'src="/gfx/space.png" class="up_image_disabled"',
$text );
$text = preg_replace(
'@src=".*theme/l_arr_disabled\.gif"@i',
'src="/gfx/space.png" class="prev_image_disabled"',
$text );
$text = preg_replace(
'@src=".*theme/r_arr_disabled\.gif"@i',
'src="/gfx/space.png" class="next_image_disabled"',
$text );
$text = preg_replace(
'@src=".*theme/note\.gif"@i',
'src="/gfx/space.png" class="note_image"',
$text );
$text = preg_replace(
'@src=".*theme/alert\.gif"@i',
'src="/gfx/space.png" class="caution_image"',
$text );
$text = preg_replace(
'@src=".*theme/bulb\.gif"@i',
'src="/gfx/space.png" class="tip_image"',
$text );
// Bullet and lens images are removed outright.
$text = preg_replace(
'@<img src=".*theme/(?:bullet|lens)\.gif">@i',
'',
$text );
// Arrow images stay but are marked as inline.
$text = preg_replace(
'@(<img src=".*theme/(?:arrow)\.gif")>@i',
'${1} class="inline">',
$text );
print $text;
}
}
/*
 * Filter that keeps the part of the page from the "spirit-nav" div up to
 * </body>, points images at /style/css_0/ and strips presentational markup.
 */
class boost_frame1_filter extends html_base
{
    function init($archive)
    {
        parent::init($archive);
    }

    /* Prints the cleaned-up page fragment. */
    function content($archive)
    {
        $text = parent::content($archive);

        // Drop everything before the navigation bar, if there is one.
        $nav_start = strpos($text,'<div class="spirit-nav">');
        if ($nav_start !== FALSE) {
            $text = substr($text,$nav_start);
        }

        // Drop </body> and everything after it. strpos() returns FALSE when
        // the tag is missing; passing that to substr() as a length would
        // discard the whole page, so only truncate when the tag was found.
        $body_end = strpos($text,'</body>');
        if ($body_end !== FALSE) {
            $text = substr($text,0,$body_end);
        }

        // Rewrite image paths under images/ to /style/css_0/. The pattern is
        // greedy and matches across lines, and it is applied eight times, as
        // in the original.
        for ($i = 0; $i < 8; $i++) {
            $text = preg_replace(
                '@<img src="[\./]*images/(.*\.png)" alt="(.*)"([ ][/])?>@Ssm',
                '<img src="/style/css_0/${1}" alt="${2}" />',
                $text );
        }

        $text = str_replace('<hr>','',$text);
        $text = str_replace('<table width="100%">','<table class="footer-table">',$text);

        // Remove presentational attributes (unquoted-to-whitespace values).
        $text = preg_replace(
            '@[\s]+(border|cellpadding|cellspacing|width|height|valign|frame|rules|naturalsizeflag|background)=[^\s>]+@i',
            '',
            $text );

        print $text;
    }
}
/*
 * Filter that outputs the page with only the link rewriting inherited from
 * html_base applied.
 */
class simple_filter extends html_base
{
    /* Intentionally empty: charset and title are not extracted. */
    function init($archive)
    {
    }

    /* Prints the link-rewritten page unchanged. */
    function content($archive)
    {
        $rewritten = parent::content($archive);
        print $rewritten;
    }
}
/*
 * Filter that keeps the page intact but injects the site favicon and
 * stylesheet into the head and the shared heading after the <body> tag.
 */
class basic_filter extends html_base
{
    /* Intentionally empty: charset and title are not extracted. */
    function init($archive)
    {
    }

    /*
     * Prints the link-rewritten page with the extra head links and heading
     * inserted. A page without a <body> tag is printed unmodified.
     */
    function content($archive)
    {
        $text = parent::content($archive);

        // Close the injected <link> tags XHTML-style only for XHTML pages.
        $tag_end = preg_match('@<!DOCTYPE[^>]*xhtml@i', $text) ? '/>' : '>';

        // Split around </head> and <body ...>, keeping the tags themselves
        // as separate sections.
        $sections = preg_split('@(</head>|<body[^>]*>)@i',$text,-1,PREG_SPLIT_DELIM_CAPTURE);

        // Find the position of the first section that is a <body> tag.
        $body_index = 0;
        $position = 0;
        while ($position < count($sections)) {
            if (stripos($sections[$position], '<body') === 0) {
                $body_index = $position;
                break;
            }
            $position++;
        }

        if ($body_index == 0) {
            print($text);
            return;
        }

        foreach ($sections as $position => $section) {
            print($section);
            if ($position == 0) {
                // Right after the first section: add the icon and stylesheet.
                print '<link rel="icon" href="/favicon.ico" type="image/ico"'.$tag_end;
                print '<link rel="stylesheet" type="text/css" href="/style/section-basic.css"'.$tag_end;
            }
            else if ($position == $body_index) {
                // Right after the <body> tag: include the shared heading.
                virtual("/common/heading-doc.html");
            }
        }
    }
}
/*
 * Filter used when the requested file cannot be served: sends a 404 status
 * and prints an error page.
 */
class h404_filter
{
    /* Sends the 404 status line. */
    function init($archive)
    {
        header("HTTP/1.0 404 Not Found");
    }

    /*
     * Prints the error page. The file name is HTML-escaped before it is
     * echoed, since it is built from the requested path. If the archive
     * carries content (an unzip error message), that is shown as well.
     */
    function content($archive)
    {
        # This might also be an error extracting the file, or because we don't
        # know how to deal with the file. It would be good to give a better
        # error in those cases.
        print '<h1>404 Not Found</h1><p>File "' . htmlentities($archive->file_) . '" not found.</p>';
        if($archive->content_) {
            print '<p>Unzip error: '.htmlentities($archive->content_).'</p>';
        }
    }
}
// Return a readable error message for an unzip exit status.
//
// $exit_status  The exit status returned by the unzip command.
// Returns the description for known codes, or a message containing the
// numeric code for anything else.
function unzip_error($exit_status) {
    switch($exit_status) {
    case 0: return 'No error.';
    case 1: return 'One or more warning errors were encountered.';
    case 2: return 'A generic error in the zipfile format was detected.';
    case 3: return 'A severe error in the zipfile format was detected.';
    case 4: return 'Unzip was unable to allocate memory for one or more buffers during program initialization.';
    case 5: return 'Unzip was unable to allocate memory or unable to obtain a tty to read the decryption password(s).';
    case 6: return 'Unzip was unable to allocate memory during decompression to disk.';
    case 7: return 'Unzip was unable to allocate memory during in-memory decompression.';
    case 9: return 'The specified zipfile was not found.';
    case 10: return 'Invalid options were specified on the command line.';
    case 11: return 'No matching files were found.';
    case 50: return 'The disk is (or was) full during extraction.';
    case 51: return 'The end of the ZIP archive was encountered prematurely.';
    case 80: return 'The user aborted unzip prematurely with control-C (or similar).';
    case 81: return 'Testing or extraction of one or more files failed due to unsupported compression methods or unsupported decryption.';
    case 82: return 'No files were found due to bad decryption password(s).';
    // '.' concatenates; '+' is arithmetic in PHP and would not append the
    // code to the message.
    default: return 'Unknown unzip error code: ' . $exit_status;
    }
}
?>