mirror of
https://github.com/boostorg/quickbook.git
synced 2026-01-27 07:02:15 +00:00
- Use canonical filenames where possible. - Remove duplicates. - Include SVG files. - Less hacky. [SVN r77490]
510 lines
16 KiB
C++
510 lines
16 KiB
C++
/*=============================================================================
|
|
Copyright (c) 2002 2004 2006 Joel de Guzman
|
|
Copyright (c) 2004 Eric Niebler
|
|
http://spirit.sourceforge.net/
|
|
|
|
Use, modification and distribution is subject to the Boost Software
|
|
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
|
http://www.boost.org/LICENSE_1_0.txt)
|
|
=============================================================================*/
|
|
#include "files.hpp"
|
|
#include <boost/filesystem/fstream.hpp>
|
|
#include <boost/unordered_map.hpp>
|
|
#include <boost/range/algorithm/upper_bound.hpp>
|
|
#include <boost/range/algorithm/transform.hpp>
|
|
#include <boost/foreach.hpp>
|
|
#include <fstream>
|
|
#include <iterator>
|
|
|
|
namespace quickbook
|
|
{
|
|
namespace
|
|
{
|
|
boost::unordered_map<fs::path, file_ptr> files;
|
|
}
|
|
|
|
// Try to consume the byte sequence 'chars' (of 'length' bytes) from the
// front of [begin, end).
//
// Returns true once all 'length' bytes have been matched; 'begin' is then
// positioned just past them and nothing is written to 'out'.
//
// Returns false if a byte differs or the input runs out first. Because
// 'begin' may be a single pass iterator, the bytes that did match have
// already been consumed and can't be re-read, so they are written to 'out'
// instead. Given how UTF-8 works, a file that gets this far into a byte
// order mark without completing it is probably broken anyway.
template <typename InputIterator, typename OutputIterator>
bool check_bom(InputIterator& begin, InputIterator end,
        OutputIterator out, char const* chars, int length)
{
    char const* cursor = chars;

    for (int remaining = length; begin != end && *begin == *cursor;) {
        ++begin;
        ++cursor;
        if (--remaining == 0) return true;
    }

    // No match: replay the bytes that were consumed from the input.
    for (char const* replay = chars; replay != cursor; ++replay) {
        *out = *replay;
        ++out;
    }

    return false;
}
|
|
|
|
// Look for a byte order mark at the front of [begin, end).
//
// Returns the name of the encoding the mark announces ("UTF-8", "UTF-16" or
// "UTF-32"), or an empty string if there is no complete mark. A recognised
// mark is consumed from 'begin'. Bytes consumed while testing for a mark
// that turned out not to be there are written to 'out' by check_bom.
template <typename InputIterator, typename OutputIterator>
std::string read_bom(InputIterator& begin, InputIterator end,
        OutputIterator out)
{
    if(begin == end) return "";

    // The marks contain NUL bytes, so lengths are always passed explicitly.
    char const* const utf8_mark = "\xef\xbb\xbf" ;
    char const* const utf32be_mark = "\0\0\xfe\xff";
    char const* const utf32le_mark = "\xff\xfe\0\0";

    // The first byte decides which mark (if any) could follow.
    unsigned char const first = *begin;

    if (first == 0xEF) {
        // UTF-8
        if (check_bom(begin, end, out, utf8_mark, 3)) return "UTF-8";
        return "";
    }

    if (first == 0xFF) {
        // UTF-16/UTF-32 little endian: both start FF FE, the UTF-32 mark
        // continues with two zero bytes.
        if (!check_bom(begin, end, out, utf32le_mark, 2)) return "";
        if (check_bom(begin, end, out, utf32le_mark + 2, 2)) return "UTF-32";
        return "UTF-16";
    }

    if (first == 0) {
        // UTF-32 big endian
        if (check_bom(begin, end, out, utf32be_mark, 4)) return "UTF-32";
        return "";
    }

    if (first == 0xFE) {
        // UTF-16 big endian: the last two bytes of the UTF-32 mark.
        if (check_bom(begin, end, out, utf32be_mark + 2, 2)) return "UTF-16";
        return "";
    }

    return "";
}
|
|
|
|
// Copy a string, converting mac and windows style newlines to unix
|
|
// newlines.
|
|
|
|
template <typename InputIterator, typename OutputIterator>
|
|
void normalize(InputIterator begin, InputIterator end,
|
|
OutputIterator out)
|
|
{
|
|
std::string encoding = read_bom(begin, end, out);
|
|
|
|
if(encoding != "UTF-8" && encoding != "")
|
|
throw load_error(encoding +
|
|
" is not supported. Please use UTF-8.");
|
|
|
|
while(begin != end) {
|
|
if(*begin == '\r') {
|
|
*out++ = '\n';
|
|
++begin;
|
|
if(begin != end && *begin == '\n') ++begin;
|
|
}
|
|
else {
|
|
*out++ = *begin++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Load 'filename', returning the cached file if this path has been loaded
// before.
//
// On a cache miss the file is read, passed through normalize() (newline
// conversion, byte order mark handling) and stored in 'files' together with
// 'qbk_version'. Note that a cache hit returns the existing entry without
// looking at 'qbk_version'.
//
// Throws load_error if the file can't be opened, if reading it fails, or
// (from normalize) if it starts with a non UTF-8 byte order mark.
file_ptr load(fs::path const& filename, unsigned qbk_version)
{
    boost::unordered_map<fs::path, file_ptr>::iterator pos
        = files.find(filename);

    if (pos == files.end())
    {
        fs::ifstream in(filename, std::ios_base::in);

        if (!in)
            throw load_error("Could not open input file.");

        // Turn off white space skipping on the stream
        in.unsetf(std::ios::skipws);

        std::string source;
        normalize(
            std::istream_iterator<char>(in),
            std::istream_iterator<char>(),
            std::back_inserter(source));

        if (in.bad())
            throw load_error("Error reading input file.");

        // Hand the new object to a smart pointer before touching the map,
        // so that it is released if the insertion throws.
        file_ptr loaded(new file(filename, source, qbk_version));

        bool inserted;

        boost::tie(pos, inserted) = files.emplace(filename, loaded);

        // The lookup above missed, so the key can't already be present.
        assert(inserted);
    }

    return pos->second;
}
|
|
|
|
// Work out the line and column of 'iterator' relative to 'begin'.
//
// Starts from a default constructed file_position and advances its line
// for every line break in [begin, iterator). The column is one more than
// the distance from the start of the current line to 'iterator'.
//
// "\r\n", a lone "\r" and a lone "\n" each count as a single line break,
// which is the same set of newlines that normalize() recognises. (This
// used to pair up "\n\r" instead, so a windows style "\r\n" was counted as
// two lines.)
file_position relative_position(
    std::string::const_iterator begin,
    std::string::const_iterator iterator)
{
    file_position pos;
    std::string::const_iterator line_begin = begin;

    while (begin != iterator)
    {
        char const c = *begin;
        ++begin;

        if (c == '\r')
        {
            // Swallow the '\n' of a "\r\n" pair, but only if it lies
            // inside the range being measured.
            if (begin != iterator && *begin == '\n') ++begin;

            ++pos.line;
            line_begin = begin;
        }
        else if (c == '\n')
        {
            ++pos.line;
            line_begin = begin;
        }
    }

    pos.column = iterator - line_begin + 1;
    return pos;
}
|
|
|
|
// Line and column of 'iterator', measured from the start of this file's
// source text.
file_position file::position_of(std::string::const_iterator iterator) const
{
    std::string::const_iterator const file_start = source.begin();
    return relative_position(file_start, iterator);
}
|
|
|
|
// Mapped files.
|
|
|
|
struct mapped_file_section
|
|
{
|
|
enum section_types {
|
|
normal,
|
|
empty,
|
|
indented
|
|
};
|
|
|
|
std::string::size_type original_pos;
|
|
std::string::size_type our_pos;
|
|
section_types section_type;
|
|
|
|
mapped_file_section(
|
|
std::string::size_type original_pos,
|
|
std::string::size_type our_pos,
|
|
section_types section_type = normal) :
|
|
original_pos(original_pos), our_pos(our_pos), section_type(section_type) {}
|
|
|
|
std::string::size_type to_original_pos(std::string::size_type pos)
|
|
{
|
|
switch (section_type) {
|
|
case normal:
|
|
return pos - our_pos + original_pos;
|
|
case empty:
|
|
return original_pos;
|
|
case indented:
|
|
// Indented doesn't really work, but that's okay because we
|
|
// currently don't break up indented code.
|
|
assert(pos == our_pos);
|
|
return pos - our_pos + original_pos;
|
|
default:
|
|
assert(false);
|
|
return original_pos;
|
|
}
|
|
}
|
|
|
|
// If 'to_original_pos' worked for indented blocks, this wouldn't
|
|
// be necessary.
|
|
file_position calculate_position(
|
|
file_position const& original,
|
|
file_position const& relative) const
|
|
{
|
|
switch (section_type) {
|
|
case normal:
|
|
return file_position(
|
|
original.line + relative.line - 1,
|
|
relative.line == 1 ?
|
|
original.column + relative.column - 1 :
|
|
relative.column);
|
|
case empty:
|
|
return original;
|
|
case indented:
|
|
return file_position(
|
|
original.line + relative.line - 1,
|
|
original.column + relative.column - 1);
|
|
default:
|
|
assert(false);
|
|
return file_position();
|
|
}
|
|
}
|
|
};
|
|
|
|
struct mapped_section_original_cmp
|
|
{
|
|
bool operator()(mapped_file_section const& x,
|
|
mapped_file_section const& y)
|
|
{
|
|
return x.original_pos < y.original_pos;
|
|
}
|
|
|
|
bool operator()(mapped_file_section const& x,
|
|
std::string::size_type const& y)
|
|
{
|
|
return x.original_pos < y;
|
|
}
|
|
|
|
bool operator()(std::string::size_type const& x,
|
|
mapped_file_section const& y)
|
|
{
|
|
return x < y.original_pos;
|
|
}
|
|
};
|
|
|
|
struct mapped_section_pos_cmp
|
|
{
|
|
bool operator()(mapped_file_section const& x,
|
|
mapped_file_section const& y)
|
|
{
|
|
return x.our_pos < y.our_pos;
|
|
}
|
|
|
|
bool operator()(mapped_file_section const& x,
|
|
std::string::size_type const& y)
|
|
{
|
|
return x.our_pos < y;
|
|
}
|
|
|
|
bool operator()(std::string::size_type const& x,
|
|
mapped_file_section const& y)
|
|
{
|
|
return x < y.our_pos;
|
|
}
|
|
};
|
|
|
|
struct mapped_file : file
|
|
{
|
|
mapped_file(file_ptr original) :
|
|
file(*original, std::string()),
|
|
original(original), mapped_sections()
|
|
{}
|
|
|
|
file_ptr original;
|
|
std::vector<mapped_file_section> mapped_sections;
|
|
|
|
void add_empty_mapped_file_section(std::string::const_iterator pos) {
|
|
std::string::size_type original_pos =
|
|
pos - original->source.begin();
|
|
|
|
if (mapped_sections.empty() ||
|
|
mapped_sections.back().section_type !=
|
|
mapped_file_section::empty ||
|
|
mapped_sections.back().original_pos != original_pos)
|
|
{
|
|
mapped_sections.push_back(mapped_file_section(
|
|
original_pos, source.size(),
|
|
mapped_file_section::empty));
|
|
}
|
|
}
|
|
|
|
void add_mapped_file_section(std::string::const_iterator pos) {
|
|
mapped_sections.push_back(mapped_file_section(
|
|
pos - original->source.begin(), source.size()));
|
|
}
|
|
|
|
void add_indented_mapped_file_section(std::string::const_iterator pos) {
|
|
mapped_sections.push_back(mapped_file_section(
|
|
pos - original->source.begin(), source.size(),
|
|
mapped_file_section::indented));
|
|
}
|
|
|
|
virtual file_position position_of(std::string::const_iterator) const;
|
|
};
|
|
|
|
namespace {
|
|
std::list<mapped_file> mapped_files;
|
|
}
|
|
|
|
struct mapped_file_builder_data
|
|
{
|
|
mapped_file_builder_data() { reset(); }
|
|
void reset() { new_file.reset(); }
|
|
|
|
boost::intrusive_ptr<mapped_file> new_file;
|
|
};
|
|
|
|
// The builder's data isn't allocated until start() is first called.
mapped_file_builder::mapped_file_builder()
    : data(0)
{
}

mapped_file_builder::~mapped_file_builder()
{
    delete data;
}
|
|
|
|
// Begin building a new mapped file based on 'f'. The builder's data is
// allocated on first use. Asserts that no other build is in progress.
void mapped_file_builder::start(file_ptr f)
{
    if (data == 0)
        data = new mapped_file_builder_data;

    mapped_file_builder_data& state = *data;

    assert(!state.new_file);
    state.new_file = new mapped_file(f);
}
|
|
|
|
// Finish the current build: return the file that was being built and leave
// the builder ready for another start().
//
// Returns a null pointer if there is no build in progress. That includes a
// builder on which start() has never been called, whose data hasn't been
// allocated yet.
file_ptr mapped_file_builder::release()
{
    if (!data) return file_ptr();

    file_ptr r = data->new_file;
    data->reset();
    return r;
}
|
|
|
|
void mapped_file_builder::clear()
|
|
{
|
|
data->reset();
|
|
}
|
|
|
|
bool mapped_file_builder::empty() const
|
|
{
|
|
return data->new_file->source.empty();
|
|
}
|
|
|
|
// The length of the text added so far, i.e. the offset at which the next
// addition will start. Dereferences the file being built without checking,
// so a build must be in progress.
mapped_file_builder::pos mapped_file_builder::get_pos() const
{
    mapped_file const& building = *data->new_file;
    return building.source.size();
}
|
|
|
|
void mapped_file_builder::add(char const* x, iterator pos)
|
|
{
|
|
data->new_file->add_empty_mapped_file_section(pos);
|
|
data->new_file->source.append(x);
|
|
}
|
|
|
|
void mapped_file_builder::add(std::string const& x, iterator pos)
|
|
{
|
|
data->new_file->add_empty_mapped_file_section(pos);
|
|
data->new_file->source.append(x);
|
|
}
|
|
|
|
// Append [begin, end), recording 'begin' as the position in the original
// file that the text was copied from.
void mapped_file_builder::add(iterator begin, iterator end)
{
    mapped_file& building = *data->new_file;

    building.add_mapped_file_section(begin);
    building.source.append(begin, end);
}
|
|
|
|
// Append everything that has been added to the builder 'x'.
void mapped_file_builder::add(mapped_file_builder const& x)
{
    pos const whole_length = x.data->new_file->source.size();
    add(x, 0, whole_length);
}
|
|
|
|
// Append the text at offsets [begin, end) of the file being built by 'x',
// carrying over its mapping to the original file.
//
// Asserts that both builders are based on the same original file and that
// the offsets lie within x's text. Does nothing if the range is empty.
void mapped_file_builder::add(mapped_file_builder const& x,
        pos begin, pos end)
{
    assert(data->new_file->original == x.data->new_file->original);
    assert(begin <= x.data->new_file->source.size());
    assert(end <= x.data->new_file->source.size());

    if (begin == end) return;

    mapped_file& from = *x.data->new_file;
    mapped_file& to = *data->new_file;

    // Find the section of 'x' that contains 'begin': the last one that
    // starts at or before it.
    std::vector<mapped_file_section>::iterator section =
        boost::upper_bound(from.mapped_sections,
            begin, mapped_section_pos_cmp());
    assert(section != from.mapped_sections.begin());
    --section;

    // Offset in our text at which the copied text will start.
    std::string::size_type const base = to.source.size();

    // The copy may start part way through that section, so its original
    // position has to be worked out.
    to.mapped_sections.push_back(mapped_file_section(
        section->to_original_pos(begin), base,
        section->section_type));

    // Sections that start inside the copied range keep their original
    // position, and have their offset rebased onto our text.
    ++section;
    while (section != from.mapped_sections.end() &&
        section->our_pos < end)
    {
        to.mapped_sections.push_back(mapped_file_section(
            section->original_pos, section->our_pos - begin + base,
            section->section_type));
        ++section;
    }

    to.source.append(
        from.source.begin() + begin,
        from.source.begin() + end);
}
|
|
|
|
// Append the block of code [begin, end) with its leading blank lines and
// its common indentation removed.
//
// The common indentation is the smallest number of leading space/tab
// characters found on any line that contains something other than
// whitespace. That many characters are removed from the start of each line
// (fewer from lines that are shorter than that).
//
// Nothing is added if the block contains only whitespace.
void mapped_file_builder::unindent_and_add(iterator begin, iterator end)
{
    std::string program(begin, end);

    // Find the first character of actual code. If there isn't one, the
    // block is blank and there's nothing to add.
    std::string::size_type const first_text =
        program.find_first_not_of(" \t\r\n");
    if (first_text == std::string::npos)
        return;

    // Erase the leading blank lines, however many there are and whether
    // or not they contain spaces: that is, everything before the start of
    // the line that 'first_text' is on. 'leading' is the number of
    // characters erased.
    std::string::size_type leading =
        program.find_last_of("\r\n", first_text);
    leading = (leading == std::string::npos) ? 0 : leading + 1;
    program.erase(0, leading);

    // Get the first line indent
    std::string::size_type indent = first_text - leading;
    std::string::size_type pos = 0;

    // Calculate the minimum indent from the rest of the lines
    while (std::string::npos != (pos = program.find_first_of("\r\n", pos)))
    {
        pos = program.find_first_not_of("\r\n", pos);
        if (std::string::npos == pos)
            break;

        std::string::size_type n = program.find_first_not_of(" \t", pos);
        if (n != std::string::npos)
        {
            char ch = program[n];
            if (ch != '\r' && ch != '\n') // ignore empty lines
                indent = (std::min)(indent, n-pos);
        }
    }

    // Trim white spaces from column 0..indent
    pos = 0;
    program.erase(0, indent);
    while (std::string::npos != (pos = program.find_first_of("\r\n", pos)))
    {
        if (std::string::npos == (pos = program.find_first_not_of("\r\n", pos)))
        {
            break;
        }

        // Blank lines can be shorter than the indent, so never erase
        // past the end of the line.
        std::string::size_type next = program.find_first_of("\r\n", pos);
        std::string::size_type line_length =
            (next == std::string::npos ? program.size() : next) - pos;
        program.erase(pos, (std::min)(indent, line_length));
    }

    // The first character of the unindented code comes after both the
    // erased blank lines and the first line's indentation in the original.
    data->new_file->add_indented_mapped_file_section(
        begin + leading + indent);
    data->new_file->source.append(program);
}
|
|
|
|
// Translate 'pos', a position in this mapped file's text, into a line and
// column in the original file.
file_position mapped_file::position_of(std::string::const_iterator pos) const
{
    std::string::size_type const offset = pos - source.begin();

    // The section containing 'pos' is the last one that starts at or
    // before it.
    std::vector<mapped_file_section>::const_iterator section =
        boost::upper_bound(mapped_sections, offset,
            mapped_section_pos_cmp());
    assert(section != mapped_sections.begin());
    --section;

    // Where the section starts in the original file...
    file_position const section_start = original->position_of(
        original->source.begin() + section->original_pos);

    // ...and how far into the section 'pos' is.
    file_position const within_section =
        relative_position(source.begin() + section->our_pos, pos);

    return section->calculate_position(section_start, within_section);
}
|
|
}
|