/*=============================================================================
    Copyright (c) 2002 2004 2006 Joel de Guzman
    Copyright (c) 2004 Eric Niebler
    http://spirit.sourceforge.net/

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/

// NOTE(review): the targets of the seven angle-bracket includes below were
// missing from the text under review. They have been reconstructed from the
// names this file uses (fs::ifstream, boost::unordered_map,
// boost::upper_bound, boost::tie, std::istream_iterator, std::list) and must
// be confirmed against the build before merging.
#include "files.hpp"
#include <boost/filesystem/fstream.hpp>
#include <boost/unordered_map.hpp>
#include <boost/range/algorithm.hpp>
#include <boost/tuple/tuple.hpp>
#include <fstream>
#include <iterator>
#include <list>

namespace quickbook
{

namespace
{
    // Every file returned by load(), keyed by the path it was loaded from,
    // so that a path is only read from disk once.
    boost::unordered_map<fs::path, file_ptr> files;
}

// Read the first few bytes in a file to see it starts with a byte order
// mark. If it doesn't, then write the characters we've already read in.
// Although, given how UTF-8 works, if we've read anything in, the file's
// probably broken.
//
// begin   advanced past every character that matched 'chars'.
// end     end of the input.
// out     receives the consumed characters when the match fails.
// chars   the expected byte sequence.
// length  number of bytes of 'chars' that must match.
//
// Returns true when all 'length' bytes matched (nothing is written to
// 'out'), false otherwise.
template <typename InputIterator, typename OutputIterator>
bool check_bom(InputIterator& begin, InputIterator end,
        OutputIterator out, char const* chars, int length)
{
    char const* ptr = chars;

    while(begin != end && *begin == *ptr) {
        ++begin;
        ++ptr;
        --length;
        if(length == 0) return true;
    }

    // Failed to match, so write the skipped characters to storage:
    while(chars != ptr) *out++ = *chars++;

    return false;
}

// Consume a byte order mark from the front of [begin, end), if there is one.
//
// Returns the name of the encoding the mark announces ("UTF-8", "UTF-16" or
// "UTF-32"), or "" when no complete mark was found. Characters consumed by
// an incomplete match are written to 'out' by check_bom.
template <typename InputIterator, typename OutputIterator>
std::string read_bom(InputIterator& begin, InputIterator end,
        OutputIterator out)
{
    if(begin == end) return "";

    const char* utf8 = "\xef\xbb\xbf" ;
    const char* utf32be = "\0\0\xfe\xff";
    const char* utf32le = "\xff\xfe\0\0";

    unsigned char c = *begin;
    switch(c)
    {
    case 0xEF: { // UTF-8
        return check_bom(begin, end, out, utf8, 3) ? "UTF-8" : "";
    }
    case 0xFF: // UTF-16/UTF-32 little endian
        // The UTF-32 LE mark is the UTF-16 LE mark followed by two zero
        // bytes, so match the first half and then probe for the second.
        return !check_bom(begin, end, out, utf32le, 2) ? "" :
            check_bom(begin, end, out, utf32le + 2, 2) ? "UTF-32" : "UTF-16";
    case 0: // UTF-32 big endian
        return check_bom(begin, end, out, utf32be, 4) ? "UTF-32" : "";
    case 0xFE: // UTF-16 big endian
        // The UTF-16 BE mark is the last two bytes of the UTF-32 BE mark.
        return check_bom(begin, end, out, utf32be + 2, 2) ? "UTF-16" : "";
    default:
        return "";
    }
}

// Copy a string, converting mac and windows style newlines to unix
// newlines.
//
// A leading UTF-8 byte order mark is dropped. Throws load_error when the
// input starts with a UTF-16 or UTF-32 byte order mark.
template <typename InputIterator, typename OutputIterator>
void normalize(InputIterator begin, InputIterator end,
        OutputIterator out)
{
    std::string encoding = read_bom(begin, end, out);

    if(encoding != "UTF-8" && encoding != "")
        throw load_error(encoding +
            " is not supported. Please use UTF-8.");

    while(begin != end) {
        if(*begin == '\r') {
            // "\r" and "\r\n" both become a single "\n".
            *out++ = '\n';
            ++begin;
            if(begin != end && *begin == '\n') ++begin;
        }
        else {
            *out++ = *begin++;
        }
    }
}

// Return the file stored at 'filename', reading it on first use.
//
// The first call for a given path reads the file, normalizes its newlines
// and caches the result in 'files'; later calls for the same path return the
// cached entry (and so ignore 'qbk_version').
//
// Throws load_error when the file cannot be opened, cannot be read, or
// starts with a UTF-16/UTF-32 byte order mark.
file_ptr load(fs::path const& filename, unsigned qbk_version)
{
    boost::unordered_map<fs::path, file_ptr>::iterator pos
        = files.find(filename);

    if (pos == files.end())
    {
        fs::ifstream in(filename, std::ios_base::in);

        if (!in)
            throw load_error("Could not open input file.");

        // Turn off white space skipping on the stream
        in.unsetf(std::ios::skipws);

        std::string source;
        normalize(
            std::istream_iterator<char>(in),
            std::istream_iterator<char>(),
            std::back_inserter(source));

        if (in.bad())
            throw load_error("Error reading input file.");

        // Hand the new object to a smart pointer before touching the map,
        // so that it is released if the insertion throws.
        file_ptr loaded(new file(filename, source, qbk_version));

        bool inserted;
        boost::tie(pos, inserted) = files.emplace(filename, loaded);
        assert(inserted);
        (void) inserted; // only read by the assert
    }

    return pos->second;
}

// Compute the line and column of 'iterator' relative to 'begin'.
//
// The line starts from whatever a default constructed file_position holds
// and is incremented once per line break; the column is the 1-based offset
// from the start of the line containing 'iterator'.
//
// Note: a lone '\n', a lone '\r' and the pair "\n\r" each count as one line
// break, whereas "\r\n" counts as two. Text produced by normalize() above
// contains no '\r' characters.
file_position relative_position(
    std::string::const_iterator begin,
    std::string::const_iterator iterator)
{
    file_position pos;
    std::string::const_iterator line_begin = begin;

    while (begin != iterator)
    {
        if (*begin == '\r')
        {
            ++begin;
            ++pos.line;
            line_begin = begin;
        }
        else if (*begin == '\n')
        {
            ++begin;
            ++pos.line;
            line_begin = begin;
            if (begin == iterator) break;
            if (*begin == '\r')
            {
                // Swallow the '\r' of a "\n\r" pair.
                ++begin;
                line_begin = begin;
            }
        }
        else
        {
            ++begin;
        }
    }

    pos.column = iterator - line_begin + 1;
    return pos;
}

// Position of 'iterator' within this file's source, counted from the start
// of the source.
file_position file::position_of(std::string::const_iterator iterator) const
{
    return relative_position(source.begin(), iterator);
}

// Mapped files.
struct mapped_file_section { enum section_types { normal, empty, indented }; std::string::size_type original_pos; std::string::size_type our_pos; section_types section_type; mapped_file_section( std::string::size_type original_pos, std::string::size_type our_pos, section_types section_type = normal) : original_pos(original_pos), our_pos(our_pos), section_type(section_type) {} std::string::size_type to_original_pos(std::string::size_type pos) { switch (section_type) { case normal: return pos - our_pos + original_pos; case empty: return original_pos; case indented: // Indented doesn't really work, but that's okay because we // currently don't break up indented code. assert(pos == our_pos); return pos - our_pos + original_pos; default: assert(false); return original_pos; } } // If 'to_original_pos' worked for indented blocks, this wouldn't // be necessary. file_position calculate_position( file_position const& original, file_position const& relative) const { switch (section_type) { case normal: return file_position( original.line + relative.line - 1, relative.line == 1 ? 
original.column + relative.column - 1 : relative.column); case empty: return original; case indented: return file_position( original.line + relative.line - 1, original.column + relative.column - 1); default: assert(false); return file_position(); } } }; struct mapped_section_original_cmp { bool operator()(mapped_file_section const& x, mapped_file_section const& y) { return x.original_pos < y.original_pos; } bool operator()(mapped_file_section const& x, std::string::size_type const& y) { return x.original_pos < y; } bool operator()(std::string::size_type const& x, mapped_file_section const& y) { return x < y.original_pos; } }; struct mapped_section_pos_cmp { bool operator()(mapped_file_section const& x, mapped_file_section const& y) { return x.our_pos < y.our_pos; } bool operator()(mapped_file_section const& x, std::string::size_type const& y) { return x.our_pos < y; } bool operator()(std::string::size_type const& x, mapped_file_section const& y) { return x < y.our_pos; } }; struct mapped_file : file { mapped_file(file_ptr original) : file(*original, std::string()), original(original), mapped_sections() {} file_ptr original; std::vector mapped_sections; void add_empty_mapped_file_section(std::string::const_iterator pos) { std::string::size_type original_pos = pos - original->source.begin(); if (mapped_sections.empty() || mapped_sections.back().section_type != mapped_file_section::empty || mapped_sections.back().original_pos != original_pos) { mapped_sections.push_back(mapped_file_section( original_pos, source.size(), mapped_file_section::empty)); } } void add_mapped_file_section(std::string::const_iterator pos) { mapped_sections.push_back(mapped_file_section( pos - original->source.begin(), source.size())); } void add_indented_mapped_file_section(std::string::const_iterator pos) { mapped_sections.push_back(mapped_file_section( pos - original->source.begin(), source.size(), mapped_file_section::indented)); } virtual file_position 
position_of(std::string::const_iterator) const; }; namespace { std::list mapped_files; } struct mapped_file_builder_data { mapped_file_builder_data() { reset(); } void reset() { new_file.reset(); } boost::intrusive_ptr new_file; }; mapped_file_builder::mapped_file_builder() : data(0) {} mapped_file_builder::~mapped_file_builder() { delete data; } void mapped_file_builder::start(file_ptr f) { if (!data) { data = new mapped_file_builder_data; } assert(!data->new_file); data->new_file = new mapped_file(f); } file_ptr mapped_file_builder::release() { file_ptr r = data->new_file; data->reset(); return r; } void mapped_file_builder::clear() { data->reset(); } bool mapped_file_builder::empty() const { return data->new_file->source.empty(); } mapped_file_builder::pos mapped_file_builder::get_pos() const { return data->new_file->source.size(); } void mapped_file_builder::add(char const* x, iterator pos) { data->new_file->add_empty_mapped_file_section(pos); data->new_file->source.append(x); } void mapped_file_builder::add(std::string const& x, iterator pos) { data->new_file->add_empty_mapped_file_section(pos); data->new_file->source.append(x); } void mapped_file_builder::add(iterator begin, iterator end) { data->new_file->add_mapped_file_section(begin); data->new_file->source.append(begin, end); } void mapped_file_builder::add(mapped_file_builder const& x) { add(x, 0, x.data->new_file->source.size()); } void mapped_file_builder::add(mapped_file_builder const& x, pos begin, pos end) { assert(data->new_file->original == x.data->new_file->original); assert(begin <= x.data->new_file->source.size()); assert(end <= x.data->new_file->source.size()); if (begin != end) { std::vector::iterator start = boost::upper_bound(x.data->new_file->mapped_sections, begin, mapped_section_pos_cmp()); assert(start != x.data->new_file->mapped_sections.begin()); --start; std::string::size_type size = data->new_file->source.size(); data->new_file->mapped_sections.push_back(mapped_file_section( 
start->to_original_pos(begin), size, start->section_type)); for (++start; start != x.data->new_file->mapped_sections.end() && start->our_pos < end; ++start) { data->new_file->mapped_sections.push_back(mapped_file_section( start->original_pos, start->our_pos - begin + size, start->section_type)); } data->new_file->source.append( x.data->new_file->source.begin() + begin, x.data->new_file->source.begin() + end); } } void mapped_file_builder::unindent_and_add(iterator begin, iterator end) { std::string program(begin, end); // Erase leading blank lines and newlines: std::string::size_type start = program.find_first_not_of(" \t"); if (start != std::string::npos && (program[start] == '\r' || program[start] == '\n')) { program.erase(0, start); } start = program.find_first_not_of("\r\n"); program.erase(0, start); if (program.size() == 0) return; // nothing left to do // Get the first line indent std::string::size_type indent = program.find_first_not_of(" \t"); std::string::size_type pos = 0; if (std::string::npos == indent) { // Nothing left to do here. The code is empty (just spaces). // We clear the program to signal the caller that it is empty // and return early. 
program.clear(); return; } // Calculate the minimum indent from the rest of the lines do { pos = program.find_first_not_of("\r\n", pos); if (std::string::npos == pos) break; std::string::size_type n = program.find_first_not_of(" \t", pos); if (n != std::string::npos) { char ch = program[n]; if (ch != '\r' && ch != '\n') // ignore empty lines indent = (std::min)(indent, n-pos); } } while (std::string::npos != (pos = program.find_first_of("\r\n", pos))); // Trim white spaces from column 0..indent pos = 0; program.erase(0, indent); while (std::string::npos != (pos = program.find_first_of("\r\n", pos))) { if (std::string::npos == (pos = program.find_first_not_of("\r\n", pos))) { break; } std::string::size_type next = program.find_first_of("\r\n", pos); program.erase(pos, (std::min)(indent, next-pos)); } data->new_file->add_indented_mapped_file_section(begin + indent); data->new_file->source.append(program); } file_position mapped_file::position_of(std::string::const_iterator pos) const { std::vector::const_iterator section = boost::upper_bound(mapped_sections, std::string::size_type(pos - source.begin()), mapped_section_pos_cmp()); assert(section != mapped_sections.begin()); --section; return section->calculate_position( original->position_of( original->source.begin() + section->original_pos), relative_position(source.begin() + section->our_pos, pos) ); } }