2
0
mirror of https://github.com/boostorg/website.git synced 2026-01-29 08:02:20 +00:00
Files
website/feed/bbook2rss.py
Daniel James 06983e7f2b Merge r52048,r52085,r52180-52181,r52239 from beta.
Remaining revisions for 1.39: r51130,r51725,r52086-52161,r52240.


[SVN r52242]
2009-04-07 20:12:02 +00:00

278 lines
9.2 KiB
Python
Executable File

#!/usr/bin/python
# Copyright 2007 Rene Rivera
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
import re
import optparse
import time
import xml.dom.minidom
from xml.sax.saxutils import unescape, escape
class BoostBook2RSS:
    """Command-line tool that turns BoostBook articles into one RSS 2.0 feed.

    Instantiating the class runs the whole conversion: the command line is
    parsed, every input article is translated into an RSS ``<item>`` whose
    text is XHTML markup, and the resulting document is written out.

    Translation is driven by node names: a source node named ``foo-bar`` is
    handled by the method ``x_foo_bar`` and a text node (``#text``) by
    ``x__text``.  The code sticks to constructs that exist in both Python 2.6+
    and Python 3 (no ``cmp``, no reliance on ``map`` returning a list).
    """

    #~ strptime() layout of the ``last-revision`` attribute of an article.
    _DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'

    def __init__(self):
        """Parse ``sys.argv``, build the feed and write it out."""
        opt = optparse.OptionParser(
            usage="%prog [options] input+")
        opt.add_option('--output',
            help="output RSS file")
        opt.add_option('--channel-title')
        opt.add_option('--channel-link')
        opt.add_option('--channel-language')
        opt.add_option('--channel-copyright')
        opt.add_option('--channel-description')
        opt.add_option('--count', type='int')
        #~ Defaults; the parser below overwrites whichever were given.
        self.output = 'out.rss'
        self.channel_title = ''
        self.channel_link = ''
        self.channel_language = 'en-us'
        self.channel_copyright = 'Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)'
        self.channel_description = ''
        self.count = None
        self.input = []
        #~ This instance doubles as the optparse "values" object, so each
        #~ option is stored directly as an attribute of self.
        (_opt_, self.input) = opt.parse_args(None, self)
        self.rss = self._new_channel_document()
        self.add_articles()
        self.gen_output()

    def _new_channel_document(self):
        """Return a new RSS document holding only the channel header.

        The channel_* attributes are treated as plain text and XML-escaped,
        so values containing ``&`` or ``<`` no longer yield malformed XML.
        """
        fields = {
            'title' : self.channel_title,
            'link' : self.channel_link,
            'description' : self.channel_description,
            'language' : self.channel_language,
            'copyright' : self.channel_copyright
            }
        escaped = dict((key, escape(value)) for key, value in fields.items())
        return xml.dom.minidom.parseString('''<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:boostbook="urn:boost.org:boostbook">
<channel>
<generator>BoostBook2RSS</generator>
<title>%(title)s</title>
<link>%(link)s</link>
<description>%(description)s</description>
<language>%(language)s</language>
<copyright>%(copyright)s</copyright>
</channel>
</rss>
''' % escaped)

    def add_articles(self):
        """Convert every input article and append the newest to the channel.

        Each entry of ``self.input`` is handed to ``xml.dom.minidom.parse``.
        Items are ordered newest first by their ``last-revision`` date and
        only the first ``self.count`` are kept (all of them when it is None).
        """
        channel = self.get_child(self.rss.documentElement, tag='channel')
        items = []
        for bb in self.input:
            article = xml.dom.minidom.parse(bb)
            item = self.x(article.documentElement)
            if item:
                items.append([
                    self._article_timestamp(article.documentElement),
                    item
                    ])
        #~ list.sort is stable also with reverse=True, so articles with equal
        #~ timestamps keep their command-line order.
        items.sort(key=lambda entry: entry[0], reverse=True)
        for entry in items[0:self.count]:
            channel.appendChild(entry[1])

    def _article_timestamp(self, node):
        """Return the ``last-revision`` date of ``node`` as seconds since epoch.

        A missing or unparsable date falls back to the current time, which
        sorts the article among the newest ones.
        """
        try:
            return time.mktime(time.strptime(
                node.getAttribute('last-revision'),
                self._DATE_FORMAT))
        except (ValueError, OverflowError):
            return time.time()

    def gen_output(self):
        """Write the feed to the file ``self.output``, or to stdout if empty.

        The document is serialised to UTF-8 bytes first and written in binary
        mode so the bytes always agree with the encoding named in the XML
        declaration, whatever the platform default encoding is.
        """
        data = self.tostring()
        if self.output:
            with open(self.output, 'wb') as out:
                out.write(data)
        else:
            #~ Imported here because the module header does not import sys.
            import sys
            #~ Python 3 exposes the byte stream as sys.stdout.buffer.
            getattr(sys.stdout, 'buffer', sys.stdout).write(data)

    #~ Turns the internal XML tree into an output UTF-8 string.
    def tostring(self):
        return self.rss.toxml('utf-8')

    def x(self, *context, **kwargs):
        """Dispatch a source node to its ``x_<name>`` translation method.

        ``context`` holds strings and/or DOM nodes; each contributes a suffix
        (``-`` and ``#`` become ``_``) from which candidate method names are
        built, most specific first.  The last node given is the one passed to
        the handler together with ``kwargs``.

        Returns the handler's result, or None when no node was supplied.
        Raises AssertionError when none of the candidate handlers exists.
        """
        node = None
        names = []
        for c in context:
            if c:
                if isinstance(c, xml.dom.Node):
                    suffix = '_'+c.nodeName.replace('-','_').replace('#','_')
                    node = c
                else:
                    suffix = '_'+c.replace('-','_').replace('#','_')
                names.append('x')
                names = [name+suffix for name in names]
        if node:
            for name in names:
                if hasattr(self, name):
                    return getattr(self, name)(node, **kwargs)
            #~ Raised explicitly (not via assert) so that unknown elements are
            #~ still reported when Python runs with -O.
            raise AssertionError('Unknown node type %s' % (', '.join(names)))
        return None

    def x_children(self, parent, **kwargs):
        """Return the list of translated child nodes of ``parent``."""
        result = []
        for n in parent.childNodes:
            child = self.x(n)
            if child:
                result.append(child)
            else:
                #~ NOTE(review): this whitespace-normalised clone is built but
                #~ never added to the result, so a child whose handler
                #~ returns nothing is dropped.  Kept as is; confirm whether
                #~ the clone was meant to be appended.
                child = n.cloneNode(False)
                if hasattr(child, 'data'):
                    child.data = re.sub(r'\s+', ' ', child.data)
                for grandchild in self.x_children(n, **kwargs):
                    child.appendChild(grandchild)
        return result

    def _wrap(self, tag, node, **attributes):
        """Return a new ``tag`` element holding the translated children of ``node``."""
        return self.new_node(tag, *self.x_children(node), **attributes)

    def _first_child_data(self, node):
        """Return the ``data`` of the first child of ``node``, or '' if it has none."""
        return getattr(self.get_child(node), 'data', '')

    def x_article(self, node):
        """Translate an ``article`` element into an RSS ``item`` element.

        The title becomes ``title``, the ``last-revision`` attribute becomes
        ``pubDate`` and the article purpose becomes ``boostbook:purpose``.
        Every other child is translated; a translated child that contains a
        ``boostbook:download`` element contributes that element to the item,
        all others go into the ``description``.  Purpose and description are
        stored as serialised XHTML text.
        """
        brief_xhtml = self.new_node('span',
            self.x(self.get_child(self.get_child(node, tag='articleinfo'),
                tag='articlepurpose'
                )),
            klass='brief'
            )
        title_xhtml = self.new_node('title',
            *self.x_children(self.get_child(node, tag='title')))
        description_xhtml = self.new_node('div', klass='description')
        download_item = None
        body_item = node.firstChild
        while body_item:
            if body_item.nodeName not in ['title', 'articleinfo']:
                item = self.x(body_item)
                if item:
                    download_i = self.get_child(item, tag='boostbook:download')
                    if download_i:
                        download_item = download_i
                    else:
                        description_xhtml.appendChild(item)
            body_item = body_item.nextSibling
        #~ toxml() without an encoding yields text rather than encoded bytes,
        #~ which is what createTextNode needs; encoding happens once, on output.
        return self.new_node(
            'item',
            title_xhtml,
            self.new_text('pubDate', node.getAttribute('last-revision')),
            self.new_text('boostbook:purpose', brief_xhtml.toxml()),
            download_item,
            self.new_text('description', description_xhtml.toxml())
            )

    def x__text(self, node):
        """Copy a text node into the RSS document."""
        return self.rss.createTextNode(node.data)

    def x_para(self, node):
        return self._wrap('p', node)

    def x_ulink(self, node):
        return self._wrap('a', node, href=node.getAttribute('url'))

    def x_section(self, node):
        return self._wrap('div', node, id=node.getAttribute('id'))

    def x_title(self, node):
        return self._wrap('h3', node)

    def x_link(self, node):
        return self._wrap('span', node, klass='link')

    def x_itemizedlist(self, node):
        return self._wrap('ul', node)

    def x_listitem(self, node):
        return self._wrap('li', node)

    def x_phrase(self, node):
        return self._wrap('span', node, klass=node.getAttribute('role'))

    def x_code(self, node):
        return self._wrap('code', node)

    def x_literal(self, node):
        return self._wrap('tt', node)

    def x_emphasis(self, node):
        return self._wrap('em', node)

    def x_inlinemediaobject(self, node):
        """Translate an inline media object into an ``img`` element.

        ``src`` is the ``fileref`` of ``imageobject/imagedata``; ``alt`` is
        the text of ``textobject/phrase[role=alt]`` or ``'[]'`` when that is
        missing or empty.  Returns None when there is no image reference.
        """
        image = self.get_child(node, 'imageobject')
        if image:
            image = self.get_child(image, 'imagedata')
        if image:
            image = image.getAttribute('fileref')
        alt = None
        phrase = self.get_child(self.get_child(node, 'textobject'), 'phrase')
        if phrase and phrase.getAttribute('role') == 'alt':
            alt = self._first_child_data(phrase).strip()
        if not alt:
            alt = '[]'
        if image:
            return self.new_node('img',
                src=image,
                alt=alt)
        return None

    def x_articlepurpose(self, node):
        return self._wrap('span', node, klass='purpose')

    def x_download(self, node):
        """Translate ``download`` into ``boostbook:download`` holding its text."""
        return self.new_text('boostbook:download',
            self._first_child_data(node))

    def get_child(self, root, tag=None, id=None, name=None):
        """Return the first child of ``root`` matching every given filter.

        ``tag`` is compared with the child's node name; ``id`` and ``name``
        with the attributes of the same name, which only element nodes can
        satisfy.  With no filter the first child is returned.  Returns None
        when nothing matches or when ``root`` is None.
        """
        if root is None:
            return None
        for n in root.childNodes:
            if tag and tag != n.nodeName:
                continue
            if id and not self._attribute_equals(n, 'id', id):
                continue
            if name and not self._attribute_equals(n, 'name', name):
                continue
            return n
        return None

    def _attribute_equals(self, node, attribute, value):
        """Tell whether ``node`` is an element whose ``attribute`` equals ``value``."""
        return (node.nodeType == node.ELEMENT_NODE
            and node.hasAttribute(attribute)
            and node.getAttribute(attribute) == value)

    def new_node(self, tag, *child, **kwargs):
        """Create element ``tag`` with the given children and attributes.

        Keyword arguments become attributes; ``klass`` stands in for the
        reserved word ``class`` and empty-string values are skipped.  Falsy
        children (None) are ignored.
        """
        result = self.rss.createElement(tag)
        for key, value in kwargs.items():
            if value != '':
                result.setAttribute('class' if key == 'klass' else key, value)
        for c in child:
            if c:
                result.appendChild(c)
        return result

    def new_text(self, tag, data, **kwargs):
        """Create element ``tag`` containing the stripped text ``data``.

        No text node is added when ``data`` is empty after stripping.
        """
        result = self.new_node(tag, **kwargs)
        data = data.strip()
        if data:
            result.appendChild(self.rss.createTextNode(data))
        return result
#~ Run the conversion only when executed as a script: constructing the class
#~ parses the command line and writes the feed, which must not happen as a
#~ side effect of merely importing this module.
if __name__ == '__main__':
    BoostBook2RSS()