#!/usr/bin/python """CGI script to produce RSS feeds of New York Times headlines Usage: - Install as CGI script (e.g. http://mydomain/cgi-bin/nyt.cgi) - In your RSS readers, subscribe to individual feeds like this: http://mydomain/cgi-bin/nyt.cgi?section=technology http://mydomain/cgi-bin/nyt.cgi?section=business etc. - Other sections: national, arts, opinion, many others """ from xml.dom.minidom import Document, Element, Text class xElement(Element): def __init__(self, *children): Element.__init__(self, str(self.__class__).split(".")[-1]) apply(self.appendChildren, children) def appendChildren(self, *children): if children: for child in children: try: self.appendChild(child) except AttributeError: for node in child: self.appendChild(node) class RSSDocument(Document): def __init__(self, root): Document.__init__(self) self.appendChild(root) class rss(xElement): def __init__(self, *channels): apply(xElement.__init__, (self,) + channels) self.attributes["version"] = "0.92" class channel(xElement): pass class item(xElement): pass class xSimpleText(xElement): def __init__(self, text): xElement.__init__(self, Text(text)) class title(xSimpleText): pass class link(xSimpleText): pass class description(xSimpleText): pass def getURLData(url): import urllib usock = urllib.urlopen(url) html = usock.read() usock.close() return html def readChannelAsXML(name): return getURLData('http://radiouser:Csm!]-tvMm@partners.userland.com/nyt/%s.xml' % name) def parseRawChannelData(rawxml): rawxml = readChannelAsXML(name) from xml.dom import minidom return minidom.parseString(rawxml) def getNodeText(node): rc = "" for child in node.childNodes: rc = rc + child.data return rc def extractRelevantBitsFromParsedChannelData(xmldoc): return [(getNodeText(node.getElementsByTagName("headline")[0]), getNodeText(node.getElementsByTagName("url")[0]), getNodeText(node.getElementsByTagName("abstract")[0])) for node in xmldoc.getElementsByTagName("article")] if __name__ == '__main__': import cgi form = cgi.FieldStorage() name = form["section"].value bits = extractRelevantBitsFromParsedChannelData(parseRawChannelData(readChannelAsXML(name))) channelNode = channel(title('New York Times %s' % name.capitalize())) for t, l, d in bits: channelNode.appendChild(item(title(t), link(l), description(d))) rssDocument = RSSDocument(rss(channelNode)) output = rssDocument.toxml() print "Content-type: text/xml" print "Content-length: %s" % len(output) print print output