Using WordPress Export

I bragged recently to a colleague at work that I mostly avoid posts about WordPress. I finally succumbed, but hopefully this will actually be useful to someone. I have my filk blog, and sometimes I sing in places which do not have internet access. I wanted a quick way to dump it all to a static directory. The script below still has bugs when dealing with Unicode, but mostly it works pretty well, and it is useful to me.

<pre>
#!/usr/bin/python
import os, optparse
from xml.dom import minidom

def parseXML(fin):
    """Parse the XML read from the file-like object *fin*.

    The whole content of *fin* is read at once and the resulting minidom
    document is returned.
    """
    # HACK: &nbsp; sometimes shows up where it should not; swap every
    # occurrence for a plain space before handing the text to the parser.
    cleaned = fin.read().replace('&nbsp;', ' ')
    return minidom.parseString(cleaned)

def items(dom):
    """Yield a (post_name, content) pair for every usable <item> in *dom*.

    post_name is the text of the item's <wp:post_name> element and content
    is the text of its <content:encoded> element with each newline turned
    into '<br />' followed by a newline.

    Items whose <wp:post_name> is empty are skipped, because the name is
    the only thing a caller can use as a file name. An empty
    <content:encoded> yields an empty content string.

    Raises ValueError if an item does not contain exactly one
    <wp:post_name> and exactly one <content:encoded> element.
    """
    def text_of(node):
        # An empty element has no child nodes at all, so indexing
        # childNodes[0] would raise IndexError; treat it as empty text.
        first = node.firstChild
        if first is None:
            return u''
        return first.wholeText

    for element in dom.getElementsByTagName('item'):
        # The one-element unpacking asserts there is exactly one such node.
        post_name_node, = element.getElementsByTagName('wp:post_name')
        content_node, = element.getElementsByTagName('content:encoded')
        post_name = text_of(post_name_node)
        if not post_name:
            # No slug (presumably an unpublished draft): nothing to name
            # the output file after, so leave this item out.
            continue
        yield post_name, text_of(content_node).replace('\n', '<br />\n')

# Command-line driver: read a WordPress export file (-i) and write one
# <post_name>.html file per item into a new directory (-d), together with
# an index.html that links to every page.
parser = optparse.OptionParser()
parser.add_option('-i', '--input-file', dest='input_file')
parser.add_option('-d', '--output-dir', dest='output_dir')

(options, args) = parser.parse_args()

if not options.input_file or not options.output_dir:
    parser.error('-i and -d are mandatory')
# Refuse to write into an existing path so nothing is ever overwritten.
if os.path.exists(options.output_dir):
    parser.error('%s already exists -- will not overwrite' % options.output_dir)

# Parse first and close the export file right away; the DOM holds all data.
fin = open(options.input_file)
try:
    dom = parseXML(fin)
finally:
    fin.close()

os.makedirs(options.output_dir)
index = open(os.path.join(options.output_dir, 'index.html'), 'w')
try:
    # Declare the encoding: index entries are written as UTF-8 below.
    index.write('<html><head><meta charset="utf-8"></head><body><ul>\n')
    for name, content in items(dom):
        print("writing %s" % name)
        # Encode explicitly: handing a unicode string with non-ASCII
        # characters straight to the file raises UnicodeEncodeError.
        # NOTE(review): slugs containing '%' would probably need URL-quoting
        # inside href for the link to resolve -- confirm against real data.
        entry = u'<li><a href="%(name)s.html">%(name)s</a></li>\n' % {'name': name}
        index.write(entry.encode('utf-8'))
        fp = open(os.path.join(options.output_dir, name) + '.html', 'w')
        try:
            fp.write(content.encode('utf-8'))
        finally:
            fp.close()
    # Close the tags opened at the top so the index is a complete document.
    index.write('</ul></body></html>\n')
finally:
    index.close()
</pre>
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: