Revision: 17237
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at August 28, 2009 00:10 by mustam
Initial Code
import os
import os.path
import sys
from codecs import open
from time import strftime, localtime, time
from xml.dom.minidom import parse, parseString
# Skeleton of the merged document: the XML declaration, the stylesheet
# processing instruction, and an empty <statuses> root that the merge fills.
initXmlStr = '''\
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="viewstyle.xsl"?>
<statuses></statuses>'''


def merge_statuses(log_dir):
    """Merge every <status> element found in the files of *log_dir*.

    Each regular file directly inside *log_dir* is parsed as XML.  Every
    <status> element is copied into a fresh document built from
    ``initXmlStr`` unless a status with the same numeric <id> has already
    been copied.  Files are visited in sorted name order so the result is
    deterministic; sub-directories are skipped.

    Args:
        log_dir: path of the directory holding the log files.

    Returns:
        A ``(dom, count)`` tuple: the merged minidom document and the
        number of distinct statuses it contains.

    Raises:
        OSError: if *log_dir* cannot be listed.
        xml.parsers.expat.ExpatError: if a file is not well-formed XML.
    """
    dom = parseString(initXmlStr)
    statuses = dom.getElementsByTagName('statuses').item(0)
    seen_ids = set()

    # read log-files
    for name in sorted(os.listdir(log_dir)):
        path = os.path.join(log_dir, name)
        if not os.path.isfile(path):
            # parse() cannot read a directory; ignore it instead of crashing.
            print('skip non-file: ' + path)
            continue
        print('read file: ' + path)
        local_dom = parse(path)

        # append statuses
        for status in local_dom.getElementsByTagName('status'):
            id_node = status.getElementsByTagName('id').item(0)
            status_id = int(id_node.firstChild.data)
            print('get status: id: ' + str(status_id))
            if status_id in seen_ids:
                print('not append status')
                continue
            # The node belongs to local_dom; importNode makes a deep copy
            # owned by the merged document before it is attached.
            statuses.appendChild(dom.importNode(status, True))
            seen_ids.add(status_id)
            print('append status: accum.:' + str(len(seen_ids)))

        # Everything needed was copied; release the per-file tree.
        local_dom.unlink()

    return dom, len(seen_ids)


def main(argv=None):
    """Command-line entry point: merge a log directory into one XML file.

    Args:
        argv: argument list in ``sys.argv`` layout (program name first);
            defaults to ``sys.argv``.

    Returns:
        The path of the merged file that was written, or ``None`` when the
        arguments were missing or did not name a directory.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print('Usage: ' + argv[0] + ' <log-dir>')
        return None
    # isdir rather than exists: a plain file cannot be listed.
    if not os.path.isdir(argv[1]):
        print('No such file or directory')
        return None

    log_dir = argv[1]
    print('dir: ' + log_dir)

    dom, _count = merge_statuses(log_dir)

    # write merged-file
    # Drop trailing separators so the output lands next to the directory
    # ("logs-marged-...") and not inside it ("logs/-marged-...").
    base = log_dir.rstrip('/' + os.sep) or log_dir
    stamp = strftime("%Y%m%d-%H%M%S", localtime(time()))
    out_path = base + '-marged-' + stamp + '.xml'

    # `open` is codecs.open (imported at the top of the file).  The with
    # block guarantees the stream is flushed and closed.
    with open(out_path, 'w', 'utf-8') as out_file:
        # Declare the encoding so the XML header matches the bytes written.
        dom.writexml(out_file, encoding='utf-8')
    print('write file: ' + out_path)
    return out_path


if __name__ == '__main__':
    main()
Initial URL
Initial Description
e.g. <pre><code> $ twlogmerge.py twitter-log-user-dir read file: twitter-log-user-dir/page001.xml get status: id: 123456789 append status: accum.:1234 ... append status: accum.:3456 write file: twitter-log-user-dir-marged-20090828-130146.xml </code></pre> * cf. [Backup my twitter-log](http://snipplr.com/view/18925/backup-my-twitterlog/) * cf. [Stylesheet for twitter-log file](http://snipplr.com/view/18927/stylesheet-for-twitterlog-file/)
Initial Title
Merge twitter-logged files into one xml-file
Initial Tags
Initial Language
Python