/ Published in: Python
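Merges the Twitter log XML files found in a directory into a single XML file, keeping each status only once (duplicates are detected by status id). Example run: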
<pre><code>
$ twlogmerge.py twitter-log-user-dir
read file: twitter-log-user-dir/page001.xml
get status: id: 123456789
append status: accum.:1234
...
append status: accum.:3456
write file: twitter-log-user-dir-marged-20090828-130146.xml
</code></pre>
* cf. [Backup my twitter-log](http://snipplr.com/view/18925/backup-my-twitterlog/)
* cf. [Stylesheet for twitter-log file](http://snipplr.com/view/18927/stylesheet-for-twitterlog-file/)
<pre><code>
$ twlogmerge.py twitter-log-user-dir
read file: twitter-log-user-dir/page001.xml
get status: id: 123456789
append status: accum.:1234
...
append status: accum.:3456
write file: twitter-log-user-dir-marged-20090828-130146.xml
</code></pre>
* cf. [Backup my twitter-log](http://snipplr.com/view/18925/backup-my-twitterlog/)
* cf. [Stylesheet for twitter-log file](http://snipplr.com/view/18927/stylesheet-for-twitterlog-file/)
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
"""Merge per-page Twitter log XML files into one de-duplicated XML file.

Usage: twlogmerge.py <log-dir>

Every regular file in <log-dir> is parsed as XML, all of its <status>
elements are collected, and each status id is kept only once.  The result is
written next to the directory as '<log-dir>-marged-YYYYmmdd-HHMMSS.xml'.

The code is written to run unchanged on both Python 2 and Python 3: every
print call takes exactly one argument, and the output file is opened through
codecs.open so that encoding to UTF-8 works on either version.
"""
import os
import os.path
import sys
from codecs import open
from time import strftime, localtime, time
from xml.dom.minidom import parse, parseString

# Skeleton of the merged document; the stylesheet processing instruction is
# carried over into the written file.
initXmlStr = '''\
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="viewstyle.xsl"?>
<statuses></statuses>'''


def merge_logs(log_dir):
    """Collect the unique <status> elements of every log file in *log_dir*.

    Files are visited in sorted name order so the merged result is
    reproducible.  Entries that are not regular files (e.g. sub-directories)
    are skipped.  A file that is not well-formed XML, or a status without a
    numeric <id>, still raises, so a damaged log is never silently dropped.

    Returns a ``(document, count)`` tuple: the merged minidom document and
    the number of distinct statuses it contains.
    """
    dom = parseString(initXmlStr)
    statuses = dom.getElementsByTagName('statuses').item(0)
    seen_ids = set()

    # read log-files
    for name in sorted(os.listdir(log_dir)):
        path = os.path.join(log_dir, name)
        if not os.path.isfile(path):
            continue
        print('read file: ' + path)
        local_dom = parse(path)

        # append statuses
        for status in local_dom.getElementsByTagName('status'):
            # The first <id> in document order is taken as the status id.
            id_node = status.getElementsByTagName('id').item(0)
            status_id = int(id_node.firstChild.data)
            print('get status: id: ' + str(status_id))
            if status_id in seen_ids:
                print('not append status')
                continue
            # importNode copies the element into the target document
            # instead of moving a node that is owned by another document.
            statuses.appendChild(dom.importNode(status, True))
            seen_ids.add(status_id)
            print('append status: accum.:' + str(len(seen_ids)))

    return dom, len(seen_ids)


def main(argv=None):
    """Command-line entry point.

    *argv* defaults to ``sys.argv``.  Returns 0 after a successful merge and
    1 when the argument is missing or does not name a directory.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print('Usage: ' + argv[0] + ' <log-dir>')
        return 1
    log_dir = argv[1]
    # isdir rather than exists: a plain file cannot be listed.
    if not os.path.isdir(log_dir):
        print('No such file or directory')
        return 1
    print('dir: ' + log_dir)

    dom, _ = merge_logs(log_dir)

    # write marged-file
    # normpath strips a trailing separator; without it 'logs/' would place
    # the output inside the log directory, where the next run would read it.
    stamp = strftime("%Y%m%d-%H%M%S", localtime(time()))
    out_path = os.path.normpath(log_dir) + '-marged-' + stamp + '.xml'
    # The with-block guarantees the file is flushed and closed.
    with open(out_path, 'w', 'utf-8') as out_file:
        dom.writexml(out_file)
    print('write file: ' + out_path)
    return 0


if __name__ == '__main__':
    sys.exit(main())