-
Notifications
You must be signed in to change notification settings - Fork 0
/
posterous_backup.py
executable file
·142 lines (121 loc) · 4.8 KB
/
posterous_backup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#! /usr/bin/env python
# Copyright (C) 2010 Brad Kozlek
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version as published by
# the Free Software Foundation. http://www.gnu.org/licenses/gpl.html
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
import os
import urllib2
import time
import re
from xml.dom import minidom
import sys
# Your Posterous hostname, without the trailing ".posterous.com".
MY_POSTEROUS_HOSTNAME = "transatlantical"

# Path of the directory in which the backup is stored.
BACKUP_DIR_PATH = "backup"

# Sub-directory names used inside BACKUP_DIR_PATH. You probably don't want to
# touch these: keeping the standard names will help if anyone ever writes a
# tool that takes this data and imports it somewhere else.
BACKUP_XML_DIR_NAME = "xml"
BACKUP_FILES_DIR_NAME = "files"
def message(string):
    """Print a status line to standard output.

    string -- the text to print.
    """
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(string)
def xmlStringForURL(URL):
    """Fetch URL and return the complete response body.

    URL -- the address to request with urllib2.urlopen.

    Returns whatever the response object's read() yields. Any exception
    raised by urllib2.urlopen or by read() propagates to the caller.
    """
    response = urllib2.urlopen(URL)
    try:
        return response.read()
    finally:
        # Release the connection even when read() fails part-way through.
        response.close()
def saveStringToFile(myString, filePathString):
    """Write myString to the file at filePathString, replacing any old content.

    myString       -- the text to store.
    filePathString -- path of the file to create or overwrite.

    Errors from open() or write() propagate to the caller.
    """
    # The with-statement closes the handle even if write() raises.
    with open(filePathString, 'w') as outFile:
        outFile.write(myString)
def xmlBackupDirName():
    """Return the directory path for this run's XML files.

    The path is BACKUP_DIR_PATH/BACKUP_XML_DIR_NAME/<timestamp>, where the
    timestamp is the current local time formatted as YYYYMMDD-HHMMSS. The
    directory itself is not created here.
    """
    now = time.localtime()
    stamp = time.strftime("%Y%m%d-%H%M%S", now)
    xmlRoot = os.path.join(BACKUP_DIR_PATH, BACKUP_XML_DIR_NAME)
    return os.path.join(xmlRoot, stamp)
def xmlStringHasPosts(xmlString):
    """Report whether the XML document contains at least one <post> element.

    xmlString -- an XML document as a string; it is parsed with minidom, so
                 malformed XML raises the parser's exception.

    Returns True if any element named "post" is present, otherwise False.
    """
    document = minidom.parseString(xmlString)
    return len(document.getElementsByTagName("post")) > 0
def xmlOkay(xmlString):
    """Report whether an API response document signals success.

    xmlString -- an XML document as a string; it is parsed with minidom, so
                 malformed XML raises the parser's exception.

    Returns True only when the first <rsp> element carries stat="ok".
    A document without an <rsp> element, or whose <rsp> has no stat
    attribute, is treated as a failed response (False) rather than raising.
    """
    dom = minidom.parseString(xmlString)
    rsplist = dom.getElementsByTagName("rsp")
    if not rsplist:
        return False
    # getAttribute returns "" for a missing attribute, which is simply
    # "not ok" here.
    return rsplist[0].getAttribute("stat") == "ok"
def saveXMLFilesToDirectory(dirname):
    """Download every page of posts from the API into dirname.

    dirname -- directory to create (os.makedirs) and fill with page1.xml,
               page2.xml, ... one file per page of up to 50 posts.

    Pages are requested in order until one comes back without any posts.
    If any response is not reported okay by xmlOkay, a failure message is
    printed and the process exits with status 1 so that callers such as
    cron or shell scripts can detect the failed backup.
    """
    os.makedirs(dirname)
    page = 1
    while True:
        URL = "http://posterous.com/api/readposts?hostname=%s&num_posts=50&page=%s" % (MY_POSTEROUS_HOSTNAME, page)
        xml = xmlStringForURL(URL)
        # Check the status before looking for posts, so an error response
        # is reported as a failure and is never saved to disk.
        if not xmlOkay(xml):
            message("Backup Failed. Posterous returned an error")
            sys.exit(1)
        if not xmlStringHasPosts(xml):
            # A page with no posts means the previous page was the last one.
            break
        filename = os.path.join(dirname, "page%s.xml" % page)
        message("saving xml for page %s\n" % page)
        saveStringToFile(xml, filename)
        page += 1
def listOfPosterousFilesMentionedInXmlDir(dirname):
    """Collect every Posterous "getfile" URL found in the files of dirname.

    dirname -- directory whose entries are all read as text.

    Returns a list of URL strings in the order found (duplicates are kept).
    Files are visited in the order os.listdir returns them.
    """
    # A URL runs up to, but not including, the first '<' or quote character.
    urlRe = re.compile(r'http://getfile.*?\.posterous\.com/getfile/.*?[<\'\"]')
    returnUrlList = []
    for entryName in os.listdir(dirname):
        # The with-statement makes sure each file is closed after reading.
        with open(os.path.join(dirname, entryName), 'r') as xmlFile:
            xml = xmlFile.read()
        # Drop the trailing delimiter that the pattern had to consume.
        returnUrlList.extend(found[:-1] for found in urlRe.findall(xml))
    return returnUrlList
def listOfOriginalPosterousFiles(urlList):
    """Derive the original-file URLs behind thumbnail/scaled image URLs.

    urlList -- iterable of URL strings.

    For every URL containing ".thumb" or ".scaled<digits>" (case-insensitive)
    the part before the last such marker is taken as the original file's URL.
    Returns those original URLs without duplicates, in first-seen order, and
    prints an "orig file: ..." line for each one. URLs without a marker are
    ignored.
    """
    # One anchored match both detects the marker and captures the prefix;
    # the greedy ".*" makes the capture end at the last marker in the URL.
    origRe = re.compile(r'(?P<origurl>.*)\.(thumb|scaled[0-9]*)', re.I)
    returnOrigUrlList = []
    alreadyListed = set()  # constant-time duplicate check
    for url in urlList:
        found = origRe.match(url)
        if found is None:
            continue
        origUrl = found.group('origurl')
        if origUrl in alreadyListed:
            continue
        alreadyListed.add(origUrl)
        print("orig file: %s" % origUrl)
        returnOrigUrlList.append(origUrl)
    return returnOrigUrlList
def getPosterousFile(url):
    """Download one Posterous "getfile" URL into the files backup directory.

    url -- the file's URL. The part after ".../getfile/" is mirrored as a
           directory path below BACKUP_DIR_PATH/BACKUP_FILES_DIR_NAME.

    A file that already exists locally is skipped. An HTTP error is reported
    and the file is skipped; other exceptions propagate to the caller.
    """
    urlDir = re.sub(r'http://getfile.*?\.posterous\.com/getfile/', '', url)
    segments = urlDir.split("/")
    fileName = segments.pop()
    # The URL comes from remote XML: ignore empty, "." and ".." components
    # so the target can never climb out of the backup directory.
    safeSegments = [part for part in segments if part not in ("", ".", "..")]
    fileBackupDir = os.path.join(BACKUP_DIR_PATH, BACKUP_FILES_DIR_NAME, *safeSegments)
    if not os.path.isdir(fileBackupDir):
        os.makedirs(fileBackupDir)
    backupFilePath = os.path.join(fileBackupDir, fileName)

    if os.path.isfile(backupFilePath):
        message("file exists, skipping %s" % backupFilePath)
        return

    message("getting ready to download %s to %s" % (url, backupFilePath))
    try:
        response = urllib2.urlopen(url)
        try:
            # Fetch everything before creating the local file, so a failed
            # download cannot leave an empty file that later runs would
            # mistake for a finished one.
            data = response.read()
        finally:
            response.close()
    except urllib2.HTTPError as httperror:
        message("Download failed: %s" % (httperror))
        return

    # Binary mode: the payload is an image or other attachment and must be
    # stored byte-for-byte (text mode alters line endings on Windows).
    with open(backupFilePath, 'wb') as localFile:
        localFile.write(data)
    message("done")
def main():
    """Run a complete backup: save the post XML, then every referenced file.

    Exits with status 1 if the hostname placeholder has not been replaced.
    """
    if MY_POSTEROUS_HOSTNAME == "put hostname here":
        message("You need to edit the script to configure it with your posterous hostname")
        # Non-zero status lets a calling shell or cron job see the failure.
        sys.exit(1)
    dirName = xmlBackupDirName()
    saveXMLFilesToDirectory(dirName)
    urlList = listOfPosterousFilesMentionedInXmlDir(dirName)
    # Also fetch the full-size originals behind thumbnail/scaled URLs.
    urlList.extend(listOfOriginalPosterousFiles(urlList))
    for url in urlList:
        getPosterousFile(url)
# Run the backup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()