-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtppage.py
273 lines (228 loc) · 10 KB
/
tppage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
"""tppage.py -- proof-of-principle of a trivial plots page utility
Requires imagemagick's convert utility for making thumbnails,
otherwise just uses Python's standard modules.
Usage examples:
Starting in the root directory of the plots pages:
To put captions and files from doc 9876 in the under_review pages:
$ python tppage.py under_review 9876
To do the same thing, but reading from a .zip file instead of Doc-DB:
$ python tppage.py under_review 9876 example_9876.zip
To rebuild under_review/index.html without changing any other files:
$ python tppage.py under_review
The documents must conform to a particular structure in order to be imported.
Captions must be provided in files with the naming convention
*_caption.txt or *_caption.tex. Only files whose names match the prefix of
a caption file will be imported.
:author: G. Horton-Smith
"""
import zipfile
import os
import time
import urllib
import re
################################################################
# Some constants
################################################################
# File-name suffixes that mark a file as a caption.  The part of the name
# before the suffix is the prefix shared with the files the caption describes.
# Warning: CAP_END_TXT and CAP_END_TEX have to be the same length
CAP_END_TXT = '_caption.txt'
CAP_END_TEX = '_caption.tex'
# Suffix appended to an image's full file name to form its thumbnail's name.
THUMB_END = "_thumb.png"
# Extensions tried, in this order, when looking for an image to thumbnail.
IMAGE_EXT_LIST = ['pdf', 'eps', 'jpeg', 'jpg', 'png', 'gif']
# Doc-DB URL templates; the single %s is filled with the document number.
# The PUBLIC variant is used for link targets when the status name starts
# with "public".
DOCDB_SHOW_DOC_TEMPLATE = 'http://microboone-docdb.fnal.gov:8080/cgi-bin/ShowDocument?docid=%s'
DOCDB_SHOW_PUBLIC_DOC_TEMPLATE = 'http://microboone-docdb.fnal.gov/cgi-bin/ShowDocument?docid=%s'
DOCDB_GET_ZIP_TEMPLATE = 'http://microboone-docdb.fnal.gov:8080/cgi-bin/RetrieveArchive?docid=%s&type=zip'
# Top of the generated index page.  Formatted with a dict that supplies
# 'status'; literal percent signs in the CSS are therefore doubled (100%%).
HTML_HEADER = """<html>
<head><title>Index of %(status)s plots and other data representations</title>
<link rel="StyleSheet" href="ubplot.css" type="text/css"
media="screen, projection"/>
<style>
div.FLOATBOX { border: thin solid #b37013; box-shadow: 2pt 2pt 2pt #f4cd98;
margin: 4pt; padding: 2pt; float: left; }
#plotpagefooter { width: 100%%; position:fixed; bottom:0; left:0;
background: #fed; padding-top: 2pt; }
</style>
</head>
<body>
<h1>Index of %(status)s plots and other data representations</h1>
"""
# Bottom of the generated index page.  Formatted with a single string (the
# "last updated" time).  Note that it already closes <body> and <html>.
HTML_FOOTER = """<br/>
<div id="plotpagefooter">
Questions? Click on the document numbers for notes and authors,
or <a href="../plot_contacts.html">contact us</a>.
<span style="float:right">Page last updated: %s </span>
</div>
</body>
</html>
"""
################################################################
# Utility functions
################################################################
def smartExtractFromZip(zipfile, fn, destdir):
    """Extract one archive member into destdir, dropping its directory path.

    Only the last '/'-separated component of the member name is used for the
    output file, so all extracted files land directly in destdir.

    :param zipfile: archive object providing ``read(name)`` (for example a
        ``zipfile.ZipFile``).  The parameter name shadows the zipfile module
        inside this function.
    :param fn: name of the member inside the archive.
    :param destdir: existing directory that receives the file.
    """
    shortfn = fn[fn.rfind('/')+1:]
    if not shortfn:
        # A name ending in '/' is a directory entry: there is no file to write.
        return
    data = zipfile.read(fn)
    # Archive members are raw bytes (images, PDFs, ...), so write in binary
    # mode, and close the handle deterministically.
    with open(os.path.join(destdir, shortfn), 'wb') as out:
        out.write(data)
################################################################
# The code that does all the work.
################################################################
class TPP:
    """TPP is the main class for the "trivial plots page" utility.

    It copies captioned files out of a Doc-DB zip archive into
    ``./<status>/<docno>/`` and regenerates ``<status>/index.html`` from
    whatever captioned files are found below ``<status>``.
    """

    def __init__(self):
        pass

    def _is_caption(self, name):
        """Return True if name ends with one of the caption suffixes."""
        return name.endswith((CAP_END_TXT, CAP_END_TEX))

    def _caption_prefix(self, name):
        """Return name with its caption suffix removed.

        Each suffix is checked explicitly, so the two suffixes do not need to
        have the same length.  A name without a caption suffix is returned
        unchanged.
        """
        for suffix in (CAP_END_TXT, CAP_END_TEX):
            if name.endswith(suffix):
                return name[:-len(suffix)]
        return name

    def add_plots(self, status, docno, zf=None):
        """Add all captioned files from docno to subdirectory named
        status in current directory, and rebuild index pages.

        Clears old files in the destination directory before filling.
        Does not remove old files from other directories.

        :param status: name of the status subdirectory (e.g. under_review).
        :param docno: Doc-DB document number, as a string.
        :param zf: optional open zip archive; if given, it is used instead
            of retrieving the archive from Doc-DB.
        """
        #-- clean up "status" and "docno" before either one is used
        status = status.strip().strip('/')
        docno = docno.strip()
        #-- get zip file from Doc-DB if none supplied
        if zf is None:
            zf = self.getZip(docno)
        destdir = './%s/%s' % (status, docno)
        self.cleardir(destdir)
        namelist = zf.namelist()
        captions = [fn for fn in namelist if self._is_caption(fn)]
        for cap in captions:
            prefix = self._caption_prefix(cap)
            # The caption file starts with its own prefix, so this loop
            # extracts it together with the files it describes.
            for fn in namelist:
                # names ending in '/' are directory entries, not files
                if fn.startswith(prefix) and not fn.endswith('/'):
                    smartExtractFromZip(zf, fn, destdir)
        self.rebuild_index_page(status)

    def cleardir(self, dname):
        """Clear files from the given directory, or make directory if needed.

        Subdirectories are left in place (with a warning), because
        os.remove() cannot delete a directory and would abort the run.

        :param dname: path of the directory to empty or create.
        """
        if os.path.isdir(dname):
            for fn in os.listdir(dname):
                path = os.path.join(dname, fn)
                if os.path.isdir(path) and not os.path.islink(path):
                    print("Warning: leaving subdirectory %s in place" % path)
                    continue
                os.remove(path)
        else:
            os.makedirs(dname)

    def rebuild_index_page(self, status):
        """Rebuild the html pages from the files found in subdirectories.

        Relies on python's "walk" function -- see help(os.walk).
        Every directory below status that contains at least one caption file
        becomes a section; the first path component below status is taken to
        be the Doc-DB document number and is used for the section title/link.

        :param status: name of the status subdirectory; the page is written
            to ``<status>/index.html``.
        """
        status = status.strip().strip('/')
        if status.startswith("public"):
            show_doc_template = DOCDB_SHOW_PUBLIC_DOC_TEMPLATE
        else:
            show_doc_template = DOCDB_SHOW_DOC_TEMPLATE
        with open("%s/index.html" % status, "w") as fout:
            fout.write(HTML_HEADER % {'status': status})
            for dirpath, subdirnames, filenames in sorted(os.walk(status)):
                captionfns = sorted(fn for fn in filenames
                                    if self._is_caption(fn))
                if not captionfns:
                    continue
                # form relative directory name from "dirpath" returned by
                # os.walk()
                reldir = dirpath[len(status)+1:]
                # use first part of reldir (DocDB number) as section title
                header2 = reldir.split('/', 1)[0]
                docinfo = self.getDocInfo(header2)
                fout.write('<hr/><a href="%s"><h2>%s (#%s)</h2></a>\n' % (
                    show_doc_template % header2, docinfo['title'], header2))
                # uncomment line below to show authors
                # fout.write('<ul>%s</ul>\n' % docinfo['authors'])
                for cap in captionfns:
                    prefix = self._caption_prefix(cap)
                    fout.write("<div><h3>%s</h3><br/>\n" % prefix)
                    # Files belonging to this caption: "<prefix>.<ext>" and
                    # the caption file itself, but never thumbnails left
                    # over from an earlier run.
                    fnlist = sorted(
                        fn for fn in filenames
                        if (fn.startswith(prefix + ".")
                            or fn.startswith(prefix + "_caption."))
                        and not fn.endswith(THUMB_END))
                    # find a good image for making a thumbnail image;
                    # IMAGE_EXT_LIST gives the order of preference
                    thumbsrc = None
                    for ext in IMAGE_EXT_LIST:
                        candidate = prefix + "." + ext
                        if candidate in fnlist:
                            thumbsrc = candidate
                            break
                    # make the thumbnail
                    if thumbsrc is not None:
                        thumb_path = self.make_thumb(dirpath + "/" + thumbsrc)
                        fout.write(
                            '<div class="FLOATBOX"><img src="%s"/></div>\n' %
                            thumb_path[len(status)+1:])
                    for fn in fnlist:
                        fout.write('<a href="%s">%s</a><br/>\n' %
                                   (reldir + "/" + fn, fn))
                    # now the caption
                    # NOTE(review): the caption text is inserted into the
                    # page unescaped; confirm whether captions are meant to
                    # be able to carry HTML markup.
                    with open(dirpath + "/" + cap) as capfile:
                        caption = capfile.read()
                    fout.write('<br clear="all"/><p>%s</p>\n' % caption)
                    fout.write("</div>\n")
            fout.write("<hr/>\n")
            # HTML_FOOTER already closes <body> and <html>, so nothing more
            # is written after it.
            fout.write(HTML_FOOTER % time.ctime())

    def make_thumb(self, fn):
        """Make a thumbnail. Requires imagemagick's convert utility.

        :param fn: path of the source image.
        :returns: path of the thumbnail (fn + THUMB_END).  The path is
            returned even if convert could not be run.
        """
        # Local import: the module header does not import subprocess.
        import subprocess
        thumbfn = fn + THUMB_END
        # Pass the arguments as a list (no shell), so quotes or other shell
        # metacharacters in file names cannot alter the command.
        try:
            subprocess.call(["convert", "-trim", "+repage", "-resize", "400",
                             fn, thumbfn])
        except OSError as err:
            # convert is missing or not executable; keep going without a
            # thumbnail rather than aborting the page rebuild
            print("Could not run convert for %s: %s" % (fn, err))
        return thumbfn

    def getZip(self, docno):
        """Get the zip file from Doc-DB.

        :param docno: Doc-DB document number.
        :returns: a zipfile.ZipFile opened on the downloaded temporary file.
        """
        url = DOCDB_GET_ZIP_TEMPLATE % docno
        (filename, headers) = urllib.urlretrieve(url)
        print("Retrieved document %s to temporary file %s" % (docno, filename))
        return zipfile.ZipFile(filename)

    def getDocInfo(self, docno):
        """Get the document info from Doc-DB.

        :param docno: Doc-DB document number.
        :returns: dict with keys 'title' (text of the <h1> following the
            "DocTitle" div) and 'authors' (inner HTML of the <ul> following
            the "Authors" div).  A value is an empty string when the
            corresponding element is not found.
        """
        url = DOCDB_SHOW_DOC_TEMPLATE % docno
        page = urllib.urlopen(url)
        try:
            data = page.read()
        finally:
            page.close()
        info = {}
        # Non-greedy ".*?" keeps each match at the first <h1>/<ul> after the
        # named div; a greedy ".*" would run on to the last one on the page.
        m = re.search(r'<div[^>]*id *= *"DocTitle"[^>]*>.*?<h1>([^<]*)</h1>',
                      data, re.S)
        if m:
            info['title'] = m.group(1)
        else:
            info['title'] = ""
            print("No title found for %s" % docno)
        m = re.search(r'<div[^>]*id *= *"Authors"[^>]*>.*?<ul>(.*?)</ul>',
                      data, re.S)
        if m:
            info['authors'] = m.group(1)
        else:
            info['authors'] = ''
            print("No authors found for %s" % docno)
        return info
################################################################
# ye basic "main" entry point
################################################################
if __name__ == "__main__":
    import sys

    # Everything after the script name: [status [docno [zipfile ...]]]
    cli_args = sys.argv[1:]
    nargs = len(cli_args)
    tpp = TPP()
    if nargs == 0:
        # no arguments at all: show the usage text and report failure
        print("""Usage examples:
Starting in the root directory of the plots pages:
To put captions and files from doc 9876 in the under_review pages:
$ python tppage.py under_review 9876
To do the same thing, but reading from a .zip file instead of Doc-DB:
$ python tppage.py under_review 9876 example_9876.zip
To rebuild under_review/index.html without changing any other files:
$ python tppage.py under_review
""")
        sys.exit(1)
    elif nargs == 1:
        # status only: regenerate the index page, touch nothing else
        tpp.rebuild_index_page(cli_args[0])
    elif nargs == 2:
        # status + document number: fetch the archive from Doc-DB
        tpp.add_plots(cli_args[0], cli_args[1])
    else:
        # status + document number + local zip file (extra words are ignored)
        local_archive = zipfile.ZipFile(cli_args[2])
        tpp.add_plots(cli_args[0], cli_args[1], local_archive)