229 lines
7.7 KiB
Python
229 lines
7.7 KiB
Python
#!/usr/bin/env python
|
|
#coding:utf-8
|
|
# Purpose: filemanager module
|
|
# Created: 31.12.2010
|
|
# Copyright (C) 2010, Manfred Moitzi
|
|
# License: MIT license
|
|
from __future__ import unicode_literals, print_function, division
|
|
__author__ = "mozman <mozman@gmx.at>"
|
|
|
|
import os
|
|
import zipfile
|
|
import random
|
|
from datetime import datetime
|
|
|
|
from .xmlns import etree, CN
|
|
from .manifest import Manifest
|
|
from .compatibility import tobytes, bytes2unicode, is_bytes, is_zipfile
|
|
from .compatibility import is_stream, StringIO
|
|
|
|
# Alphabet (ASCII letters and digits) from which random temporary file names
# are sampled.
FNCHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
|
|
|
|
class FileObject(object):
    """A single archive member: its content element, media type and ZipInfo."""

    __slots__ = ['element', 'media_type', 'zipinfo']

    def __init__(self, name, element, media_type=""):
        """Create the zip entry description for `name`.

        The entry is stamped with the current local time and marked as
        deflate-compressed.
        """
        timestamp = datetime.now().timetuple()[:6]
        info = zipfile.ZipInfo(name, timestamp)
        info.compress_type = zipfile.ZIP_DEFLATED
        self.zipinfo = info
        self.media_type = media_type
        self.element = element

    def tobytes(self):
        """Return the content of this entry as bytes.

        Elements that provide their own `tobytes()` method serialize
        themselves (with an XML declaration for media type 'text/xml');
        everything else is converted by the module-level `tobytes()` helper.
        """
        if not hasattr(self.element, 'tobytes'):
            return tobytes(self.element)
        if self.media_type == 'text/xml':
            return self.element.tobytes(xml_declaration=True)
        return self.element.tobytes()

    @property
    def filename(self):
        """Name of this entry inside the zip archive."""
        return self.zipinfo.filename
|
|
|
|
class FileManager(object):
    """Manage the member files of a zip-packaged document.

    Registered files are kept in `self.directory` as `FileObject` instances
    keyed by their archive name. When the document is written, the registered
    files are serialized first and every other member of the existing zip
    file `self.zipname` (if there is one) is copied over unchanged.
    """

    def __init__(self, zipname=None):
        """Load the manifest from `zipname` and register it.

        `zipname` may be None or point to a file that is not a zip archive;
        `get_bytes()` then returns None and the manifest is built from None.
        """
        self.directory = dict()
        self.zipname = zipname
        self.manifest = Manifest(self.get_bytes('META-INF/manifest.xml'))
        self.register('META-INF/manifest.xml', self.manifest, 'text/xml')

    def has_zip(self):
        """Return True if `self.zipname` is set and names a zip file."""
        if self.zipname is None:
            return False
        return is_zipfile(self.zipname)

    def _open_bytestream(self):
        """Open `self.zipname` for binary reading; the caller closes it."""
        return open(self.zipname, 'rb')

    def tmpfilename(self, basefile=None):
        """Return an absolute, currently unused '*.tmp' file name.

        The name is placed in the folder of `basefile`, or relative to the
        current working directory if `basefile` is None.

        NOTE(review): the file is not created here, so another process could
        claim the same name between this check and its later use.
        """
        folder = "" if basefile is None else os.path.dirname(basefile)
        while True:
            # random.sample() draws 8 distinct characters from FNCHARS
            randomname = ''.join(random.sample(FNCHARS, 8))
            filename = os.path.abspath(os.path.join(folder, randomname + '.tmp'))
            if not os.path.exists(filename):
                return filename

    def register(self, name, element, media_type=""):
        """Register `element` as archive member `name` and list it in the manifest."""
        self.directory[name] = FileObject(name, element, media_type)
        # 'mimetype' does not need to be listed in manifest.xml, but listing
        # it does not seem to break the validity of the manifest file, so
        # every registered file is added without exception.
        self.manifest.add(name, media_type)

    def _write_zip(self, target):
        """Write the whole document as a new zip archive to `target`.

        `target` is a file name or a writable stream. The archive is closed
        even if writing one of the members fails.
        """
        zippo = zipfile.ZipFile(target, 'w', zipfile.ZIP_DEFLATED)
        try:
            self._tozip(zippo)
        finally:
            zippo.close()

    def save(self, filename, backup=True):
        """Write the document to `filename` (a file name or a stream).

        A stream is written to directly. For a file name the archive is
        always created as a new temporary file next to the target and then
        renamed; an existing target becomes `filename + '.bak'` if `backup`
        is true, else it is removed. On success `self.zipname` is set to
        `filename`.
        """
        if is_stream(filename):
            self._write_zip(filename)
            return  # job done

        tmpfilename = self.tmpfilename(filename)
        try:
            self._write_zip(tmpfilename)
        except Exception:
            # do not leave a half-written temporary file behind
            if os.path.exists(tmpfilename):
                try:
                    os.remove(tmpfilename)
                except OSError:
                    pass  # best effort, report the original error
            raise

        if os.path.exists(filename):
            if backup:
                # existing document becomes the backup file
                bakfilename = filename + '.bak'
                # remove existing backup file
                if os.path.exists(bakfilename):
                    os.remove(bakfilename)
                os.rename(filename, bakfilename)
            else:
                # just remove the existing document
                os.remove(filename)

        # rename the newly created document
        os.rename(tmpfilename, filename)
        self.zipname = filename

    def get_bytes(self, filename):
        """Return the content of archive member `filename` as bytes.

        Returns None if there is no zip file or the member does not exist.
        """
        if not self.has_zip():
            return None
        bytestream = self._open_bytestream()
        try:
            archive = zipfile.ZipFile(bytestream, 'r')
            try:
                return archive.read(filename)
            except KeyError:
                # member is not part of the archive
                return None
            finally:
                archive.close()
        finally:
            bytestream.close()

    def get_text(self, filename, default=None):
        """Return member `filename` decoded by `bytes2unicode()`, or `default`
        if the member is not available.
        """
        filecontent = self.get_bytes(filename)
        if filecontent is None:
            return default
        return bytes2unicode(filecontent)

    def get_xml_element(self, filename):
        """Return member `filename` parsed as XML element, or None if the
        member is missing or empty.
        """
        filecontent = self.get_bytes(filename)
        if filecontent:
            return etree.XML(filecontent)
        return None

    def _tozip(self, zippo):
        """Write all registered files to the open zip archive `zippo`, then
        copy the remaining members of the existing zip file.

        Raises KeyError if no 'mimetype' file is registered.
        """
        # mimetype file should be the first & uncompressed file in zipfile
        mimetype = self.directory['mimetype']
        mimetype.zipinfo.compress_type = zipfile.ZIP_STORED
        zippo.writestr(mimetype.zipinfo, mimetype.tobytes())
        processed = [mimetype.filename]

        # The directory itself is left untouched, so a failing write cannot
        # lose the 'mimetype' entry.
        for name, fileobj in self.directory.items():
            if name == 'mimetype':
                continue  # already written as first member
            zippo.writestr(fileobj.zipinfo, fileobj.tobytes())
            processed.append(fileobj.filename)

        self._copy_zip_to(zippo, processed)

    def _copy_zip_to(self, newzip, ignore=()):
        """Copy all files like pictures and settings from the existing zip
        file to `newzip`, except the files named in `ignore`.
        """
        if not self.has_zip():
            return  # nothing to copy
        try:
            bytestream = self._open_bytestream()
        except IOError:
            return  # nothing to copy

        try:
            origzip = zipfile.ZipFile(bytestream)
            try:
                self._copy_from_zip_to_zip(origzip, newzip, ignore)
            finally:
                origzip.close()
        finally:
            bytestream.close()

    @staticmethod
    def _copy_from_zip_to_zip(fromzip, tozip, ignore):
        """Copy every member of `fromzip` whose name is not in `ignore` to
        `tozip`, reusing the original ZipInfo.
        """
        skip = frozenset(ignore)  # one hash lookup per member
        for zipinfo in fromzip.infolist():
            if zipinfo.filename not in skip:
                tozip.writestr(zipinfo, fromzip.read(zipinfo.filename))

    def tobytes(self):
        """Return the complete document as the bytes of a zip archive."""
        iobuffer = StringIO()
        self._write_zip(iobuffer)
        return iobuffer.getvalue()
|
|
|
|
def check_zipfile_for_oasis_validity(filename, mimetype):
    """ Checks the zipfile structure and least necessary content, but not the
    XML validity of the document.

    `filename` is the path of the file to check, `mimetype` the expected
    mimetype as bytes. Returns True if all checks pass, else False.
    """
    def check_manifest(stream):
        # The manifest has to list the required files and the root entry '/',
        # and the media type of '/' has to match the expected mimetype.
        xmltree = etree.XML(stream)
        full_path = CN('manifest:full-path')
        directory = dict(
            (entry.get(full_path), entry)
            for entry in xmltree.findall(CN('manifest:file-entry'))
        )
        for name in ('content.xml', 'meta.xml', 'styles.xml', '/'):
            if name not in directory:
                return False
        media_type = directory['/'].get(CN('manifest:media-type'))
        return bytes2unicode(mimetype) == media_type

    assert is_bytes(mimetype)
    if not is_zipfile(filename):
        return False
    # The first file in an OpenDocumentFormat zipfile should be the uncompressed
    # mimetype file, in a regular zipfile this file starts at byte position 30.
    # see also OASIS OpenDocument Specs. Chapter 17.4
    # LibreOffice ignores this requirement and opens all documents with
    # valid content (META-INF/manifest.xml, content.xml).
    with open(filename, 'rb') as f:
        # 30 bytes before the name + 8 bytes for the name 'mimetype' + content
        header = f.read(38 + len(mimetype))
    if header[30:] != b'mimetype' + mimetype:
        return False

    zf = zipfile.ZipFile(filename)
    try:
        names = zf.namelist()
        if 'META-INF/manifest.xml' not in names:
            return False
        manifest = zf.read('META-INF/manifest.xml')
    finally:
        # close the archive even if reading it fails
        zf.close()

    # meta.xml and styles.xml are not required, but I think they should
    for required in ('content.xml', 'meta.xml', 'styles.xml', 'mimetype'):
        if required not in names:
            return False
    return check_manifest(manifest)
|