Optimize storage of drawing

If the drawing contains an image, we extract it

Fix #591
This commit is contained in:
Julien Duponchelle 2016-07-13 16:13:14 +02:00
parent 3dccfbb852
commit 8be1e77a2e
No known key found for this signature in database
GPG Key ID: CE8B29639E07F5E8
6 changed files with 244 additions and 6 deletions

View File

@ -15,8 +15,20 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import hashlib
import asyncio
import base64
import uuid
import re
import os
import xml.etree.ElementTree as ET
from gns3server.utils.picture import get_size
import logging
log = logging.getLogger(__name__)
class Drawing:
@ -26,12 +38,12 @@ class Drawing:
"""
def __init__(self, project, drawing_id=None, svg="<svg></svg>", x=0, y=0, z=0, rotation=0):
self.svg = svg
self._project = project
if drawing_id is None:
self._id = str(uuid.uuid4())
else:
self._id = drawing_id
self.svg = svg
self._x = x
self._y = y
self._z = z
@ -41,12 +53,79 @@ class Drawing:
def id(self):
return self._id
@property
def ressource_filename(self):
    """
    Name of the external file holding the content of the drawing.

    :returns: The stored value when it does not contain SVG markup
              (it is then a file name), None when the SVG is kept inline
    """
    stored = self._svg
    return None if "<svg" in stored else stored
@property
def svg(self):
    """
    SVG content of the drawing.

    When the content has been dumped to the project pictures directory it is
    loaded back from disk: a text file is returned as is, a binary picture is
    wrapped in an SVG document embedding it as base64.

    :returns: SVG markup, an empty SVG document if the file can't be read
    """
    if "<svg" in self._svg:
        return self._svg

    # Only the base name is used to build the path inside the pictures directory
    filename = os.path.basename(self._svg)
    try:
        with open(os.path.join(self._project.pictures_directory, filename), "rb") as f:
            data = f.read()
    except OSError:
        log.warning("Image file %s missing", filename)
        return "<svg></svg>"

    try:
        # Text content: this is a large SVG dumped on disk
        return data.decode()
    except UnicodeError:
        # Binary content: this is a picture, rebuild the SVG around it
        pass

    width, height = get_size(data)
    # splitext copes with names without extension or with several dots
    extension = os.path.splitext(filename)[1].lstrip(".")
    return "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" height=\"{height}\" width=\"{width}\">\n<image height=\"{height}\" width=\"{width}\" xlink:href=\"data:image/{extension};base64,{b64}\" />\n</svg>".format(b64=base64.b64encode(data).decode(), extension=extension, width=width, height=height)


@svg.setter
def svg(self, value):
    """
    Set SVG field value.

    If the SVG embeds a single base64 picture, the picture is extracted to
    the project pictures directory in order to avoid duplication of content
    and only its file name is stored. Any other content longer than 1000
    characters is dumped on disk too; shorter content is kept inline.

    :param value: SVG markup
    """
    if len(value) < 500:
        self._svg = value
        return

    try:
        root = ET.fromstring(value)
    except ET.ParseError as e:
        # The content can't be inspected, store it untouched instead of failing
        log.warning("Can't parse SVG, embedded picture not extracted: %s", e)
        root = None

    # SVG is the default namespace no need to prefix it
    # NOTE(review): these registrations are process wide and only matter when a
    # tree is serialized, which never happens here; kept to preserve the
    # existing side effect.
    ET.register_namespace('xmlns', "http://www.w3.org/2000/svg")
    ET.register_namespace('xmlns:xlink', "http://www.w3.org/1999/xlink")

    images = [] if root is None else root.findall("{http://www.w3.org/2000/svg}image")
    if len(images) == 1:
        uri = images[0].get("{http://www.w3.org/1999/xlink}href", "")
        if uri.startswith("data:image/"):
            header, separator, payload = uri.partition(",")
            data = None
            if separator:
                try:
                    data = base64.decodebytes(payload.encode())
                except ValueError as e:
                    # binascii.Error is a ValueError: broken base64 payload
                    log.warning("Can't decode embedded picture: %s", e)
            if data is not None:
                extension = re.sub(r"[^a-z0-9]", "", header.split(";")[0].split("/")[1].lower())

                # We compute an hash of the image file to avoid duplication
                filename = hashlib.md5(data).hexdigest() + "." + extension
                file_path = os.path.join(self._project.pictures_directory, filename)
                if not os.path.exists(file_path):
                    with open(file_path, "wb") as f:
                        f.write(data)
                value = filename

    # We dump also large svg on disk to keep .gns3 small
    if len(value) > 1000:
        content = value.encode()
        filename = hashlib.md5(content).hexdigest() + ".svg"
        file_path = os.path.join(self._project.pictures_directory, filename)
        if not os.path.exists(file_path):
            # Written as bytes so the file always holds the hashed UTF-8 data,
            # whatever the locale encoding and newline convention of the platform
            with open(file_path, "wb") as f:
                f.write(content)
        self._svg = filename
    else:
        self._svg = value
@property
@ -123,7 +202,7 @@ class Drawing:
"y": self._y,
"z": self._z,
"rotation": self._rotation,
"svg": self._svg
"svg": self.svg
}
def __repr__(self):

View File

@ -125,12 +125,21 @@ class Project:
@property
def captures_directory(self):
    """
    Location of the captures files

    The directory is created when it does not exist yet.

    :returns: Path of the captures directory of the project
    """
    captures = os.path.join(self._path, "project-files", "captures")
    os.makedirs(captures, exist_ok=True)
    return captures
@property
def pictures_directory(self):
    """
    Location of the image files

    The directory is created when it does not exist yet.

    :returns: Path of the images directory of the project
    """
    images = os.path.join(self._path, "project-files", "images")
    os.makedirs(images, exist_ok=True)
    return images
@property
def computes(self):
"""
@ -352,9 +361,25 @@ class Project:
def close(self):
for compute in self._project_created_on_compute:
yield from compute.post("/projects/{}/close".format(self._id))
self._cleanPictures()
self.reset()
self._status = "closed"
def _cleanPictures(self):
    """
    Delete unused images

    Every file of the pictures directory which is not referenced by a
    drawing of the project is removed. Errors are logged and never raised
    because this is a best effort clean up.
    """
    try:
        directory = self.pictures_directory
        pictures = set(os.listdir(directory))
    except OSError as e:
        log.warning(str(e))
        return

    # A drawing stored inline has no file (None), several drawings can share
    # the same file and a referenced file can be missing: a set difference
    # copes with all of that where set.remove() raised KeyError.
    in_use = {drawing.ressource_filename for drawing in self._drawings.values()}

    for pict in pictures - in_use:
        try:
            os.remove(os.path.join(directory, pict))
        except OSError as e:
            # Keep going, one failure should not prevent removing the other files
            log.warning(str(e))
@asyncio.coroutine
def delete(self):
yield from self.close()

View File

@ -55,8 +55,7 @@ DRAWING_OBJECT_SCHEMA = {
},
"svg": {
"description": "SVG content of the drawing",
"type": "string",
"pattern": "^<(.|[\r\n])+>$"
"type": "string"
}
},
"additionalProperties": False

View File

@ -0,0 +1,90 @@
#!/usr/bin/env python
#
# Copyright (C) 2016 GNS3 Technologies Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import struct
def get_size(data):
    """
    Get image size

    The format is detected from the signature at the beginning of the
    buffer. GIF, PNG, JPEG and JPEG 2000 are supported.

    :param data: A buffer with image content
    :return: Tuple (width, height), (0, 0) when the format is not recognized
    :raises ValueError: If the header of a recognized format is truncated or corrupted
    """
    # Original version:
    # https://github.com/shibukawa/imagesize_py
    #
    # The MIT License (MIT)
    #
    # Copyright © 2016 Yoshiki Shibukawa
    #
    # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
    #
    # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
    #
    # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    height = 0
    width = 0
    size = len(data)

    # handle GIFs
    if size >= 10 and data[:6] in (b'GIF87a', b'GIF89a'):
        try:
            # The logical screen size is two unsigned little endian 16 bits integers
            width, height = struct.unpack("<HH", data[6:10])
        except struct.error:
            raise ValueError("Invalid GIF file")
    # PNG: the signature is followed by the IHDR chunk (length, type, then width and height)
    elif size >= 24 and data.startswith(b'\211PNG\r\n\032\n') and data[12:16] == b'IHDR':
        try:
            width, height = struct.unpack(">LL", data[16:24])
        except struct.error:
            raise ValueError("Invalid PNG file")
    # Maybe this is for an older PNG version.
    elif size >= 16 and data.startswith(b'\211PNG\r\n\032\n'):
        try:
            width, height = struct.unpack(">LL", data[8:16])
        except struct.error:
            raise ValueError("Invalid PNG file")
    # handle JPEGs
    elif size >= 2 and data.startswith(b'\377\330'):
        try:
            width, height = _jpeg_size(data)
        except struct.error:
            raise ValueError("Invalid JPEG file")
    # handle JPEG2000s (12 bytes signature box, the two spaces are spelled \x20)
    elif size >= 12 and data.startswith(b'\x00\x00\x00\x0cjP\x20\x20\r\n\x87\n'):
        try:
            # Height and width are read at a fixed offset of the header
            height, width = struct.unpack_from('>LL', data, 48)
        except struct.error:
            raise ValueError("Invalid JPEG2000 file")
    return width, height


def _jpeg_size(data):
    """
    Walk the segments of a JPEG buffer up to the frame header.

    :param data: A buffer starting with the JPEG SOI marker
    :return: Tuple (width, height)
    :raises ValueError: If no frame header is found
    :raises struct.error: If a segment is truncated
    """
    end = len(data)
    position = 2  # Skip the SOI marker
    while True:
        # A marker is introduced by one or several 0xff bytes
        while position < end and data[position] == 0xff:
            position += 1
        if position >= end:
            raise ValueError("Invalid JPEG file")
        marker = data[position]
        position += 1

        (length,) = struct.unpack_from('>H', data, position)
        # SOFn markers are 0xc0-0xcf except DHT (0xc4), JPG (0xc8) and DAC (0xcc)
        if 0xc0 <= marker <= 0xcf and marker not in (0xc4, 0xc8, 0xcc):
            # Segment layout: length (2 bytes), precision (1 byte), height, width
            height, width = struct.unpack_from('>HH', data, position + 3)
            return width, height
        if length < 2:
            # The length includes its own two bytes, a smaller value is corrupted
            raise ValueError("Invalid JPEG file")
        position += length

View File

@ -18,6 +18,7 @@
from unittest.mock import MagicMock
import pytest
import uuid
import os
from tests.utils import AsyncioMagicMock
@ -90,3 +91,33 @@ def test_update(drawing, project, async_run, controller):
assert "svg" not in args[1]
assert project.dump.called
def test_image_base64(project):
    """
    A picture embedded as base64 in the SVG must be dumped on disk
    """
    # NOTE(review): the xlink:href payload looks empty here while the expected
    # file name is a hash of the picture content; confirm the literal against
    # the repository.
    svg = "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" height=\"128\" width=\"128\">\n<image height=\"128\" width=\"128\" xlink:href=\"\" />\n</svg>"
    expected_filename = "8418154b760b4e8023650e04c4992e24.png"

    drawing = Drawing(project, None, svg=svg)

    # Only the file name is stored, the picture itself is on disk
    assert drawing._svg == expected_filename
    assert os.path.exists(os.path.join(project.pictures_directory, expected_filename))
    # Reading the property rebuilds the original SVG
    assert drawing.svg == svg
def test_image_svg(project):
    """
    A large SVG must be dumped on disk
    """
    header = "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" height=\"128\" width=\"128\">\n"
    svg = header + "<rect width=\"100\"></rect>" * 1000 + "</svg>"
    expected_filename = "fdf4d3035774a72ba165f7199b9431b2.svg"

    drawing = Drawing(project, None, svg=svg)

    # Only the file name is stored, the SVG itself is on disk
    assert drawing._svg == expected_filename
    assert os.path.exists(os.path.join(project.pictures_directory, expected_filename))
    # Reading the property loads the content back
    assert drawing.svg == svg

View File

@ -265,6 +265,20 @@ def test_deleteDrawing(async_run, project, controller):
assert len(project._drawings) == 0
def test_cleanPictures(async_run, project, controller):
    """
    Closing a project must remove the pictures no drawing uses anymore
    """

    def picture_path(name):
        return os.path.join(project.pictures_directory, name)

    drawing = async_run(project.add_drawing())
    drawing._svg = "test.png"

    # test.png is referenced by the drawing, test2.png is orphaned
    for name in ("test.png", "test2.png"):
        with open(picture_path(name), "w+"):
            pass

    async_run(project.close())

    assert os.path.exists(picture_path("test.png"))
    assert not os.path.exists(picture_path("test2.png"))
def test_delete(async_run, project, controller):
assert os.path.exists(project.path)
async_run(project.delete())