Source code for MyCapytain.common.metadata
# -*- coding: utf-8 -*-
"""
.. module:: MyCapytain.common.metadata
:synopsis: Metadata related objects
.. moduleauthor:: Thibault Clérice <leponteineptique@gmail.com>
"""
from __future__ import unicode_literals
from six import text_type
from random import randint
from copy import deepcopy
from types import GeneratorType
from collections import defaultdict, OrderedDict
from MyCapytain.common.constants import Namespace, RDF_PREFIX, RDF_MAPPING, Mimetypes, Exportable
from MyCapytain.errors import UnknownNamespace
[docs]class Metadatum(object):
""" Metadatum object represent a single field of metadata
:param name: Name of the field
:type name: text_type
:param children: List of tuples, where first element is the key, and second the value
:type children: List
:param namespace: Object representing a namespace
:type namespace: Namespace
:Example:
>>> a = Metadatum("label", [("lat", "Amores"), ("fre", "Les Amours")])
>>> print(a["lat"]) # == "Amores"
.. automethod:: __getitem__
.. automethod:: __setitem__
.. automethod:: __iter__
"""
def __init__(self, name, children=None, namespace=None):
""" Initiate a Metadatum object
"""
self.name = name
self.children = OrderedDict()
self.default = None
self.__namespace__ = namespace
if "//" in name and namespace is None:
uri, self.name = tuple(name.rsplit("/"))
if uri not in RDF_MAPPING:
prefix = "ns{}".format(randint(1, 4096))
else:
prefix = RDF_MAPPING[uri]
self.namespace = Namespace(uri, prefix)
if ":" in name and namespace is None:
prefix, self.name = tuple(name.split(":"))
if prefix not in RDF_PREFIX:
raise UnknownNamespace(
"%s is unknown. Update MyCapytain.common.utils.RDF_PREFIX to support this prefix" % prefix
)
self.namespace = Namespace(RDF_PREFIX[prefix], prefix)
if children is not None and isinstance(children, list):
for tup in children:
self[tup[0]] = tup[1]
@property
def namespace(self):
""" Namespace of the metadata entry """
return self.__namespace__
@namespace.setter
def namespace(self, namespace):
""" Set namespace property
:param namespace: Namespace to set
:type namespace: Namespace
"""
if namespace is not None and not isinstance(namespace, Namespace):
raise TypeError("Only None and Namespace value are accepted")
self.__namespace__ = namespace
[docs] def __getitem__(self, key):
""" Add an iterable access method
Int typed key access to the *n* th registered key in the instance.
If string based key does not exist, see for a default.
:param key: Key of wished value
:type key: text_type, tuple, int
:returns: An element of children whose index is key
:raises: KeyError if key is unknown (when using Int based key or when default is not set)
:Example:
>>> a = Metadatum("label", [("lat", "Amores"), ("fre", "Les Amours")])
>>> print(a["lat"]) # Amores
>>> print(a[("lat", "fre")]) # Amores, Les Amours
>>> print(a[0]) # Amores
>>> print(a["dut"]) # Amores
"""
if isinstance(key, int):
items = list(self.children.keys())
if key + 1 > len(items):
raise KeyError("Unknown key %s" % key)
else:
key = items[key]
elif isinstance(key, tuple):
return tuple([self[k] for k in key])
if key not in self.children:
if self.default is None:
raise KeyError("Unknown key %s" % key)
else:
return self.children[self.default]
else:
return self.children[key]
[docs] def __setitem__(self, key, value):
""" Register index key and value for the instance
:param key: Index key(s) for the metadata
:type key: text_type, list, tuple
:param value: Values for the metadata
:type value: text_type, list, tuple
:returns: An element of children whose index is key
:raises: `TypeError` if key is not text_type or tuple of text_type
:raises: `ValueError` if key and value are list and are not the same size
:Example:
>>> a = Metadatum(name="label")
>>> a["eng"] = "Illiad"
>>> print(a["eng"]) # Illiad
>>> a[("fre", "grc")] = ("Illiade", "Ἰλιάς")
>>> print(a["fre"], a["grc"]) # Illiade, Ἰλιάς
>>> a[("ger", "dut")] = "Iliade"
>>> print(a["ger"], a["dut"]) # Iliade, Iliade
"""
if isinstance(key, tuple):
if not isinstance(value, (tuple, list)):
value = [value]*len(key)
if len(value) < len(key):
raise ValueError("Less values than keys detected")
for i in range(0, len(key)):
self[key[i]] = value[i]
elif not isinstance(key, text_type):
raise TypeError(
"Only text_type or tuple instances are accepted as key")
else:
self.children[key] = value
if self.default is None:
self.default = key
[docs] def setDefault(self, key):
""" Set a default key when a field does not exist
:param key: An existing key of the instance
:type key: text_type
:returns: Default key
:raises: `ValueError` If key is not registered
:Example:
>>> a = Metadatum("label", [("lat", "Amores"), ("fre", "Les Amours")])
>>> a.setDefault("fre")
>>> print(a["eng"]) # == "Les Amours"
"""
if key not in self.children:
raise ValueError("Can not set a default to an unknown key")
else:
self.default = key
return self.default
[docs] def __iter__(self):
""" Iter method of Metadatum
:Example:
>>> a = Metadata("label", [("lat", "Amores"), ("fre", "Les Amours")])
>>> for key, value in a:
>>> print(key, value) # Print ("lat", "Amores") and then ("fre", "Les Amours")
"""
for key in self.children:
yield (key, self.children[key])
def __len__(self):
""" Get the length of the current Metadatum object
:return: Number of variant of the metadatum
:rtype: int
:Example:
>>> a = Metadata("label", [("lat", "Amores"), ("fre", "Les Amours")])
>>> len(a) == 2
"""
return len(self.children)
def __getstate__(self):
""" Pickling method
:return:
"""
return dict(
name=self.name,
langs=[(key, val) for key, val in self.children.items()],
default=self.default
)
def __setstate__(self, dic):
""" Unpickling method
:param dic: Dictionary to use to set up the object
:return: New generated object
"""
self.name = dic["name"]
self.children = OrderedDict(dic["langs"])
self.default = dic["default"]
return self
[docs]class Metadata(Exportable):
"""
A metadatum aggregation object provided to centralize metadata
:param keys: A metadata field names list
:type keys: [text_type]
:ivar metadata: Dictionary of metadatum
.. automethod:: __getitem__
.. automethod:: __setitem__
.. automethod:: __iter__
.. automethod:: __len__
.. automethod:: __add__
:cvar EXPORT_TO: List of exportable supported formats
:cvar DEFAULT_EXPORT: Default export (CTS XML Inventory)
"""
EXPORT_TO = [Mimetypes.JSON.Std, Mimetypes.XML.RDF, Mimetypes.JSON.DTS.Std]
DEFAULT_EXPORT = Mimetypes.JSON.Std
def __init__(self, keys=None):
""" Initiate the object
"""
self.metadata = defaultdict(Metadatum)
self.__keys__ = []
if keys is not None and isinstance(keys, (list, set, GeneratorType)):
for key in keys:
self[key] = Metadatum(name=key)
[docs] def __getitem__(self, key):
""" Add a quick access system through getitem on the instance
:param key: Index key representing a set of metadatum
:type key: text_type, int, tuple
:returns: An element of children whose index is key
:raises: `KeyError` If key is not registered or recognized
:Example:
>>> a = Metadata()
>>> m1 = Metadatum("title", [("lat", "Amores"), ("fre", "Les Amours")])
>>> m2 = Metadatum("author", [("lat", "Ovidius"), ("fre", "Ovide")])
>>> a[("title", "author")] = (m1, m2)
>>> a["title"] == m1
>>> a[0] == m1
>>> a[("title", "author")] == (m1, m2)
"""
if isinstance(key, int):
if key + 1 > len(self.__keys__):
raise KeyError()
else:
key = self.__keys__[key]
elif isinstance(key, tuple):
return tuple([self[k] for k in key])
if key not in self.metadata:
raise KeyError()
else:
return self.metadata[key]
[docs] def __setitem__(self, key, value):
""" Set a new metadata field
:param key: Name of metadatum field
:type key: text_type, tuple
:param value: Metadum dictionary
:type value: Metadatum
:returns: An element of children whose index is key
:raises: `TypeError` if key is not text_type or tuple of text_type
:raises: `ValueError` if key and value are list and are not the same size
:Example:
>>> a = Metadata()
>>> a["title"] = Metadatum("title", [("lat", "Amores"), ("fre", "Les Amours")])
>>> print(a["title"]["lat"]) # Amores
>>> a[("title", "author")] = (
>>> Metadatum("title", [("lat", "Amores"), ("fre", "Les Amours")]),
>>> Metadatum("author", [("lat", "Ovidius"), ("fre", "Ovide")])
>>> )
>>> print(a["title"]["lat"], a["author"]["fre"]) # Amores, Ovide
"""
if isinstance(key, tuple):
if len(value) < len(key):
raise ValueError("Less values than keys detected")
for i in range(0, len(key)):
self[key[i]] = value[i]
elif not isinstance(key, text_type):
raise TypeError(
"Only text_type or tuple instances are accepted as key")
else:
if not isinstance(value, Metadatum) and isinstance(value, list):
self.metadata[key] = Metadatum(key, value)
elif isinstance(value, Metadatum):
self.metadata[key] = value
if key in self.metadata and key not in self.__keys__:
self.__keys__.append(key)
[docs] def __iter__(self):
""" Iter method of Metadata
:Example:
>>> a = Metadata(("title", "desc", "author"))
>>> for key, value in a:
>>> print(key, value) # Print ("title", "<Metadatum object>") then ("desc", "<Metadatum object>")...
"""
i = 0
for key in self.__keys__:
yield (key, self.metadata[key])
i += 1
[docs] def __add__(self, other):
""" Merge Metadata objects together
:param other: Metadata object to merge with the current one
:type other: Metadata
:returns: The merge result of both metadata object
:rtype: Metadata
:Example:
>>> a = Metadata(name="label")
>>> b = Metadata(name="title")
>>> a + b == Metadata(name=["label", "title"])
"""
result = deepcopy(self)
for metadata_key, metadatum in other:
if metadata_key in self.__keys__:
for key, value in metadatum:
result[metadata_key][key] = value
else:
result[metadata_key] = metadatum
return result
[docs] def __len__(self):
""" Returns the number of Metadatum registered in the object
:rtype: int
:returns: Number of metadatum objects
:Example:
>>> a = Metadata(("title", "description", "author"))
>>> print(len(a)) # 3
"""
return len(
[
k
for k in self.__keys__
if isinstance(self.metadata[k], Metadatum)
]
)
def __getstate__(self):
""" Pickling method
:return:
"""
return {
key: getattr(value, "__getstate__")() for key, value in self.metadata.items()
}
def __setstate__(self, dic):
""" Unpickling method
:param dic: Dictionary with request value
:return:
"""
self.metadata = defaultdict(Metadatum)
self.__keys__ = []
for key, value in dic.items():
self.__keys__.append(key)
self.metadata[key] = getattr(Metadatum(name=value["name"]), "__setstate__")(value)
return self
[docs] def keys(self):
""" List of keys available
:return: List of metadatum keys
"""
return self.__keys__
def __export__(self, output=Mimetypes.JSON.Std, **kwargs):
""" Export a set of Metadata
:param output: Mimetype to export to
:return: Formatted Export
"""
if output == Mimetypes.JSON.Std:
return {
key: getattr(value, "__getstate__")() for key, value in self.metadata.items()
}
elif output == Mimetypes.JSON.DTS.Std:
descs = {
}
for key in sorted(self.metadata.keys()):
metadatum = self.metadata[key]
if metadatum.namespace is not None:
ns = metadatum.namespace.uri
else:
ns = ""
for lang, value in metadatum:
if lang not in descs:
descs[lang] = {"@language": lang}
descs[lang][ns+metadatum.name] = value
return [value for value in descs.values()]
elif output == Mimetypes.XML.RDF:
out = ""
for key in sorted(self.metadata.keys()):
metadatum = self.metadata[key]
if metadatum.namespace is None:
out += "".join([
"<{0} xml:lang=\"{1}\">{2}</{0}>".format(metadatum.name, lang, value)
for lang, value in metadatum
])
else:
out += "".join([
"<{1} xmlns=\"{0}\" xml:lang=\"{2}\">{3}</{1}>".format(
metadatum.namespace.uri, metadatum.name, lang, value
)
for lang, value in metadatum
])
return """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description>
"""+out+"""
</rdf:Description>
</rdf:RDF>"""