Source code for MyCapytain.resources.collections.cts

# -*- coding: utf-8 -*-
"""
.. module:: MyCapytain.resources.collections.cts
   :synopsis: XML based CtsTextMetadata and repository

.. moduleauthor:: Thibault Clérice <leponteineptique@gmail.com>


"""
from __future__ import unicode_literals

from rdflib import URIRef, Literal
from rdflib.namespace import XSD
from lxml.objectify import IntElement, FloatElement

from MyCapytain.resources.prototypes.cts import inventory as cts
from MyCapytain.common.reference._capitains_cts import Citation as CitationPrototype
from MyCapytain.common.utils import expand_namespace
from MyCapytain.common.utils.xml import xmlparser
from MyCapytain.common.constants import XPATH_NAMESPACES, Mimetypes, RDF_NAMESPACES


# Public names of this module, i.e. what ``from <module> import *`` exposes.
__all__ = [
    "XmlCtsCitation",
    "XmlCtsWorkMetadata",
    "XmlCtsCommentaryMetadata",
    "XmlCtsTranslationMetadata",
    "XmlCtsEditionMetadata",
    "XmlCtsTextgroupMetadata",
    "XmlCtsTextInventoryMetadata",
    "XmlCtsTextMetadata"
]

# NOTE(review): nothing in the visible part of this module reads or fills this
# dictionary -- confirm it is still needed before relying on or removing it.
_CLASSES_DICT = {}


class XmlCtsCitation(CitationPrototype):
    """ Citation scheme read from the ``ti:citation`` nodes of a CTS inventory

    """

    @classmethod
    def ingest(cls, resource, element=None, xpath="ti:citation"):
        """ Build a citation, and recursively its nested levels, from XML

        :param resource: XML node on which the xpath is evaluated
        :param element: Citation that receives the new citation as its child \
        (ignored unless it is an instance of this class)
        :param xpath: XPath used to find the citation node
        :return: Citation built from the first matching node, None if nothing matches
        :rtype: XmlCtsCitation
        """
        matches = resource.xpath(xpath, namespaces=XPATH_NAMESPACES)
        if len(matches) == 0:
            return None

        # Only the first match is used at each level
        node = matches[0]
        citation = cls(
            name=node.get("label"),
            xpath=node.get("xpath"),
            scope=node.get("scope")
        )

        if isinstance(element, cls):
            # Attach the new level to the citation given by the caller, then
            # continue from whatever that citation now exposes as its child
            element.child = citation
            target = element.child
        else:
            target = citation

        # Deeper levels are looked up with the default xpath, relative to the node
        cls.ingest(resource=node, element=target)
        return citation
def _xpathDict(xml, xpath, cls, parent, **kwargs):
    """ Parse every node matched by an xpath with the given class

    :param xml: An xml tree
    :type xml: etree
    :param xpath: XPath to find children
    :type xpath: str
    :param cls: Class whose ``parse`` classmethod builds each child
    :type cls: inventory.Resource
    :param parent: Parent handed over to each parsed child
    :type parent: CtsCollection
    :param kwargs: Extra keyword arguments forwarded to ``cls.parse``
    :rtype: list
    :returns: Parsed children, in the order the xpath returned them
    """
    return [
        cls.parse(resource=node, parent=parent, **kwargs)
        for node in xml.xpath(xpath, namespaces=XPATH_NAMESPACES)
    ]


def _parse_structured_metadata(obj, xml):
    """ Copy the children of ``cpt:structured-metadata`` into the object metadata

    :param obj: Object whose ``metadata`` receives the parsed values
    :param xml: XML node in which ``cpt:structured-metadata`` is looked up
    """
    lang_attr = '{http://www.w3.org/XML/1998/namespace}lang'
    datatype_attr = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}datatype"
    uri_prefixes = ("urn:", "http:", "https:", "hdl:")

    for node in xml.xpath("cpt:structured-metadata/*", namespaces=XPATH_NAMESPACES):
        qualified_name = node.tag
        if "{" not in qualified_name:
            # Without a namespace there is no URI to use as predicate: skip
            continue

        # "{namespace}local" -> predicate URI made of namespace + local name
        namespace, local_name = tuple(qualified_name.split("}"))
        predicate = URIRef(namespace[1:] + local_name)
        text = str(node)

        if text.startswith(uri_prefixes):
            # Values that look like identifiers are stored as resources
            obj.metadata.add(predicate, URIRef(node))
        elif lang_attr in node.attrib:
            obj.metadata.add(predicate, text, lang=node.attrib[lang_attr])
        elif datatype_attr in node.attrib:
            datatype = node.attrib[datatype_attr]
            if not datatype.startswith("http") and ":" in datatype:
                # Prefixed datatype name: resolve it with the node's namespace map
                datatype = expand_namespace(node.nsmap, datatype)
            obj.metadata.add(predicate, Literal(text, datatype=URIRef(datatype)))
        elif isinstance(node, IntElement):
            obj.metadata.add(predicate, Literal(int(node), datatype=XSD.integer))
        elif isinstance(node, FloatElement):
            obj.metadata.add(predicate, Literal(float(node), datatype=XSD.float))
        else:
            obj.metadata.add(predicate, text)
class XmlCtsTextMetadata(cts.CtsTextMetadata):
    """ XML-backed CtsTextMetadata

    """
    DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE
    CLASS_CITATION = XmlCtsCitation

    def __init__(self, *args, **kwargs):
        """ Forward every argument to the parent class and start with no path """
        super(XmlCtsTextMetadata, self).__init__(*args, **kwargs)
        self._path = None

    @property
    def path(self):
        """ Value last assigned to ``path`` (None until one is assigned) """
        return self._path

    @path.setter
    def path(self, value):
        self._path = value

    @staticmethod
    def __findCitations(obj, xml, xpath="ti:citation"):
        """ Find citation in current xml. Used as a loop for xmlparser()

        This method has no body besides its docstring.

        :param xml: Xml resource to be parsed
        :param xpath: Xpath to use to retrieve the xml node
        """

    @classmethod
    def parse_metadata(cls, obj, xml):
        """ Feed an object with the metadata found in its XML representation

        Reads, in this order: descriptions, labels, the citation scheme,
        ``ti:about`` links and the structured metadata.

        :param obj: Obj to set metadata of
        :type obj: XmlCtsTextMetadata
        :param xml: An xml representation object
        :type xml: lxml.etree._Element
        """
        lang_attr = "{http://www.w3.org/XML/1998/namespace}lang"

        # Localized properties: nodes without xml:lang are ignored
        for node_xpath, property_name in (
            ("ti:description", "description"),
            ("ti:label", "label"),
        ):
            for node in xml.xpath(node_xpath, namespaces=XPATH_NAMESPACES):
                language = node.get(lang_attr)
                if language is not None:
                    obj.set_cts_property(property_name, node.text, language)

        obj.citation = cls.CLASS_CITATION.ingest(
            xml,
            obj.citation,
            "ti:online/ti:citationMapping/ti:citation"
        )

        # ``ti:about`` was added for commentaries
        about = RDF_NAMESPACES.CTS.term("about")
        for node in xml.xpath("ti:about", namespaces=XPATH_NAMESPACES):
            obj.set_link(about, node.get('urn'))

        _parse_structured_metadata(obj, xml)

        # Disabled legacy parsing of ``ti:online``, kept for reference:
        #
        # online = xml.xpath("ti:online", namespaces=NS)
        # if len(online) > 0:
        #     online = online[0]
        #     obj.docname = online.get("docname")
        #     for validate in online.xpath("ti:validate", namespaces=NS):
        #         obj.validate = validate.get("schema")
        #     for namespaceMapping in online.xpath("ti:namespaceMapping", namespaces=NS):
        #         obj.metadata["namespaceMapping"][namespaceMapping.get("abbreviation")] = namespaceMapping.get("nsURI")
class XmlCtsEditionMetadata(cts.CtsEditionMetadata, XmlCtsTextMetadata):
    """ Edition subtype of the XML-backed CtsTextMetadata

    """

    @classmethod
    def parse(cls, resource, parent=None):
        """ Build an edition from its XML representation

        :param resource: Resource handed to ``xmlparser`` to get the edition node
        :param parent: Parent of the edition
        :return: Edition fed with the metadata found in the node
        """
        node = xmlparser(resource)
        edition = cls(urn=node.get("urn"), parent=parent)
        cls.parse_metadata(edition, node)
        return edition
class XmlCtsTranslationMetadata(cts.CtsTranslationMetadata, XmlCtsTextMetadata):
    """ Translation subtype of the XML-backed CtsTextMetadata

    """

    @classmethod
    def parse(cls, resource, parent=None):
        """ Build a translation from its XML representation

        :param resource: Resource handed to ``xmlparser`` to get the translation node
        :param parent: Parent of the translation
        :return: Translation fed with the metadata found in the node
        """
        node = xmlparser(resource)
        language = node.get("{http://www.w3.org/XML/1998/namespace}lang")
        translation = cls(urn=node.get("urn"), parent=parent)

        # Leave the language untouched when the node carries no xml:lang
        if language is not None:
            translation.lang = language

        cls.parse_metadata(translation, node)
        return translation
class XmlCtsCommentaryMetadata(cts.CtsCommentaryMetadata, XmlCtsTextMetadata):
    """ Commentary subtype of the XML-backed CtsTextMetadata

    """

    @classmethod
    def parse(cls, resource, parent=None):
        """ Build a commentary from its XML representation

        :param resource: Resource handed to ``xmlparser`` to get the commentary node
        :param parent: Parent of the commentary
        :return: Commentary fed with the metadata found in the node
        """
        node = xmlparser(resource)
        language = node.get("{http://www.w3.org/XML/1998/namespace}lang")
        commentary = cls(urn=node.get("urn"), parent=parent)

        # Leave the language untouched when the node carries no xml:lang
        if language is not None:
            commentary.lang = language

        cls.parse_metadata(commentary, node)
        return commentary
class XmlCtsWorkMetadata(cts.CtsWorkMetadata):
    """ Represents a CTS Work in XML

    """
    CLASS_EDITION = XmlCtsEditionMetadata
    CLASS_TRANSLATION = XmlCtsTranslationMetadata
    CLASS_COMMENTARY = XmlCtsCommentaryMetadata

    @classmethod
    def parse(cls, resource, parent=None, _with_children=False):
        """ Parse a work and the texts it contains

        :param resource: Element representing a work
        :param parent: Parent of the object
        :type parent: XmlCtsTextgroupMetadata
        :param _with_children: If True, also return the list of parsed texts
        :return: The work, or a ``(work, children)`` tuple when ``_with_children`` is True
        """
        lang_attr = "{http://www.w3.org/XML/1998/namespace}lang"

        node = xmlparser(resource)
        work = cls(urn=node.get("urn"), parent=parent)

        work_language = node.get(lang_attr)
        if work_language is not None:
            work.lang = work_language

        # Titles without xml:lang are ignored
        for title in node.xpath("ti:title", namespaces=XPATH_NAMESPACES):
            title_language = title.get(lang_attr)
            if title_language is not None:
                work.set_cts_property("title", title.text, title_language)

        # Texts are parsed in a fixed order: editions, translations, commentaries
        texts = []
        for text_xpath, text_class in (
            ('ti:edition', cls.CLASS_EDITION),
            ('ti:translation', cls.CLASS_TRANSLATION),
            ('ti:commentary', cls.CLASS_COMMENTARY),
        ):
            texts.extend(_xpathDict(
                xml=node, xpath=text_xpath, cls=text_class, parent=work
            ))

        _parse_structured_metadata(work, node)

        if _with_children:
            return work, texts
        return work
class XmlCtsTextgroupMetadata(cts.CtsTextgroupMetadata):
    """ XML-backed CTS Textgroup

    """
    CLASS_WORK = XmlCtsWorkMetadata

    @classmethod
    def parse(cls, resource, parent=None):
        """ Parse a textgroup and the works it contains

        :param resource: Element representing the textgroup
        :param parent: Parent of the textgroup
        :return: Textgroup fed with its group names and structured metadata
        """
        node = xmlparser(resource)
        textgroup = cls(urn=node.get("urn"), parent=parent)

        # Group names without xml:lang are ignored
        for groupname in node.xpath("ti:groupname", namespaces=XPATH_NAMESPACES):
            language = groupname.get("{http://www.w3.org/XML/1998/namespace}lang")
            if language is not None:
                textgroup.set_cts_property("groupname", groupname.text, language)

        # Works are parsed with this textgroup as parent; the returned list
        # is not used here
        _xpathDict(xml=node, xpath='ti:work', cls=cls.CLASS_WORK, parent=textgroup)

        _parse_structured_metadata(textgroup, node)
        return textgroup
class XmlCtsTextInventoryMetadata(cts.CtsTextInventoryMetadata):
    """ XML-backed CTS inventory file

    """
    CLASS_TEXTGROUP = XmlCtsTextgroupMetadata

    @classmethod
    def parse(cls, resource):
        """ Parse an inventory and the textgroups it contains

        :param resource: Element representing the text inventory
        :return: Inventory named after the ``tiid`` attribute of the first \
        ``ti:TextInventory`` element ("" when that attribute is missing or empty)
        :raises IndexError: If the document holds no ``ti:TextInventory`` element
        """
        document = xmlparser(resource)

        inventories = document.xpath("//ti:TextInventory", namespaces=XPATH_NAMESPACES)
        inventory = cls(name=inventories[0].get("tiid") or "")

        # Textgroups are searched anywhere in the document and parsed with the
        # inventory as parent; the returned list is not used here
        _xpathDict(
            xml=document,
            xpath='//ti:textgroup',
            cls=cls.CLASS_TEXTGROUP,
            parent=inventory
        )
        return inventory