# Source code for MyCapytain.resources.collections.cts

# -*- coding: utf-8 -*-
"""
.. module:: MyCapytain.resources.collections.cts
   :synopsis: XML based Text and repository

.. moduleauthor:: Thibault Clérice <leponteineptique@gmail.com>


"""
from __future__ import unicode_literals

import re
from collections import defaultdict
from xml.sax import saxutils

from six import text_type

from MyCapytain.resources.prototypes import text
from MyCapytain.resources.prototypes.cts import inventory as cts
from MyCapytain.common.reference import Citation as CitationPrototype, URN
from MyCapytain.common.utils import xmlparser
from MyCapytain.common.constants import NS, Mimetypes


class Citation(CitationPrototype):
    """ Citation XML implementation for TextInventory

    :cvar escape: Compiled pattern matching a double quote; used by __str__ to keep \
    the double-quoted xpath/scope attributes well delimited
    """

    escape = re.compile('(")')

    def __str__(self):
        """ Returns a string text inventory version of the object

        Double quotes found in ``xpath`` and ``scope`` are replaced by single quotes because
        both values are written inside double-quoted XML attributes. The child citation, when
        it is a Citation instance, is serialized recursively inside the element.

        :Example:
            >>>    a = Citation(name="book", xpath="/tei:TEI/tei:body/tei:text/tei:div", scope='/tei:div[@n="1"]')
            >>>    str(a)
            <ti:citation label="book" xpath="/tei:TEI/tei:body/tei:text/tei:div" scope="/tei:div[@n='1']"></ti:citation>

        :rtype: str
        :returns: XML string of the citation, or an empty string when xpath, scope and refsDecl are all None
        """
        if self.xpath is None and self.scope is None and self.refsDecl is None:
            return ""

        child = str(self.child) if isinstance(self.child, Citation) else ""
        label = self.name if self.name is not None else ""

        # NOTE(review): re.sub raises TypeError if only one of xpath / scope is None here;
        # presumably the prototype always sets them together -- confirm.
        return """<ti:citation label="{label}" xpath="{xpath}" scope="{scope}">{child}</ti:citation>""".format(
            child=child,
            xpath=re.sub(Citation.escape, "'", self.xpath),
            scope=re.sub(Citation.escape, "'", self.scope),
            label=label
        )

    @staticmethod
    def ingest(resource, element=None, xpath="ti:citation"):
        """ Ingest xml to create a citation

        Only the first node matched by ``xpath`` is used. Nested citations are ingested
        recursively with the default ``ti:citation`` xpath, relative to the matched node.

        :param resource: XML on which to do xpath
        :param element: Element where the citation should be stored (as its child) when it is a Citation
        :param xpath: XPath to use to retrieve citation

        :return: Citation built from the first match, or None when nothing matches
        """
        results = resource.xpath(xpath, namespaces=NS)
        if len(results) == 0:
            return None

        node = results[0]
        citation = Citation(
            name=node.get("label"),
            xpath=node.get("xpath"),
            scope=node.get("scope")
        )

        if isinstance(element, Citation):
            # Attach the new citation to its parent and recurse from what the parent now holds
            element.child = citation
            parent = element.child
        else:
            parent = citation

        # Children of the matched node become children of the citation just created
        Citation.ingest(resource=node, element=parent)

        return citation


def xpathDict(xml, xpath, children, parents, **kwargs):
    """ Returns a default Dict given certain information

    One ``children`` object is built for every node matched by ``xpath`` and stored under the
    value of the node's ``urn`` attribute. A node without a ``urn`` attribute is stored under
    the key ``None``. ``children`` is also the default factory of the returned dictionary, so
    accessing a missing key calls it without arguments.

    :param xml: An xml tree
    :type xml: etree
    :param xpath: XPath to find children
    :type xpath: basestring
    :param children: Object identifying children
    :type children: inventory.Resource
    :param parents: Tuple of parents
    :type parents: tuple.<inventory.Resource>
    :param kwargs: Extra keyword arguments forwarded to ``children``
    :rtype: collections.defaultdict.<basestring, inventory.Resource>
    :returns: Dictionary of children
    """
    # The pairs are handed over positionally: expanding them as keyword arguments would
    # require every key to be a string and would build a throw-away intermediate dict.
    pairs = (
        (
            child.get("urn"),
            children(
                resource=child,
                urn=child.get("urn"),
                parents=parents,
                **kwargs
            )
        )
        for child in xml.xpath(xpath, namespaces=NS)
    )
    return defaultdict(children, pairs)


class Text(cts.Text):
    """ Represents a CTS Text

    :cvar EXPORT_TO: List of exportable supported formats
    :cvar DEFAULT_EXPORT: Default export (Mimetypes.PYTHON.ETREE)
    """
    EXPORT_TO = [Mimetypes.PYTHON.MyCapytain.ReadableText, Mimetypes.PYTHON.ETREE, Mimetypes.XML.CTS]
    DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

    def __init__(self, **kwargs):
        super(Text, self).__init__(**kwargs)

    def __str__(self):
        """ Print the xml of the text

        The root element is ``ti:translation`` when the subtype is "Translation" and
        ``ti:edition`` otherwise. Metadata text and the free-form attributes of ``ti:online``
        (docname, schema, namespace mappings) are XML-escaped so that values containing
        ``&``, ``<`` or ``'`` still produce well-formed XML. URNs and language codes are
        written as they are.

        :rtype: basestring
        :returns: XML representation of the text
        """
        def attr(value):
            # Attribute values are single-quoted below, hence the apostrophe is escaped as well
            return saxutils.escape("{0}".format(value), {"'": "&apos;"})

        tag_end = "translation" if self.subtype == "Translation" else "edition"
        tag_start = tag_end
        if tag_end == "translation" and self.lang:
            tag_start = tag_start + " xml:lang='" + self.lang + "'"

        if self.urn is not None:
            opening = "<ti:{0} urn='{1}' workUrn='{2}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(
                tag_start,
                self.urn,
                self.urn.upTo(URN.WORK)
            )
        elif len(self.parents) > 0 and hasattr(self.parents[0], "urn"):
            # Without own URN, the first parent's URN is used as the work URN
            opening = "<ti:{0} workUrn='{1}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(
                tag_start,
                self.parents[0].urn
            )
        else:
            opening = "<ti:{0} xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(tag_start)
        strings = [opening]

        # Namespace mappings are collected apart because they belong inside <ti:online>
        namespaces = []
        for tag, metadatum in self.metadata:
            if tag == "namespaceMapping":
                for abbr, ns in metadatum:
                    namespaces.append(
                        "<ti:namespaceMapping abbreviation='{0}' nsURI='{1}'/>".format(
                            attr(abbr),
                            attr(ns)
                        )
                    )
            else:
                for lang, value in metadatum:
                    strings.append(
                        "<ti:{tag} xml:lang='{lang}'>{value}</ti:{tag}>".format(
                            tag=tag,
                            lang=lang,
                            value=saxutils.escape("{0}".format(value))
                        )
                    )

        # Maybe should have an online object...
        docname = ""
        if self.docname is not None:
            docname = " docname='{0}'".format(attr(self.docname))

        strings.append("<ti:online{0}>".format(docname))

        if self.validate is not None:
            strings.append("<ti:validate schema='{0}'/>".format(attr(self.validate)))

        strings.extend(namespaces)

        if self.citation is not None:
            strings.append("<ti:citationMapping>")
            strings.append(str(self.citation))
            strings.append("</ti:citationMapping>")

        strings.append("</ti:online>")

        strings.append("</ti:{0}>".format(tag_end))
        return "".join(strings)

    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain="", **kwargs):
        """ Create a {output} version of the Text

        :param output: Format to be chosen (one of EXPORT_TO)
        :type output: basestring
        :param domain: Domain to prefix IDs (not used by this implementation)
        :type domain: str
        :param kwargs: Extra keyword arguments forwarded to text.CitableText for the ReadableText export
        :rtype: lxml.etree._Element or text.CitableText or basestring
        :returns: Parsed XML for ETREE, a CitableText for ReadableText, the XML string for XML.CTS; \
        None for any other value
        """
        if output == Mimetypes.PYTHON.ETREE:
            return xmlparser(str(self))
        elif output == Mimetypes.PYTHON.MyCapytain.ReadableText:
            # The readable text carries its own metadata plus those of its CTS parents
            complete_metadata = self.metadata
            for parent in self.parents:
                if isinstance(parent, cts.CTSCollection) and hasattr(parent, "metadata"):
                    complete_metadata = complete_metadata + parent.metadata
            return text.CitableText(urn=self.urn, citation=self.citation, metadata=complete_metadata, **kwargs)
        elif output == Mimetypes.XML.CTS:
            return str(self)

    def __findCitations(self, xml, xpath="ti:citation"):
        """ Find citation in current xml. Used as a loop for xmlparser()

        :param xml: Xml resource to be parsed
        :param xpath: Xpath to use to retrieve the xml node
        """
        self.citation = Citation.ingest(xml, self.citation, xpath)

    def parse(self, resource):
        """ Parse a resource to feed the object

        Reads the URN, the language (translations only), the descriptions and labels, the
        citation scheme and the content of ``ti:online`` (docname, validation schema and
        namespace mappings).

        :param resource: An xml representation object
        :type resource: basestring or lxml.etree._Element
        :returns: None
        """
        xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"

        xml = xmlparser(resource)
        self.urn = URN(xml.get("urn"))
        self.id = str(self.urn)

        if self.subtype == "Translation":
            lang = xml.get(xml_lang)
            if lang is not None:
                self.lang = lang

        # Descriptions and labels are stored per language; nodes without xml:lang are ignored
        for tag in ("description", "label"):
            for child in xml.xpath("ti:" + tag, namespaces=NS):
                lg = child.get(xml_lang)
                if lg is not None:
                    self.metadata[tag][lg] = child.text

        self.__findCitations(
            xml=xml,
            xpath="ti:online/ti:citationMapping/ti:citation"
        )

        online = xml.xpath("ti:online", namespaces=NS)
        if len(online) > 0:
            online = online[0]
            self.docname = online.get("docname")
            # When several ti:validate nodes exist, the last one wins
            for validate in online.xpath("ti:validate", namespaces=NS):
                self.validate = validate.get("schema")
            for namespaceMapping in online.xpath("ti:namespaceMapping", namespaces=NS):
                self.metadata["namespaceMapping"][namespaceMapping.get("abbreviation")] = namespaceMapping.get("nsURI")

        return None

    @property
    def readable(self):
        """ Readable property should return elements where the element can be queried for getPassage / getReffs

        :rtype: bool
        """
        return True

    @property
    def descendants(self):
        """ List of descendants (always empty for a text)

        :rtype: list
        """
        return []


def Edition(resource=None, urn=None, parents=None):
    """ Create an edition subtyped Text object

    :param resource: Resource forwarded to Text
    :param urn: URN forwarded to Text
    :param parents: Parents forwarded to Text
    :returns: Text created with subtype="Edition"
    :rtype: Text
    """
    return Text(resource=resource, urn=urn, parents=parents, subtype="Edition")


def Translation(resource=None, urn=None, parents=None):
    """ Create a translation subtyped Text object

    :param resource: Resource forwarded to Text
    :param urn: URN forwarded to Text
    :param parents: Parents forwarded to Text
    :returns: Text created with subtype="Translation"
    :rtype: Text
    """
    return Text(resource=resource, urn=urn, parents=parents, subtype="Translation")


class Work(cts.Work):

    """ Represents a CTS Work in XML

    :cvar EXPORT_TO: List of exportable supported formats
    :cvar DEFAULT_EXPORT: Default export (Mimetypes.PYTHON.ETREE)
    """
    EXPORT_TO = [Mimetypes.PYTHON.ETREE, Mimetypes.XML.CTS]
    DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

    def __init__(self, **kwargs):

        super(Work, self).__init__(**kwargs)

    def __str__(self):
        """ Print the xml of the work

        Metadata text is XML-escaped so that values containing ``&`` or ``<`` still produce
        well-formed XML. Texts are written in the sorted order of their keys.

        :rtype: basestring
        :returns: XML representation of the work
        """
        lang_attr = ""
        if self.lang:
            lang_attr = """xml:lang="{}" """.format(self.lang)

        if self.urn is not None:
            opening = "<ti:work {2}urn='{0}' groupUrn='{1}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(
                self.urn, self.urn.upTo(URN.TEXTGROUP), lang_attr
            )
        elif len(self.parents) > 0 and hasattr(self.parents[0], "urn"):
            # Without own URN, the first parent's URN is used as the group URN
            opening = "<ti:work {1}groupUrn='{0}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(
                self.parents[0].urn, lang_attr
            )
        else:
            opening = "<ti:work {}xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(lang_attr)
        strings = [opening]

        for tag, metadatum in self.metadata:
            for lang, value in metadatum:
                strings.append(
                    "<ti:{tag} xml:lang='{lang}'>{value}</ti:{tag}>".format(
                        tag=tag, lang=lang, value=saxutils.escape("{0}".format(value))
                    )
                )

        # Dev trick : For tests, we need to have always the same order....
        for urn in sorted(self.texts):
            strings.append(str(self.texts[urn]))

        strings.append("</ti:work>")
        return "".join(strings)

    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain=""):
        """ Create a {output} version of the Work

        :param output: Format to be chosen (one of EXPORT_TO)
        :type output: basestring
        :param domain: Domain to prefix IDs (not used by this implementation)
        :type domain: str
        :rtype: lxml.etree._Element or basestring
        :returns: Parsed XML for ETREE, the XML string for XML.CTS; None for any other value
        """
        if output == Mimetypes.PYTHON.ETREE:
            return xmlparser(str(self))
        elif output == Mimetypes.XML.CTS:
            return str(self)

    def parse(self, resource):
        """ Parse a resource

        Reads the URN, the language and the titles of the work, then builds its editions and
        translations and registers all of them in ``self.texts``.

        :param resource: Element representing a work
        :type resource: basestring, etree._Element
        :returns: Dictionary of the texts (editions and translations) keyed by urn
        :rtype: collections.defaultdict
        """
        xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"

        xml = xmlparser(resource)
        self.urn = URN(xml.get("urn"))
        self.id = str(self.urn)

        lang = xml.get(xml_lang)
        if lang is not None:
            self.lang = lang

        # Titles are stored per language; nodes without xml:lang are ignored
        for child in xml.xpath("ti:title", namespaces=NS):
            lg = child.get(xml_lang)
            if lg is not None:
                self.metadata["title"][lg] = child.text

        self.__editions = xpathDict(
            xml=xml,
            xpath='ti:edition',
            children=Edition,
            parents=[self] + self.parents
        )
        self.__translations = xpathDict(
            xml=xml,
            xpath='ti:translation',
            children=Translation,
            parents=[self] + self.parents
        )

        # Editions first, then translations: a translation overrides an edition sharing its key
        self.texts = defaultdict(Text)
        self.texts.update(self.__editions)
        self.texts.update(self.__translations)

        return self.texts


class TextGroup(cts.TextGroup):

    """ Represents a CTS Textgroup in XML

    :cvar EXPORT_TO: List of exportable supported formats
    :cvar DEFAULT_EXPORT: Default export (Mimetypes.PYTHON.ETREE)
    """
    EXPORT_TO = [Mimetypes.PYTHON.ETREE, Mimetypes.XML.CTS]
    DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

    def __init__(self, **kwargs):
        super(TextGroup, self).__init__(**kwargs)

    def __str__(self):
        """ Print the xml of the text group

        Metadata text is XML-escaped so that values containing ``&`` or ``<`` still produce
        well-formed XML. Works are written in the iteration order of ``self.works``.

        :rtype: basestring
        :returns: XML representation of the textgroup
        """
        if self.urn is not None:
            opening = "<ti:textgroup urn='{0}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(self.urn)
        else:
            opening = "<ti:textgroup xmlns:ti='http://chs.harvard.edu/xmlns/cts'>"
        strings = [opening]

        for tag, metadatum in self.metadata:
            for lang, value in metadatum:
                strings.append(
                    "<ti:{tag} xml:lang='{lang}'>{value}</ti:{tag}>".format(
                        tag=tag, lang=lang, value=saxutils.escape("{0}".format(value))
                    )
                )

        for urn in self.works:
            strings.append(str(self.works[urn]))

        strings.append("</ti:textgroup>")
        return "".join(strings)

    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain=""):
        """ Create a {output} version of the TextGroup

        :param output: Format to be chosen (one of EXPORT_TO)
        :type output: basestring
        :param domain: Domain to prefix IDs (not used by this implementation)
        :type domain: str
        :rtype: lxml.etree._Element or basestring
        :returns: Parsed XML for ETREE, the XML string for XML.CTS; None for any other value
        """
        if output == Mimetypes.PYTHON.ETREE:
            return xmlparser(str(self))
        elif output == Mimetypes.XML.CTS:
            return str(self)

    def parse(self, resource):
        """ Parse a resource

        Reads the URN and the group names of the textgroup, then builds its works.

        :param resource: Element representing the textgroup
        :type resource: basestring or etree._Element
        :returns: Dictionary of the works keyed by urn
        :rtype: collections.defaultdict
        """
        xml = xmlparser(resource)

        self.urn = URN(xml.get("urn"))
        self.id = str(self.urn)

        # Group names are stored per language; nodes without xml:lang are ignored
        for child in xml.xpath("ti:groupname", namespaces=NS):
            lg = child.get("{http://www.w3.org/XML/1998/namespace}lang")
            if lg is not None:
                self.metadata["groupname"][lg] = child.text

        self.works = xpathDict(
            xml=xml,
            xpath='ti:work',
            children=Work,
            parents=[self] + self.parents
        )
        return self.works


class TextInventory(cts.TextInventory):

    """ Represents a CTS Inventory file

    :cvar EXPORT_TO: List of exportable supported formats
    :cvar DEFAULT_EXPORT: Default export (Mimetypes.PYTHON.ETREE)
    """
    EXPORT_TO = [Mimetypes.PYTHON.ETREE, Mimetypes.XML.CTS]
    DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

    def __init__(self, **kwargs):
        super(TextInventory, self).__init__(**kwargs)

    def __str__(self):
        """ Print the xml of the textinventory

        The ``tiid`` attribute is only written when ``self.id`` is not None. Textgroups are
        written in the iteration order of ``self.textgroups``.

        :rtype: basestring
        :returns: XML representation of the textinventory
        """
        if self.id is not None:
            opening = "<ti:TextInventory tiid='{0}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(self.id)
        else:
            opening = "<ti:TextInventory xmlns:ti='http://chs.harvard.edu/xmlns/cts'>"

        strings = [opening]
        strings.extend(str(self.textgroups[urn]) for urn in self.textgroups)
        strings.append("</ti:TextInventory>")
        return "".join(strings)

    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain=""):
        """ Create a {output} version of the TextInventory

        :param output: Format to be chosen (one of EXPORT_TO)
        :type output: basestring
        :param domain: Domain to prefix IDs (not used by this implementation)
        :type domain: str
        :rtype: lxml.etree._Element or basestring
        :returns: Parsed XML for ETREE, the XML string for XML.CTS; None for any other value
        """
        if output == Mimetypes.PYTHON.ETREE:
            return xmlparser(str(self))
        elif output == Mimetypes.XML.CTS:
            return str(self)

    def parse(self, resource):
        """ Parse a resource

        Every ``ti:textgroup`` element found anywhere in the document is turned into a TextGroup.

        :param resource: Element representing the text inventory
        :type resource: basestring, etree._Element
        :returns: Dictionary of the textgroups keyed by urn
        :rtype: collections.defaultdict
        """
        xml = xmlparser(resource)

        self.textgroups = xpathDict(
            xml=xml,
            xpath='//ti:textgroup',
            children=TextGroup,
            parents=[self]
        )
        return self.textgroups