Source code for MyCapytain.resources.texts.remote.cts

# -*- coding: utf-8 -*-
"""
.. module:: MyCapytain.resources.texts.remote.cts
   :synopsis: CtsTextMetadata and CapitainsCtsPassage implementation for dealing with CTS API Responses

.. moduleauthor:: Thibault Clérice <leponteineptique@gmail.com>


"""

from __future__ import unicode_literals

from MyCapytain.common.metadata import Metadata
from MyCapytain.common.utils import xmlparser
from MyCapytain.common.constants import XPATH_NAMESPACES, Mimetypes, RDF_NAMESPACES
from MyCapytain.common.reference import URN, Reference
from MyCapytain.resources.collections import cts as CtsCollection
from MyCapytain.resources.prototypes import text as prototypes
from MyCapytain.resources.texts.base.tei import TEIResource
from MyCapytain.errors import MissingAttribute


class __SharedMethod__(prototypes.InteractiveTextualNode):
    """ Set of methods shared by CtsTextMetadata and CapitainsCtsPassage

    :param retriever: Retriever used to retrieve other data
    :type retriever: MyCapytain.retrievers.prototypes.CitableTextServiceRetriever
    """

    @property
    def depth(self):
        """ Depth of the current opbject

        :return: Int representation of the depth based on URN information
        :rtype: int
        """
        if self.urn.reference:
            return len(self.urn.reference)

    def __init__(self, retriever=None, *args, **kwargs):
        super(__SharedMethod__, self).__init__(*args, **kwargs)
        self.__retriever__ = retriever
        self.__first__ = False
        self.__last__ = False
        if retriever is None:
            raise MissingAttribute("Object has not retriever")

    @property
    def retriever(self):
        """ Retriever object used to query for more data

        :rtype: CitableTextServiceRetriever
        """
        return self.__retriever__

    def getValidReff(self, level=1, reference=None):
        """ Given a resource, CitableText will compute valid reffs

        :param level: Depth required. If not set, should retrieve first encountered level (1 based)
        :type level: Int
        :param reference: CapitainsCtsPassage reference
        :type reference: Reference
        :rtype: list(str)
        :returns: List of levels
        """
        if reference:
            urn = "{0}:{1}".format(self.urn, reference)
        else:
            urn = str(self.urn)

        if level == -1:
            level = len(self.citation)

        xml = self.retriever.getValidReff(
            level=level,
            urn=urn
        )
        xml = xmlparser(xml)
        self.__parse_request__(xml.xpath("//ti:request", namespaces=XPATH_NAMESPACES)[0])

        return [ref.split(":")[-1] for ref in xml.xpath("//ti:reply//ti:urn/text()", namespaces=XPATH_NAMESPACES)]

    def getTextualNode(self, subreference=None):
        """ Retrieve a passage and store it in the object

        :param subreference: Reference of the passage (Note : if given a list, this should be a list of string that \
        compose the reference)
        :type subreference: Union[Reference, URN, str, list]
        :rtype: CtsPassage
        :returns: Object representing the passage
        :raises: *TypeError* when reference is not a list or a Reference
        """
        if isinstance(subreference, URN):
            urn = str(subreference)
        elif isinstance(subreference, Reference):
            urn = "{0}:{1}".format(self.urn, str(subreference))
        elif isinstance(subreference, str):
            if ":" in subreference:
                urn = subreference
            else:
                urn = "{0}:{1}".format(self.urn.upTo(URN.NO_PASSAGE), subreference)
        elif isinstance(subreference, list):
            urn = "{0}:{1}".format(self.urn, ".".join(subreference))
        else:
            urn = str(self.urn)

        response = xmlparser(self.retriever.getPassage(urn=urn))

        self.__parse_request__(response.xpath("//ti:request", namespaces=XPATH_NAMESPACES)[0])
        return CtsPassage(urn=urn, resource=response, retriever=self.retriever)

    def getReffs(self, level=1, subreference=None):
        """ Reference available at a given level

        :param level: Depth required. If not set, should retrieve first encountered level (1 based)
        :type level: Int
        :param subreference: Subreference (optional)
        :type subreference: Reference
        :rtype: [text_type]
        :returns: List of levels
        """
        if self.depth is not None:
            level += self.depth

        return self.getValidReff(level, subreference)

    def getPassagePlus(self, reference=None):
        """ Retrieve a passage and informations around it and store it in the object

        :param reference: Reference of the passage
        :type reference: Reference or List of text_type
        :rtype: CtsPassage
        :returns: Object representing the passage
        :raises: *TypeError* when reference is not a list or a Reference
        """
        if reference:
            urn = "{0}:{1}".format(self.urn, reference)
        else:
            urn = str(self.urn)

        response = xmlparser(self.retriever.getPassagePlus(urn=urn))

        passage = CtsPassage(urn=urn, resource=response, retriever=self.retriever)
        passage.__parse_request__(response.xpath("//ti:reply/ti:label", namespaces=XPATH_NAMESPACES)[0])
        self.citation = passage.citation
        return passage

    def __parse_request__(self, xml):
        """ Parse a request with metadata information

        :param xml: LXML Object
        :type xml: Union[lxml.etree._Element]
        """
        for node in xml.xpath(".//ti:groupname", namespaces=XPATH_NAMESPACES):
            lang = node.get("xml:lang") or CtsText.DEFAULT_LANG
            self.metadata.add(RDF_NAMESPACES.CTS.groupname, lang=lang, value=node.text)
            self.set_creator(node.text, lang)

        for node in xml.xpath(".//ti:title", namespaces=XPATH_NAMESPACES):
            lang = node.get("xml:lang") or CtsText.DEFAULT_LANG
            self.metadata.add(RDF_NAMESPACES.CTS.title, lang=lang, value=node.text)
            self.set_title(node.text, lang)

        for node in xml.xpath(".//ti:label", namespaces=XPATH_NAMESPACES):
            lang = node.get("xml:lang") or CtsText.DEFAULT_LANG
            self.metadata.add(RDF_NAMESPACES.CTS.label, lang=lang, value=node.text)
            self.set_subject(node.text, lang)

        for node in xml.xpath(".//ti:description", namespaces=XPATH_NAMESPACES):
            lang = node.get("xml:lang") or CtsText.DEFAULT_LANG
            self.metadata.add(RDF_NAMESPACES.CTS.description, lang=lang, value=node.text)
            self.set_description(node.text, lang)

        # Need to code that p
        if self.citation.isEmpty() and xml.xpath("//ti:citation", namespaces=XPATH_NAMESPACES):
            self.citation = CtsCollection.XmlCtsCitation.ingest(
                xml,
                xpath=".//ti:citation[not(ancestor::ti:citation)]"
            )

    def getLabel(self):
        """ Retrieve metadata about the text

        :rtype: Metadata
        :returns: Dictionary with label informations
        """
        response = xmlparser(
            self.retriever.getLabel(urn=str(self.urn))
        )

        self.__parse_request__(
            response.xpath("//ti:reply/ti:label", namespaces=XPATH_NAMESPACES)[0]
        )

        return self.metadata

    def getPrevNextUrn(self, reference):
        """ Get the previous URN of a reference of the text

        :param reference: Reference from which to find siblings
        :type reference: Union[Reference, str]
        :return: (Previous CapitainsCtsPassage Reference,Next CapitainsCtsPassage Reference)
        """
        _prev, _next = __SharedMethod__.prevnext(
            self.retriever.getPrevNextUrn(
                urn="{}:{}".format(
                    str(
                        URN(
                            str(self.urn)).upTo(URN.NO_PASSAGE)
                    ),
                    str(reference)
                )
            )
        )
        return _prev, _next

    def getFirstUrn(self, reference=None):
        """ Get the first children URN for a given resource

        :param reference: Reference from which to find child (If None, find first reference)
        :type reference: Reference, str
        :return: Children URN
        :rtype: URN
        """
        if reference is not None:
            if ":" in reference:
                urn = reference
            else:
                urn = "{}:{}".format(
                    str(URN(str(self.urn)).upTo(URN.NO_PASSAGE)),
                    str(reference)
                )
        else:
            urn = str(self.urn)
        _first = __SharedMethod__.firstUrn(
            self.retriever.getFirstUrn(
                urn
            )
        )
        return _first

    @property
    def firstId(self):
        """ Children passage

        :rtype: str
        :returns: First children of the graph. Shortcut to self.graph.children[0]
        """
        if self.__first__ is False:
            # Request the next urn
            self.__first__ = self.getFirstUrn()
        return self.__first__

    @property
    def lastId(self):
        """ Children passage

        :rtype: str
        :returns: First children of the graph. Shortcut to self.graph.children[0]
        """
        if self.__last__ is False:
            # Request the next urn
            self.__last__ = self.childIds[-1]
        return self.__last__

    @staticmethod
    def firstUrn(resource):
        """ Parse a resource to get the first URN

        :param resource: XML Resource
        :type resource: etree._Element
        :return: Tuple representing previous and next urn
        :rtype: str
        """
        resource = xmlparser(resource)
        urn = resource.xpath("//ti:reply/ti:urn/text()", namespaces=XPATH_NAMESPACES, magic_string=True)

        if len(urn) > 0:
            urn = str(urn[0])
            return urn.split(":")[-1]

    @staticmethod
    def prevnext(resource):
        """ Parse a resource to get the prev and next urn

        :param resource: XML Resource
        :type resource: etree._Element
        :return: Tuple representing previous and next urn
        :rtype: (str, str)
        """
        _prev, _next = False, False
        resource = xmlparser(resource)
        prevnext = resource.xpath("//ti:prevnext", namespaces=XPATH_NAMESPACES)

        if len(prevnext) > 0:
            _next, _prev = None, None
            prevnext = prevnext[0]
            _next_xpath = prevnext.xpath("ti:next/ti:urn/text()", namespaces=XPATH_NAMESPACES, smart_strings=False)
            _prev_xpath = prevnext.xpath("ti:prev/ti:urn/text()", namespaces=XPATH_NAMESPACES, smart_strings=False)

            if len(_next_xpath):
                _next = _next_xpath[0].split(":")[-1]

            if len(_prev_xpath):
                _prev = _prev_xpath[0].split(":")[-1]

        return _prev, _next


[docs]class CtsText(__SharedMethod__, prototypes.CitableText):
    """ API CtsTextMetadata object

    :param urn: A URN identifier
    :type urn: Union[URN, str, unicode]
    :param resource: An API endpoint
    :type resource: CitableTextServiceRetriever
    :param citation: XmlCtsCitation for children level
    :type citation: XmlCtsCitation
    :param id: Identifier of the subreference without URN informations
    :type id: List

    """

    DEFAULT_LANG = "eng"

    def __init__(self, urn, retriever, citation=None, **kwargs):
        super(CtsText, self).__init__(retriever=retriever, urn=urn, citation=citation, **kwargs)

    @property
    def reffs(self):
        """ Get all valid reffs for every part of the CitableText

        :rtype: MyCapytain.resources.texts.tei.XmlCtsCitation
        """
        if self.citation.isEmpty():
            self.getLabel()
        return [
            reff for reffs in [self.getValidReff(level=i) for i in range(1, len(self.citation) + 1)] for reff in reffs
        ]

    @property
    def nextId(self):
        raise NotImplementedError

    @property
    def next(self):
        raise NotImplementedError

    @property
    def prev(self):
        raise NotImplementedError

    @property
    def prevId(self):
        raise NotImplementedError

    @property
    def siblingsId(self):
        raise NotImplementedError

[docs]    def export(self, output=Mimetypes.PLAINTEXT, exclude=None, **kwargs):
        """ Export the collection item in the Mimetype required.

        ..note:: If current implementation does not have special mimetypes, reuses default_export method

        :param output: Mimetype to export to (Uses Mimetypes)
        :type output: str
        :param exclude: Informations to exclude. Specific to implementations
        :type exclude: [str]
        :return: Object using a different representation
        """
        return self.getTextualNode().export(output, exclude)


[docs]class CtsPassage(__SharedMethod__, prototypes.Passage, TEIResource):
    """ CapitainsCtsPassage representing

    :param urn:
    :param resource:
    :param retriever:
    :param args:
    :param kwargs:
    """

    def __init__(self, urn, resource, *args, **kwargs):
        SuperKwargs = {key: value for key, value in kwargs.items() if key not in ["parent"]}
        super(CtsPassage, self).__init__(resource=resource, *args, **SuperKwargs)
        self.urn = urn

        # Could be set during parsing
        self.__nextId__ = False
        self.__prev__ = False
        self.__first__ = False
        self.__last__ = False

        self.__parse__()

    @property
    def id(self):
        return str(self.urn.reference)

    @property
    def prevId(self):
        """ Previous passage Identifier

        :rtype: CtsPassage
        :returns: Previous passage at same level
        """
        if self.__prev__ is False:
            # Request the next urn
            self.__prev__, self.__nextId__ = self.getPrevNextUrn(reference=self.urn.reference)
        return self.__prev__

    @property
    def parentId(self):
        """ Shortcut for getting the parent passage identifier

        :rtype: Reference
        :returns: Following passage reference
        """
        return str(self.urn.reference.parent)

    @property
    def nextId(self):
        """ Shortcut for getting the following passage identifier

        :rtype: Reference
        :returns: Following passage reference
        """
        if self.__nextId__ is False:
            # Request the next urn
            self.__prev__, self.__nextId__ = self.getPrevNextUrn(reference=self.urn.reference)
        return self.__nextId__

    @property
    def siblingsId(self):
        """ Shortcut for getting the previous and next passage identifier

        :rtype: Reference
        :returns: Following passage reference
        """
        if self.__nextId__ is False or self.__prev__ is False:
            self.__prev__, self.__nextId__ = self.getPrevNextUrn(reference=self.urn.reference)
        return self.__prev__, self.__nextId__

    def __parse__(self):
        """ Given self.resource, split information from the CTS API

        :return: None
        """
        self.response = self.resource
        self.resource = self.resource.xpath("//ti:passage/tei:TEI", namespaces=XPATH_NAMESPACES)[0]

        self.__prev__, self.__nextId__ = __SharedMethod__.prevnext(self.response)

        if self.citation.isEmpty() and len(self.resource.xpath("//ti:citation", namespaces=XPATH_NAMESPACES)):
            self.citation = CtsCollection.XmlCtsCitation.ingest(
                self.response,
                xpath=".//ti:citation[not(ancestor::ti:citation)]"
            )