Source code for MyCapytain.resources.collections.cts

# -*- coding: utf-8 -*-
"""
.. module:: MyCapytain.resources.collections.cts
   :synopsis: XML based Text and repository

.. moduleauthor:: Thibault Clérice <leponteineptique@gmail.com>


"""
from __future__ import unicode_literals
from six import text_type

from MyCapytain.resources.prototypes import text
from MyCapytain.resources.prototypes.cts import inventory as cts
from MyCapytain.common.reference import Citation as CitationPrototype, URN
from MyCapytain.common.utils import xmlparser
from MyCapytain.common.constants import NS, Mimetypes
import re
from collections import defaultdict


class Citation(CitationPrototype):
    """ Citation XML implementation for TextInventory

    """
    # Matches double quotes: they cannot appear verbatim inside the
    # double-quoted attributes written by __str__.
    escape = re.compile('(")')

    def __str__(self):
        """ Returns a string text inventory version of the object

        The result has the shape
        ``<ti:citation label="..." xpath="..." scope="...">{child}</ti:citation>``
        where double quotes found in the xpath and scope values are replaced
        by single quotes, and where ``{child}`` is the serialization of the
        nested citation, if any.

        An empty string is returned when xpath, scope and refsDecl are all None.
        A missing (None) xpath or scope is written as an empty attribute.

        :rtype: basestring
        :returns: XML representation of the citation
        """
        if self.xpath is None and self.scope is None and self.refsDecl is None:
            return ""

        child = ""
        if isinstance(self.child, Citation):
            child = str(self.child)

        label = ""
        if self.name is not None:
            label = self.name

        # The guard above only exits when *all three* values are None, so one
        # of xpath/scope can still be None here; substitute an empty string
        # instead of letting the regular expression raise a TypeError.
        xpath = Citation.escape.sub("'", self.xpath or "")
        scope = Citation.escape.sub("'", self.scope or "")

        return """<ti:citation label="{label}" xpath="{xpath}" scope="{scope}">{child}</ti:citation>""".format(
            child=child,
            xpath=xpath,
            scope=scope,
            label=label
        )

    @staticmethod
    def ingest(resource, element=None, xpath="ti:citation"):
        """ Ingest xml to create a citation

        Only the first node matched by ``xpath`` is used. Nested citations are
        then ingested recursively from that node and chained through ``child``.

        :param resource: XML on which to do xpath
        :param element: Element where the citation should be stored
        :param xpath: XPath to use to retrieve citation

        :return: Citation built from the first match, None if nothing matched
        """
        results = resource.xpath(xpath, namespaces=NS)
        if not results:
            return None

        node = results[0]
        citation = Citation(
            name=node.get("label"),
            xpath=node.get("xpath"),
            scope=node.get("scope")
        )

        if isinstance(element, Citation):
            # Attach the new citation below the one we were given and keep
            # descending from the attached child.
            element.child = citation
            parent = element.child
        else:
            parent = citation

        # The recursive call deliberately falls back on the default xpath
        # ("ti:citation"): the caller-supplied xpath only locates the root.
        Citation.ingest(resource=node, element=parent)

        return citation
def xpathDict(xml, xpath, children, parents, **kwargs):
    """ Returns a default Dict given certain information

    Each node matched by ``xpath`` is turned into an object through
    ``children(resource=node, urn=<node @urn>, parents=parents, **kwargs)``
    and stored under the value of its ``urn`` attribute. ``children`` is also
    used as the default factory of the returned dictionary.

    :param xml: An xml tree
    :type xml: etree
    :param xpath: XPath to find children
    :type xpath: basestring
    :param children: Object identifying children
    :type children: inventory.Resource
    :param parents: Tuple of parents
    :type parents: tuple.<inventory.Resource>
    :rtype: collections.defaultdict.<basestring, inventory.Resource>
    :returns: Dictionary of children
    """
    found = {}
    for child in xml.xpath(xpath, namespaces=NS):
        urn = child.get("urn")
        found[urn] = children(
            resource=child,
            urn=urn,
            parents=parents,
            **kwargs
        )
    # Hand the mapping over positionally: expanding it as keyword arguments
    # raises a TypeError as soon as a key is not a string (e.g. a node
    # without @urn, for which .get() returns None).
    return defaultdict(children, found)
class Text(cts.Text):
    """ Represents a CTS Text

    :cvar EXPORT_TO: List of exportable supported formats
    :cvar DEFAULT_EXPORT: Default export (lxml etree)
    """
    EXPORT_TO = [Mimetypes.PYTHON.MyCapytain.ReadableText, Mimetypes.PYTHON.ETREE, Mimetypes.XML.CTS]
    DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

    def __init__(self, **kwargs):
        super(Text, self).__init__(**kwargs)

    def __str__(self):
        """ Serialize the text as a CTS inventory XML string

        :rtype: basestring
        :returns: XML representation of the text
        """
        chunks = []

        # Element name: only translations carry their language on the tag
        is_translation = self.subtype == "Translation"
        closing_tag = "translation" if is_translation else "edition"
        opening_tag = closing_tag
        if is_translation and self.lang:
            opening_tag = opening_tag + " xml:lang='" + self.lang + "'"

        # Opening tag: own URN first, else the URN of the first parent, else bare
        if self.urn is not None:
            chunks.append(
                "<ti:{0} urn='{1}' workUrn='{2}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(
                    opening_tag, self.urn, self.urn.upTo(URN.WORK)
                )
            )
        elif len(self.parents) > 0 and hasattr(self.parents[0], "urn") is True:
            chunks.append(
                "<ti:{0} workUrn='{1}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(
                    opening_tag, self.parents[0].urn
                )
            )
        else:
            chunks.append(
                "<ti:{0} xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(
                    opening_tag
                )
            )

        # Namespace mappings are set aside: they are written inside <ti:online>
        mappings = []
        for tag, metadatum in self.metadata:
            if tag != "namespaceMapping":
                chunks.extend(
                    "<ti:{tag} xml:lang='{lang}'>{value}</ti:{tag}>".format(tag=tag, lang=lang, value=value)
                    for lang, value in metadatum
                )
                continue
            mappings.extend(
                "<ti:namespaceMapping abbreviation='{0}' nsURI='{1}'/>".format(abbr, ns)
                for abbr, ns in metadatum
            )

        # Maybe should have an online object...
        docname_attr = ""
        if self.docname is not None:
            docname_attr = " docname='{0}'".format(self.docname)
        chunks.append("<ti:online{0}>".format(docname_attr))

        if self.validate is not None:
            chunks.append("<ti:validate schema='{0}'/>".format(self.validate))

        chunks.extend(mappings)

        if self.citation is not None:
            chunks.extend([
                "<ti:citationMapping>",
                str(self.citation),
                "</ti:citationMapping>"
            ])

        chunks.append("</ti:online>")
        chunks.append("</ti:{0}>".format(closing_tag))

        return "".join(chunks)

    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain="", **kwargs):
        """ Create a {format} version of the Text

        :param output: Format to be chosen
        :type output: basestring, citation
        :param domain: Domain to prefix IDs (not used by this implementation)
        :type domain: str
        :param kwargs: Extra arguments forwarded to CitableText for the ReadableText export
        :returns: lxml element (ETREE), XML string (XML.CTS), CitableText (ReadableText), \
        None for any other value of ``output``
        """
        if output == Mimetypes.PYTHON.ETREE:
            return xmlparser(str(self))

        if output == Mimetypes.PYTHON.MyCapytain.ReadableText:
            merged = self.metadata
            for parent in self.parents:
                if not (isinstance(parent, cts.CTSCollection) and hasattr(parent, "metadata")):
                    continue
                # Plain "+" on purpose: an in-place "+=" could alter self.metadata
                merged = merged + parent.metadata
            return text.CitableText(urn=self.urn, citation=self.citation, metadata=merged, **kwargs)

        if output == Mimetypes.XML.CTS:
            return str(self)

        return None

    def __findCitations(self, xml, xpath="ti:citation"):
        """ Find citation in current xml and store it in ``self.citation``

        :param xml: Xml resource to be parsed
        :param xpath: Xpath to use to retrieve the xml node
        """
        self.citation = Citation.ingest(xml, self.citation, xpath)

    def parse(self, resource):
        """ Parse a resource to feed the object

        :param resource: An xml representation object
        :type resource: basestring or lxml.etree._Element
        :returns: None
        """
        lang_attribute = "{http://www.w3.org/XML/1998/namespace}lang"

        xml = xmlparser(resource)
        self.urn = URN(xml.get("urn"))
        self.id = str(self.urn)

        # Language is only read from the root node for translations
        if self.subtype == "Translation":
            root_lang = xml.get(lang_attribute)
            if root_lang is not None:
                self.lang = root_lang

        # Language-keyed metadata; nodes without xml:lang are ignored
        for tag in ("description", "label"):
            for node in xml.xpath("ti:" + tag, namespaces=NS):
                node_lang = node.get(lang_attribute)
                if node_lang is not None:
                    self.metadata[tag][node_lang] = node.text

        self.__findCitations(
            xml=xml,
            xpath="ti:online/ti:citationMapping/ti:citation"
        )

        online_nodes = xml.xpath("ti:online", namespaces=NS)
        if len(online_nodes) > 0:
            online = online_nodes[0]
            self.docname = online.get("docname")
            for schema_node in online.xpath("ti:validate", namespaces=NS):
                self.validate = schema_node.get("schema")
            for mapping in online.xpath("ti:namespaceMapping", namespaces=NS):
                self.metadata["namespaceMapping"][mapping.get("abbreviation")] = mapping.get("nsURI")

        return None

    @property
    def readable(self):
        """ Readable property should return elements where the element can be queried for getPassage / getReffs

        :rtype: bool
        """
        return True

    @property
    def descendants(self):
        """ List of descendants (always empty for a text)

        :rtype: list
        """
        return []
def Edition(resource=None, urn=None, parents=None):
    """ Build a Text object whose subtype is "Edition"

    :param resource: Forwarded to Text as ``resource``
    :param urn: Forwarded to Text as ``urn``
    :param parents: Forwarded to Text as ``parents``
    :returns: The Text instance created
    """
    options = dict(resource=resource, urn=urn, parents=parents)
    options["subtype"] = "Edition"
    return Text(**options)
def Translation(resource=None, urn=None, parents=None):
    """ Build a Text object whose subtype is "Translation"

    :param resource: Forwarded to Text as ``resource``
    :param urn: Forwarded to Text as ``urn``
    :param parents: Forwarded to Text as ``parents``
    :returns: The Text instance created
    """
    options = dict(resource=resource, urn=urn, parents=parents)
    options["subtype"] = "Translation"
    return Text(**options)
class Work(cts.Work):
    """ Represents a CTS Work in XML

    :cvar EXPORT_TO: List of exportable supported formats
    :cvar DEFAULT_EXPORT: Default export (lxml etree)
    """
    EXPORT_TO = [Mimetypes.PYTHON.ETREE, Mimetypes.XML.CTS]
    DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

    def __init__(self, **kwargs):
        super(Work, self).__init__(**kwargs)

    def __str__(self):
        """ Serialize the work, and the texts it holds, as a CTS inventory XML string

        :rtype: basestring
        :returns: XML representation of the work
        """
        chunks = []

        # Optional language attribute; the trailing space separates it from
        # whatever attribute follows in the opening tag.
        lang_attr = 'xml:lang="{}" '.format(self.lang) if self.lang else ""

        # Opening tag: own URN first, else the URN of the first parent, else bare
        if self.urn is not None:
            chunks.append(
                "<ti:work {2}urn='{0}' groupUrn='{1}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(
                    self.urn, self.urn.upTo(URN.TEXTGROUP), lang_attr
                )
            )
        elif len(self.parents) > 0 and hasattr(self.parents[0], "urn") is True:
            chunks.append(
                "<ti:work {1}groupUrn='{0}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(
                    self.parents[0].urn, lang_attr
                )
            )
        else:
            chunks.append("<ti:work {}xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(lang_attr))

        for tag, metadatum in self.metadata:
            chunks.extend(
                "<ti:{tag} xml:lang='{lang}'>{value}</ti:{tag}>".format(tag=tag, lang=lang, value=value)
                for lang, value in metadatum
            )

        # Dev trick : For tests, we need to have always the same order....
        for urn in sorted(self.texts):
            chunks.append(str(self.texts[urn]))

        chunks.append("</ti:work>")
        return "".join(chunks)

    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain=""):
        """ Create a {format} version of the Work

        :param output: Format to be chosen (Only XML for now)
        :type output: basestring
        :param domain: Domain to prefix IDs (not used by this implementation)
        :type domain: str
        :returns: lxml element (ETREE), XML string (XML.CTS), None for any other value of ``output``
        """
        if output == Mimetypes.PYTHON.ETREE:
            return xmlparser(str(self))
        if output == Mimetypes.XML.CTS:
            return str(self)
        return None

    def parse(self, resource):
        """ Parse a resource and fill urn, id, lang, titles and texts

        :param resource: Element representing a work
        :type resource: basestring, etree._Element
        :returns: Dictionary of the editions and translations found, keyed by URN
        """
        lang_attribute = "{http://www.w3.org/XML/1998/namespace}lang"

        xml = xmlparser(resource)
        self.urn = URN(xml.get("urn"))
        self.id = str(self.urn)

        work_lang = xml.get(lang_attribute)
        if work_lang is not None:
            self.lang = work_lang

        # Titles are keyed by language; nodes without xml:lang are ignored
        for title in xml.xpath("ti:title", namespaces=NS):
            title_lang = title.get(lang_attribute)
            if title_lang is not None:
                self.metadata["title"][title_lang] = title.text

        self.__editions = xpathDict(
            xml=xml,
            xpath='ti:edition',
            children=Edition,
            parents=[self] + self.parents
        )
        self.__translations = xpathDict(
            xml=xml,
            xpath='ti:translation',
            children=Translation,
            parents=[self] + self.parents
        )

        # Editions first, then translations (a translation sharing a URN
        # with an edition replaces it)
        self.texts = defaultdict(Text)
        self.texts.update(self.__editions)
        self.texts.update(self.__translations)

        return self.texts
class TextGroup(cts.TextGroup):
    """ Represents a CTS Textgroup in XML

    :cvar EXPORT_TO: List of exportable supported formats
    :cvar DEFAULT_EXPORT: Default export (lxml etree)
    """
    EXPORT_TO = [Mimetypes.PYTHON.ETREE, Mimetypes.XML.CTS]
    DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

    def __init__(self, **kwargs):
        super(TextGroup, self).__init__(**kwargs)

    def __str__(self):
        """ Serialize the text group, and the works it holds, as a CTS inventory XML string

        :rtype: basestring
        :returns: XML representation of the textgroup
        """
        if self.urn is None:
            opening = "<ti:textgroup xmlns:ti='http://chs.harvard.edu/xmlns/cts'>"
        else:
            opening = "<ti:textgroup urn='{0}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(self.urn)
        chunks = [opening]

        for tag, metadatum in self.metadata:
            chunks.extend(
                "<ti:{tag} xml:lang='{lang}'>{value}</ti:{tag}>".format(tag=tag, lang=lang, value=value)
                for lang, value in metadatum
            )

        # Works are written in the iteration order of self.works
        chunks.extend(str(self.works[urn]) for urn in self.works)

        chunks.append("</ti:textgroup>")
        return "".join(chunks)

    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain=""):
        """ Create a {format} version of the TextGroup

        :param output: Format to be chosen (Only XML for now)
        :type output: basestring
        :param domain: Domain to prefix IDs (not used by this implementation)
        :type domain: str
        :returns: lxml element (ETREE), XML string (XML.CTS), None for any other value of ``output``
        """
        if output == Mimetypes.PYTHON.ETREE:
            return xmlparser(str(self))
        if output == Mimetypes.XML.CTS:
            return str(self)
        return None

    def parse(self, resource):
        """ Parse a resource and fill urn, id, group names and works

        :param resource: Element representing the textgroup
        :type resource: basestring or etree._Element
        :returns: Dictionary of the works found, keyed by URN
        """
        xml = xmlparser(resource)
        self.urn = URN(xml.get("urn"))
        self.id = str(self.urn)

        # Group names are keyed by language; nodes without xml:lang are ignored
        for groupname in xml.xpath("ti:groupname", namespaces=NS):
            name_lang = groupname.get("{http://www.w3.org/XML/1998/namespace}lang")
            if name_lang is not None:
                self.metadata["groupname"][name_lang] = groupname.text

        self.works = xpathDict(
            xml=xml,
            xpath='ti:work',
            children=Work,
            parents=[self] + self.parents
        )
        return self.works
class TextInventory(cts.TextInventory):
    """ Represents a CTS Inventory file

    :cvar EXPORT_TO: List of exportable supported formats
    :cvar DEFAULT_EXPORT: Default export (lxml etree)
    """
    EXPORT_TO = [Mimetypes.PYTHON.ETREE, Mimetypes.XML.CTS]
    DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

    def __init__(self, **kwargs):
        super(TextInventory, self).__init__(**kwargs)

    def __str__(self):
        """ Serialize the inventory, and the text groups it holds, as a CTS inventory XML string

        :rtype: basestring
        :returns: XML representation of the textinventory
        """
        if self.id is None:
            opening = "<ti:TextInventory xmlns:ti='http://chs.harvard.edu/xmlns/cts'>"
        else:
            opening = "<ti:TextInventory tiid='{0}' xmlns:ti='http://chs.harvard.edu/xmlns/cts'>".format(self.id)
        chunks = [opening]

        # Text groups are written in the iteration order of self.textgroups
        chunks.extend(str(self.textgroups[urn]) for urn in self.textgroups)

        chunks.append("</ti:TextInventory>")
        return "".join(chunks)

    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain=""):
        """ Create a {format} version of the TextInventory

        :param output: Format to be chosen (Only XML for now)
        :type output: basestring
        :param domain: Domain to prefix IDs (not used by this implementation)
        :type domain: str
        :returns: lxml element (ETREE), XML string (XML.CTS), None for any other value of ``output``
        """
        if output == Mimetypes.PYTHON.ETREE:
            return xmlparser(str(self))
        if output == Mimetypes.XML.CTS:
            return str(self)
        return None

    def parse(self, resource):
        """ Parse a resource and fill the text groups

        :param resource: Element representing the text inventory
        :type resource: basestring, etree._Element
        :returns: Dictionary of the text groups found anywhere in the document, keyed by URN
        """
        document = xmlparser(resource)
        self.textgroups = xpathDict(
            xml=document,
            xpath='//ti:textgroup',
            children=TextGroup,
            parents=[self]
        )
        return self.textgroups