"""
"""
import io
import logging
import os.path
from glob import glob
from math import ceil
from MyCapytain.common.reference._capitains_cts import CtsReference, URN
from MyCapytain.common.utils.xml import xmlparser
from MyCapytain.errors import InvalidURN, UnknownObjectError, UndispatchedTextError
from MyCapytain.resolvers.prototypes import Resolver
from MyCapytain.resolvers.utils import CollectionDispatcher
from MyCapytain.resources.collections.cts import XmlCtsTextInventoryMetadata, XmlCtsTextgroupMetadata, \
XmlCtsWorkMetadata, XmlCtsCitation, XmlCtsTextMetadata as InventoryText, \
XmlCtsTranslationMetadata, XmlCtsEditionMetadata, XmlCtsCommentaryMetadata
from MyCapytain.resources.prototypes.cts.inventory import CtsEditionMetadata, CtsTextgroupMetadata, CtsWorkMetadata, \
CtsCommentaryMetadata, CtsTextInventoryCollection, CtsTranslationMetadata, CtsTextInventoryMetadata
from MyCapytain.resources.prototypes.cts.inventory import CtsTextInventoryCollection
from MyCapytain.resources.texts.local.capitains.cts import CapitainsCtsText
__all__ = [
"CtsCapitainsLocalResolver"
]
[docs]class CtsCapitainsLocalResolver(Resolver):
""" XML Folder Based resolver. CtsTextMetadata and metadata resolver based on local directories
:param resource: Resource should be a list of folders retaining data as Capitains Guidelines Repositories
:type resource: [str]
:param name: Key used to differentiate Repository and thus enabling different repo to be used
:type name: str
:param logger: Logging object
:type logger: logging
:cvar TEXT_CLASS: CtsTextMetadata Class [not instantiated] to be used to parse Texts. Can be changed to support Cache for example
:type TEXT_CLASS: class
:cvar DEFAULT_PAGE: Default Page to show
:cvar PER_PAGE: Tuple representing the minimal number of texts returned, the default number and the maximum number of texts returned
"""
CLASSES = {
"text": CapitainsCtsText,
"edition": XmlCtsEditionMetadata,
"translation": XmlCtsTranslationMetadata,
"commentary": XmlCtsCommentaryMetadata,
"work": XmlCtsWorkMetadata,
"textgroup": XmlCtsTextgroupMetadata,
"inventory": XmlCtsTextInventoryMetadata,
"inventory_collection": CtsTextInventoryCollection,
"citation": XmlCtsCitation
}
DEFAULT_PAGE = 1
PER_PAGE = (1, 10, 100) # Min, Default, Mainvex,
RAISE_ON_UNDISPATCHED = False
RAISE_ON_GENERIC_PARSING_ERROR = True
@property
def inventory(self):
return self.__inventory__
@inventory.setter
def inventory(self, value):
self.__inventory__ = value
@property
def texts(self):
return self.inventory.readableDescendants
def __init__(self, resource, name=None, logger=None, dispatcher=None, autoparse=True):
""" Initiate the XMLResolver
"""
self.classes = {}
self.classes.update(type(self).CLASSES)
if dispatcher is None:
inventory_collection = self.classes["inventory_collection"](identifier="defaultTic")
ti = self.classes["inventory"]("default")
ti.parent = inventory_collection
ti.set_label("Default collection", "eng")
self.dispatcher = CollectionDispatcher(inventory_collection)
else:
self.dispatcher = dispatcher
self.__inventory__ = self.dispatcher.collection
self.name = name
self.logger = logger
if not logger:
self.logger = logging.getLogger(name)
if not name:
self.name = "repository"
self.works = []
if autoparse:
self.parse(resource)
[docs] def xmlparse(self, file):
""" Parse a XML file
:param file: Opened File
:return: Tree
"""
return xmlparser(file)
[docs] def read(self, identifier, path):
""" Retrieve and parse a text given an identifier
:param identifier: Identifier of the text
:type identifier: str
:param path: Path of the text
:type path: str
:return: Parsed Text
:rtype: CapitainsCtsText
"""
with open(path) as f:
o = self.classes["text"](urn=identifier, resource=self.xmlparse(f))
return o
def _parse_textgroup_wrapper(self, cts_file):
""" Wraps with a Try/Except the textgroup parsing from a cts file
:param cts_file: Path to the CTS File
:type cts_file: str
:return: CtsTextgroupMetadata
"""
try:
return self._parse_textgroup(cts_file)
except Exception as E:
self.logger.error("Error parsing %s ", cts_file)
if self.RAISE_ON_GENERIC_PARSING_ERROR:
raise E
def _parse_textgroup(self, cts_file):
""" Parses a textgroup from a cts file
:param cts_file: Path to the CTS File
:type cts_file: str
:return: CtsTextgroupMetadata and Current file
"""
with io.open(cts_file) as __xml__:
return self.classes["textgroup"].parse(
resource=__xml__
), cts_file
def _parse_work_wrapper(self, cts_file, textgroup):
""" Wraps with a Try/Except the Work parsing from a cts file
:param cts_file: Path to the CTS File
:type cts_file: str
:param textgroup: Textgroup to which the Work is a part of
:type textgroup: CtsTextgroupMetadata
:return: Parsed Work and the Texts, as well as the current file directory
"""
try:
return self._parse_work(cts_file, textgroup)
except Exception as E:
self.logger.error("Error parsing %s ", cts_file)
if self.RAISE_ON_GENERIC_PARSING_ERROR:
raise E
def _parse_work(self, cts_file, textgroup):
""" Parses a work from a cts file
:param cts_file: Path to the CTS File
:type cts_file: str
:param textgroup: Textgroup to which the Work is a part of
:type textgroup: CtsTextgroupMetadata
:return: Parsed Work and the Texts, as well as the current file directory
"""
with io.open(cts_file) as __xml__:
work, texts = self.classes["work"].parse(
resource=__xml__,
parent=textgroup,
_with_children=True
)
return work, texts, os.path.dirname(cts_file)
def _parse_text(self, text, directory):
""" Complete the TextMetadata object with its citation scheme by parsing the original text
:param text: Text Metadata collection
:type text: XmlCtsTextMetadata
:param directory: Directory in which the metadata was found and where the text file should be
:type directory: str
:returns: True if all went well
:rtype: bool
"""
text_id, text_metadata = text.id, text
text_metadata.path = "{directory}/{textgroup}.{work}.{version}.xml".format(
directory=directory,
textgroup=text_metadata.urn.textgroup,
work=text_metadata.urn.work,
version=text_metadata.urn.version
)
if os.path.isfile(text_metadata.path):
try:
text = self.read(text_id, path=text_metadata.path)
cites = list()
for cite in [c for c in text.citation][::-1]:
if len(cites) >= 1:
cites.append(self.classes["citation"](
xpath=cite.xpath.replace("'", '"'),
scope=cite.scope.replace("'", '"'),
name=cite.name,
child=cites[-1]
))
else:
cites.append(self.classes["citation"](
xpath=cite.xpath.replace("'", '"'),
scope=cite.scope.replace("'", '"'),
name=cite.name
))
del text
text_metadata.citation = cites[-1]
self.logger.info("%s has been parsed ", text_metadata.path)
if not text_metadata.citation.is_set():
self.logger.error("%s has no passages", text_metadata.path)
return False
return True
except Exception:
self.logger.error(
"%s does not accept parsing at some level (most probably citation) ",
text_metadata.path
)
return False
else:
self.logger.error("%s is not present", text_metadata.path)
return False
def _dispatch(self, textgroup, directory):
""" Run the dispatcher over a textgroup.
:param textgroup: Textgroup object that needs to be dispatched
:param directory: Directory in which the textgroup was found
"""
if textgroup.id in self.dispatcher.collection:
self.dispatcher.collection[textgroup.id].update(textgroup)
else:
self.dispatcher.dispatch(textgroup, path=directory)
for work_urn, work in textgroup.works.items():
if work_urn in self.dispatcher.collection[textgroup.id].works:
self.dispatcher.collection[work_urn].update(work)
def _dispatch_container(self, textgroup, directory):
""" Run the dispatcher over a textgroup within a try/except block
.. note:: This extraction allows to change the dispatch routine \
without having to care for the error dispatching
:param textgroup: Textgroup object that needs to be dispatched
:param directory: Directory in which the textgroup was found
"""
try:
self._dispatch(textgroup, directory)
except UndispatchedTextError as E:
self.logger.error("Error dispatching %s ", directory)
if self.RAISE_ON_UNDISPATCHED is True:
raise E
def _clean_invalids(self, invalids):
""" Optionally remove texts that were found to be invalid
:param invalids: List of text identifiers
:type invalids: [CtsTextMetadata]
"""
pass
[docs] def parse(self, resource):
""" Parse a list of directories and reads it into a collection
:param resource: List of folders
:return: An inventory resource and a list of CtsTextMetadata metadata-objects
"""
textgroups = []
texts = []
invalids = []
for folder in resource:
cts_files = glob("{base_folder}/data/*/__cts__.xml".format(base_folder=folder))
for cts_file in cts_files:
textgroup, cts_file = self._parse_textgroup_wrapper(cts_file)
textgroups.append((textgroup, cts_file))
for textgroup, cts_textgroup_file in textgroups:
cts_work_files = glob("{parent}/*/__cts__.xml".format(parent=os.path.dirname(cts_textgroup_file)))
for cts_work_file in cts_work_files:
_, parsed_texts, directory = self._parse_work_wrapper(cts_work_file, textgroup)
texts.extend([(text, directory) for text in parsed_texts])
for text, directory in texts:
# If text_id is not none, the text parsing errored
if not self._parse_text(text, directory):
invalids.append(text)
# Dispatching routine
for textgroup, textgroup_path in textgroups:
self._dispatch_container(textgroup, textgroup_path)
# Clean invalids if there was a need
self._clean_invalids(invalids)
self.inventory = self.dispatcher.collection
return self.inventory
def __getText__(self, urn):
""" Returns a CtsTextMetadata object
:param urn: URN of a text to retrieve
:type urn: str, URN
:return: Textual resource and metadata
:rtype: (CapitainsCtsText, InventoryText)
"""
if not isinstance(urn, URN):
urn = URN(urn)
if len(urn) != 5:
if len(urn) == 4:
urn, reference = urn.upTo(URN.WORK), str(urn.reference)
urn = [
t.id
for t in self.texts
if t.id.startswith(str(urn)) and isinstance(t, CtsEditionMetadata)
]
if len(urn) > 0:
urn = URN(urn[0])
else:
raise UnknownObjectError
else:
raise InvalidURN
text = self.inventory[str(urn)]
if os.path.isfile(text.path):
with io.open(text.path) as __xml__:
resource = self.classes["text"](urn=urn, resource=self.xmlparse(__xml__))
else:
resource = None
self.logger.warning('The file {} is mentioned in the metadata but does not exist'.format(text.path))
return resource, text
def __getTextMetadata__(self,
urn=None, page=None, limit=None,
lang=None, category=None, pagination=False
):
""" Retrieve a slice of the inventory filtered by given arguments
:param urn: Partial URN to use to filter out resources
:type urn: str
:param page: Page to show
:type page: int
:param limit: Item Per Page
:type limit: int
:param inventory: Inventory name
:type inventory: str
:param lang: Language to filter on
:type lang: str
:param category: Type of elements to show
:type category: str
:param pagination: Activate pagination
:type pagination: bool
:return: ([Matches], Page, Count)
:rtype: ([CtsTextMetadata], int, int)
"""
__PART = None
if urn is not None:
if isinstance(urn, URN):
_urn = urn
else:
_urn = URN(urn)
__PART = [None, None, URN.NAMESPACE, URN.TEXTGROUP, URN.WORK, URN.VERSION, URN.COMPLETE][len(_urn)]
matches = [
text
for text in self.texts
if
(lang is None or (lang is not None and lang == text.lang)) and
(urn is None or (urn is not None and text.urn.upTo(__PART) == urn)) and
(text.citation is not None) and
(
category not in ["edition", "translation", "commentary"] or
(category in ["edition", "translation", "commentary"] and category.lower() == text.subtype.lower())
)
]
if pagination:
start_index, end_index, page, count = type(self).pagination(page, limit, len(matches))
else:
start_index, end_index, page, count = None, None, 0, len(matches)
return matches[start_index:end_index], page, count
[docs] def getTextualNode(self, textId, subreference=None, prevnext=False, metadata=False):
""" Retrieve a text node from the API
:param textId: CtsTextMetadata Identifier
:type textId: str
:param subreference: CapitainsCtsPassage CtsReference
:type subreference: str
:param prevnext: Retrieve graph representing previous and next passage
:type prevnext: boolean
:param metadata: Retrieve metadata about the passage and the text
:type metadata: boolean
:return: CapitainsCtsPassage
:rtype: CapitainsCtsPassage
"""
text, text_metadata = self.__getText__(textId)
if subreference is not None and not isinstance(subreference, CtsReference):
subreference = CtsReference(subreference)
passage = text.getTextualNode(subreference)
if metadata:
passage.set_metadata_from_collection(text_metadata)
return passage
[docs] def getSiblings(self, textId, subreference: CtsReference):
""" Retrieve the siblings of a textual node
:param textId: CtsTextMetadata Identifier
:type textId: str
:param subreference: CapitainsCtsPassage CtsReference
:type subreference: str
:return: Tuple of references
:rtype: (str, str)
"""
text, inventory = self.__getText__(textId)
if not isinstance(subreference, CtsReference):
subreference = CtsReference(subreference)
passage = text.getTextualNode(subreference)
return passage.siblingsId
[docs] def getReffs(self, textId, level=1, subreference=None):
""" Retrieve the siblings of a textual node
:param textId: CtsTextMetadata Identifier
:type textId: str
:param level: Depth for retrieval
:type level: int
:param subreference: CapitainsCtsPassage CtsReference
:type subreference: str
:return: List of references
:rtype: [str]
"""
passage, inventory = self.__getText__(textId)
if subreference:
passage = passage.getTextualNode(subreference)
return passage.getReffs(level=level, subreference=subreference)