from typing import List, Optional, Tuple

from src.domain.paragraph import Paragraph
from src.domain.block import Block

# Sentinel level used before any structural paragraph has been seen: every
# real title level is expected to compare <= INFINITE.
INFINITE = 10000


class Container:
    """A node of the document hierarchy.

    A container owns an optional title paragraph, the paragraphs directly
    attached to it (those found before the first structural paragraph) and
    a list of child containers built recursively from the remaining
    paragraphs.

    Attributes set by ``__init__``:
        level: hierarchy level given by the caller (0 by default).
        title: title paragraph, or None.
        paragraphs: paragraphs directly attached to this container.
        all_paragraphs: the paragraph list received by the constructor.
        children: direct child containers.
        index: index list given by the caller (shared as-is with children).
        father: parent object, or None at the top of the hierarchy.
        id_: integer identifier built from the father's id and ``id_``.
        containers: this container followed by all its descendants.
        blocks: blocks of this container followed by those of its children.
        normal, comment, task: concatenated parsed text per paragraph type.
        one_liner: title text + ' ' + comment text.
        root_text: one_liner + ' ' + normal text.
    """

    def __init__(self, paragraphs: List[Paragraph], title: Optional[Paragraph] = None, level: int = 0,
                 index: Optional[List[int]] = None, father=None, id_=0):
        """Build the container and, recursively, all of its children.

        :param paragraphs: paragraphs belonging to this container (title excluded)
        :param title: title paragraph of the container, if any
        :param level: hierarchy level of the container
        :param index: index list forwarded to blocks and children; defaults to []
        :param father: parent object exposing ``id_``; None for a top-level container
        :param id_: rank of this container among its siblings, used to build ``id_``
        """
        if index is None:
            index = []
        self.level = level
        self.title = title
        self.paragraphs = []
        self.all_paragraphs = paragraphs
        self.children = []
        self.index = index
        self.father = father  # if not father, then the container is at the top of the hierarchy
        # The id is the decimal concatenation "1" + father id + sibling id.
        # A top-level container has no father, so the father part is omitted
        # (the original code raised AttributeError on ``None.id_``).
        father_id = '' if father is None else str(father.id_)
        self.id_ = int('1' + father_id + str(id_))
        if paragraphs:
            # create_children does not mutate its input, so no copy is needed.
            self.paragraphs, self.children = self.create_children(paragraphs, level, index)
        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        self.blocks = self.get_blocks()
        self.normal, self.comment, self.task, _ = self.sort_paragraphs()
        self.one_liner = (self.title.text if self.title else '') + ' ' + self.comment
        self.root_text = self.one_liner + ' ' + self.normal

    @property
    def text(self) -> str:
        """Return the title line, the attached paragraphs and the children's text.

        Each title and paragraph is terminated by a newline; children are
        appended recursively in order.
        """
        parts = []
        if self.title:
            parts.append("Titre " + str(self.level) + " : " + self.title.text + '\n')
        parts.extend(p.text + '\n' for p in self.paragraphs)
        parts.extend(child.text for child in self.children)
        return "".join(parts)

    @property
    def table_of_contents(self) -> list:
        """Return a flat, depth-first list of ``{str(level): title text}`` dicts.

        Containers without a title contribute no entry of their own.
        """
        toc = []
        if self.title:
            toc.append({str(self.level): self.title.text})
        for child in self.children:
            toc += child.table_of_contents
        return toc

    def move(self, position: int, new_father=None):
        """Detach this container from its father and attach it to ``new_father``.

        :param position: index in ``new_father.children`` at which to insert;
            an index past the end appends
        :param new_father: new parent; when None the container is only detached
        :raises ValueError: if the current father does not list this container
        """
        current_father = self.father
        if current_father is not None:
            current_father.children.remove(self)
        if new_father is None:
            self.rank = 0
        else:
            # NOTE(review): ``rank`` is never initialised by __init__ ("should
            # be added in the domain"), so fall back to the father's level when
            # it has not been set by a previous move — confirm intended meaning.
            father_rank = getattr(new_father, 'rank', getattr(new_father, 'level', 0))
            self.rank = father_rank + 1
        self.father = new_father
        if new_father is not None:
            # list.insert already appends when position >= len(children).
            new_father.children.insert(position, self)

    def create_children(self, paragraphs, level, rank) -> Tuple[list, list]:
        """Split ``paragraphs`` into directly attached paragraphs and child containers.

        Paragraphs found before the first structural paragraph are attached
        to this container. From the first structural paragraph on, each
        structural paragraph whose level is <= the level of the current child
        title starts a new child; everything else is handed to the current
        child, which recurses on it.

        :param paragraphs: paragraphs to distribute (not modified)
        :param level: unused; the child level is taken from its title paragraph
        :param rank: index list forwarded unchanged to every child
        :return: (attached paragraphs, child containers)
        """
        attached_paragraphs = []
        children = []
        container_paragraphs = []
        container_title = None
        current_level = INFINITE
        in_children = False
        child_id = 0
        for p in paragraphs:
            if not in_children and not p.is_structure:
                attached_paragraphs.append(p)
                continue
            in_children = True
            if p.is_structure and p.level <= current_level:
                # p is higher or equal in hierarchy: close the pending child
                # (if any) and open a new one titled by p.
                if container_paragraphs or container_title:
                    children.append(Container(container_paragraphs, container_title, current_level,
                                              rank, self, child_id))
                    child_id += 1
                container_paragraphs = []
                container_title = p
                current_level = p.level
            else:
                # p is strictly lower in hierarchy: it belongs to the pending child.
                container_paragraphs.append(p)
        if container_paragraphs or container_title:
            children.append(Container(container_paragraphs, container_title, current_level,
                                      rank, self, child_id))
        return attached_paragraphs, children

    @property
    def structure(self) -> list:
        """Return a flat list describing this container, its paragraphs and descendants.

        The first element is a one-key dict mapping ``str(id_)`` to the
        container's description; it is followed by each attached paragraph's
        ``structure`` and then by the children's structures, depth-first.
        """
        self_structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': True,
            'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
            'canRename': True,
            'data': {},
            'level': self.level,
            'title': self.title.text if self.title else 'root'
        }}
        structure = [self_structure]
        structure += [p.structure for p in self.paragraphs]
        for child in self.children:
            structure += child.structure
        return structure

    def get_lang(self):
        """
        returns the main language of the document

        Not implemented yet: the body is empty and the call returns None.
        :return:
        """

    def get_structure(self, level=2):
        """
        returns the structure of the document

        Not implemented yet: the body is empty and the call returns None.
        :return:
        """

    def create_embeddings(self):
        """
        Not implemented yet: the body is empty and the call returns None.
        :return:
        """

    def get_blocks(self):
        """Build the block of this container and collect the children's blocks.

        Non-blank paragraphs starting with ``'##### '`` are recorded as
        special actions (prefix removed); the text of the other non-blank
        paragraphs is concatenated into the block content. The container's
        own block is kept only if it has content or specials.

        :return: list of blocks, this container's first, then the children's
        """
        special_prefix = '##### '
        block = Block(level=self.level, index=self.index)
        if self.title:
            block.title = self.title.text
        for p in self.paragraphs:
            if p.blank:
                continue
            if p.text.startswith(special_prefix):
                # Remove the prefix itself, then extra spaces. str.lstrip with
                # the prefix as argument would strip a *set of characters* and
                # also eat any '#' at the start of the action text.
                special_action = p.text[len(special_prefix):].lstrip(' ')
                block.specials.append(special_action)
            else:
                block.content += p.text
        blocks = [block] if block.content or block.specials else []
        for child in self.children:
            blocks += child.blocks
        return blocks

    def get_fulltask(self, doc_one_liner):
        """Describe this container's task together with its surrounding context.

        :param doc_one_liner: one-line description of the whole document
        :return: dict with the task description, this container's one-liner,
            the document description, the father's one-liner ('' when there is
            no father) and the one-liners of the siblings before and after
        """
        father = self.father
        if father is None:
            # A top-level container has no siblings and nothing above it.
            siblings_before_context = []
            siblings_after_context = []
            above = ''
        else:
            siblings_ = father.children
            position = siblings_.index(self)
            siblings_before_context = [sibling.one_liner for sibling in siblings_[:position]]
            siblings_after_context = [sibling.one_liner for sibling in siblings_[position + 1:]]
            above = father.one_liner
        fulltask = {'description': self.task,
                    'about': self.one_liner,
                    'doc_description': doc_one_liner,
                    'above': above,
                    'before': siblings_before_context,
                    'after': siblings_after_context}
        return fulltask

    def sort_paragraphs(self) -> Tuple[str, str, str, str]:
        """Concatenate the parsed text of the attached paragraphs per type.

        Every fragment is prefixed with a single space, so a non-empty
        result starts with a space.

        :return: (normal, comment, task, title) texts
        :raises KeyError: if a paragraph type is not one of the four above
        """
        fragments = {'normal': [], 'comment': [], 'task': [], 'title': []}
        for p in self.paragraphs:
            fragments[p.type].append(' ' + p.parsed_text)
        return (''.join(fragments['normal']), ''.join(fragments['comment']),
                ''.join(fragments['task']), ''.join(fragments['title']))