# GenDoc / src / domain / container.py
# YvesP - initial commit (4cf88e8)
from src.domain.paragraph import Paragraph
from src.domain.block import Block
# Starting value for the "current heading level" in Container.create_children:
# the first structural paragraph is compared against it with `<=`, so it is
# presumably meant to exceed any real heading level (TODO confirm the range of
# Paragraph.level).
INFINITE = 10000
class Container:
    """Node of a document tree built from a flat list of paragraphs.

    A container holds the paragraphs directly attached to it
    (``paragraphs``), the sub-containers opened by structural paragraphs
    (``children``), and a few derived views computed once at construction
    time (``containers``, ``blocks``, ``normal``, ``comment``, ``task``,
    ``one_liner``, ``root_text``).

    Paragraph objects are expected to expose ``is_structure``, ``level``,
    ``text``, ``parsed_text``, ``type``, ``blank``, ``id_`` and ``structure``;
    those are the attributes this class reads.
    """

    def __init__(self, paragraphs: "list[Paragraph]", title: "Paragraph | None" = None, level: int = 0,
                 index: "list[int] | None" = None, father=None, id_=0):
        """Build the container and, recursively, all of its children.

        :param paragraphs: flat list of paragraphs belonging to this container
        :param title: paragraph used as the container title, if any
        :param level: level stored on the container and shown by ``text``
        :param index: list stored as ``self.index`` and forwarded to children
            and to the Block built by ``get_blocks``; a fresh list when omitted
        :param father: parent object exposing ``id_``; None means the
            container is at the top of the hierarchy
        :param id_: position-like number combined with the father's id to
            build ``self.id_``
        """
        if index is None:
            index = []
        self.level = level
        self.title = title
        self.paragraphs = []
        self.all_paragraphs = paragraphs
        self.children = []
        self.index = index
        self.father = father  # if not father, then the container is at the top of the hierarchy
        self.id_ = self._make_id(father, id_)
        if paragraphs:
            self.paragraphs, self.children = self.create_children(paragraphs.copy(), level, index)
        # Flattened list: this container followed by every descendant.
        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        self.blocks = self.get_blocks()
        self.normal, self.comment, self.task, _ = self.sort_paragraphs()
        self.one_liner = (self.title.text if self.title else '') + ' ' + self.comment
        self.root_text = self.one_liner + ' ' + self.normal

    @staticmethod
    def _make_id(father, id_) -> int:
        """Return the integer made of the digits ``1``, the father's id and ``id_``.

        A missing father contributes no digits, so a top-level container can
        be created without a parent instead of failing on ``father.id_``.
        """
        father_part = '' if father is None else str(father.id_)
        return int('1' + father_part + str(id_))

    @property
    def text(self):
        """Title line (if any), own paragraphs, then the children's text, newline separated."""
        parts = []
        if self.title:
            parts.append("Titre " + str(self.level) + " : " + self.title.text + '\n')
        parts.extend(p.text + '\n' for p in self.paragraphs)
        parts.extend(child.text for child in self.children)
        return ''.join(parts)

    @property
    def table_of_contents(self):
        """List of ``{str(level): title text}`` dicts for this container and its descendants."""
        toc = []
        if self.title:
            toc.append({str(self.level): self.title.text})
        for child in self.children:
            toc += child.table_of_contents
        return toc

    def move(self, position: int, new_father=None):
        """Detach the container from its father and attach it under ``new_father``.

        :param position: index in ``new_father.children`` at which to insert;
            a position past the end appends
        :param new_father: new parent; None detaches the container and leaves
            it without a father

        ``self.rank`` is set to the new father's rank plus one (0 without a
        father). ``rank`` is only ever assigned here, so a father that was
        never moved has none; its ``level`` is used in that case.
        Cached views such as ``containers`` and ``blocks`` are not refreshed.
        """
        current_father = self.father  # should be added in the domain
        if current_father is not None:
            current_father.children.remove(self)
        if new_father is None:
            self.rank = 0
            self.father = None
            return
        father_rank = new_father.rank if hasattr(new_father, 'rank') else new_father.level
        self.rank = father_rank + 1
        self.father = new_father
        # list.insert already appends when position >= len(children).
        new_father.children.insert(position, self)

    def create_children(self, paragraphs, level, rank) -> "tuple[list, list]":
        """Split ``paragraphs`` into directly attached ones and child containers.

        Paragraphs seen before the first structural paragraph are attached to
        this container. From the first structural paragraph on, every
        structural paragraph whose level is lower than or equal to the current
        child level opens a new child (and becomes its title); everything else
        is collected into the child being built.

        :param paragraphs: paragraphs to distribute (not modified)
        :param level: ignored; the child level starts at INFINITE and is then
            taken from the structural paragraphs themselves
        :param rank: forwarded as the ``index`` argument of each child
        :return: (attached paragraphs, child containers)
        """
        attached_paragraphs = []
        container_paragraphs = []
        container_title = None
        children = []
        in_children = False
        level = INFINITE
        child_id = 0
        for p in paragraphs:
            if not in_children and not p.is_structure:
                attached_paragraphs.append(p)
                continue
            in_children = True
            if p.is_structure and p.level <= level:  # p is higher or equal in hierarchy
                if container_paragraphs or container_title:
                    children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
                    child_id += 1
                container_paragraphs = []
                container_title = p
                level = p.level
            else:  # p is strictly lower in hierarchy
                container_paragraphs.append(p)
        # Flush the child that was still being collected when the input ended.
        if container_paragraphs or container_title:
            children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
            child_id += 1
        return attached_paragraphs, children

    @property
    def structure(self):
        """Flat list of structure entries: this container, its paragraphs, then its descendants.

        The container entry is a one-key dict mapping ``str(self.id_)`` to its
        description; paragraph entries are whatever ``Paragraph.structure``
        returns.
        """
        self_structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': True,
            'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
            'canRename': True,
            'data': {},
            'level': self.level,
            'title': self.title.text if self.title else 'root'
        }}
        structure = [self_structure]
        structure.extend(p.structure for p in self.paragraphs)
        for child in self.children:
            structure += child.structure
        return structure

    def get_lang(self):
        """
        returns the main language of the document (not implemented: returns None)
        :return:
        """

    def get_structure(self, level=2):
        """
        returns the structure of the document (not implemented: returns None)
        :return:
        """

    def create_embeddings(self):
        """
        not implemented: returns None
        :return:
        """

    def get_blocks(self):
        """Return the Block of this container (if not empty) followed by the children's blocks.

        Non-blank paragraphs starting with ``'##### '`` are recorded in
        ``block.specials`` without that marker; the text of every other
        non-blank paragraph is appended to ``block.content``. The block is kept
        only when it has content or specials.
        """
        marker = '##### '
        block = Block(level=self.level, index=self.index)
        if self.title:
            block.title = self.title.text
        for p in self.paragraphs:
            if p.blank:
                continue
            if p.text.startswith(marker):
                # Remove the marker as a prefix: str.lstrip(marker) would treat
                # it as a character set and also eat leading '#' of the action.
                block.specials.append(p.text[len(marker):].lstrip(' '))
            else:
                block.content += p.text
        blocks = [block] if block.content or block.specials else []
        for child in self.children:
            blocks += child.blocks
        return blocks

    def get_fulltask(self, doc_one_liner):
        """Describe this container's task together with its surrounding context.

        :param doc_one_liner: value stored under ``'doc_description'``
        :return: dict with the task (``description``), this container's
            one-liner (``about``), the father's one-liner (``above``) and the
            one-liners of the siblings located before and after it

        A container without a father gets an empty ``above`` and no siblings.
        """
        if self.father is None:
            above, before, after = '', [], []
        else:
            siblings = self.father.children
            position = siblings.index(self)
            above = self.father.one_liner
            before = [sibling.one_liner for sibling in siblings[:position]]
            after = [sibling.one_liner for sibling in siblings[position + 1:]]
        return {'description': self.task,
                'about': self.one_liner,
                'doc_description': doc_one_liner,
                'above': above,
                'before': before,
                'after': after}

    def sort_paragraphs(self) -> "tuple[str, str, str, str]":
        """Concatenate the parsed text of the own paragraphs, grouped by paragraph type.

        :return: (normal, comment, task, title) strings; each paragraph
            contributes ``' ' + parsed_text`` to the string of its type
        :raises KeyError: if a paragraph has a type other than those four
        """
        grouped = {'normal': [], 'comment': [], 'task': [], 'title': []}
        for p in self.paragraphs:
            grouped[p.type].append(' ' + p.parsed_text)
        return (''.join(grouped['normal']), ''.join(grouped['comment']),
                ''.join(grouped['task']), ''.join(grouped['title']))