|
from src.domain.paragraph import Paragraph |
|
from src.domain.block import Block |
|
|
|
# Sentinel heading level used as the starting value when splitting paragraphs
# into child containers: any structure paragraph whose level is <= INFINITE
# opens the first child.
INFINITE = 10000
|
|
|
|
|
class Container:
    """
    Node of a document tree built from a flat list of paragraphs.

    A container owns the paragraphs that precede its first structure (heading)
    paragraph and a list of child containers, one per heading group found by
    ``create_children``.

    Attributes set by ``__init__``:
        level, title, index, father -- the constructor arguments, stored as given
        all_paragraphs -- the paragraph list received by the constructor
        paragraphs     -- paragraphs attached directly to this container
        children       -- child containers
        rank           -- father's ``rank`` + 1, or 0 when the father is None or has no ``rank``
        id_            -- integer identifier, see ``_make_id``
        containers     -- this container followed by the containers of every child
        blocks         -- result of ``get_blocks``
        normal, comment, task -- concatenated parsed texts from ``sort_paragraphs``
        one_liner      -- title text (or '') + ' ' + comment
        root_text      -- one_liner + ' ' + normal
    """

    # Marker that flags a paragraph as a "special action" in get_blocks.
    _SPECIAL_PREFIX = '##### '

    def __init__(self, paragraphs: "list[Paragraph]", title: "Paragraph | None" = None, level: int = 0,
                 index: "list[int] | None" = None, father=None, id_=0):
        """
        Build the container and, recursively, all of its children.

        :param paragraphs: flat list of paragraphs belonging to this container
        :param title: paragraph used as the title, or None
        :param level: level of this container
        :param index: stored as-is and forwarded unchanged to children and blocks
        :param father: owning object; when not None it must expose ``id_``
        :param id_: position-like identifier combined with the father's id
        """
        if index is None:
            index = []
        self.level = level
        self.title = title
        self.paragraphs = []
        self.all_paragraphs = paragraphs
        self.children = []
        self.index = index
        self.father = father
        # rank and id_ must exist before the children are built: each child
        # reads them from its father in its own constructor.
        self.rank = getattr(father, 'rank', -1) + 1
        self.id_ = self._make_id(father, id_)
        if paragraphs:
            self.paragraphs, self.children = self.create_children(list(paragraphs), level, index)
        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        self.blocks = self.get_blocks()
        self.normal, self.comment, self.task, _ = self.sort_paragraphs()

        self.one_liner = (self.title.text if self.title else '') + ' ' + self.comment
        self.root_text = self.one_liner + ' ' + self.normal

    @staticmethod
    def _make_id(father, id_) -> int:
        """
        Return the integer formed by the digits '1' + father's id + ``id_``.

        The father part is omitted when ``father`` is None so that a container
        can be created without a parent.
        """
        father_part = '' if father is None else str(father.id_)
        return int('1' + father_part + str(id_))

    @classmethod
    def _special_action(cls, text: str) -> str:
        """
        Return ``text`` without its leading special marker and following spaces.

        Only the marker itself is removed (not every leading '#'), so an action
        that begins with '#' is kept intact.
        """
        return text[len(cls._SPECIAL_PREFIX):].lstrip(' ')

    @property
    def text(self):
        """
        Full text of the subtree: title line, own paragraphs, then children.

        :return: str, one line per title/paragraph, each terminated by a newline
        """
        parts = []
        if self.title:
            parts.append("Titre " + str(self.level) + " : " + self.title.text + '\n')
        parts.extend(p.text + '\n' for p in self.paragraphs)
        parts.extend(child.text for child in self.children)
        return ''.join(parts)

    @property
    def table_of_contents(self):
        """
        Titles of the subtree in document order.

        :return: list of single-entry dicts ``{str(level): title text}``;
                 containers without a title contribute no entry
        """
        toc = []
        if self.title:
            toc.append({str(self.level): self.title.text})
        for child in self.children:
            toc += child.table_of_contents
        return toc

    def move(self, position: int, new_father=None):
        """
        Detach this container from its father and attach it to ``new_father``.

        :param position: index in ``new_father.children``; a position past the
                         end appends
        :param new_father: new parent, or None to simply detach the container
        :raises ValueError: if the current father does not list this container

        NOTE: only ``father``, ``rank`` and the two ``children`` lists are
        updated; ``containers``, ``blocks`` and the descendants' ``rank`` keep
        the values computed at construction time.
        """
        current_father = self.father
        if current_father is not None:
            current_father.children.remove(self)

        self.father = new_father
        if new_father is None:
            self.rank = 0
            return
        self.rank = getattr(new_father, 'rank', -1) + 1
        # list.insert already appends when position >= len(children).
        new_father.children.insert(position, self)

    def create_children(self, paragraphs, level, rank) -> "tuple[list, list]":
        """
        Split ``paragraphs`` into directly attached paragraphs and child containers.

        Paragraphs found before the first structure paragraph are attached to
        this container. From then on, a structure paragraph whose level is less
        than or equal to the level of the currently open child closes that child
        and becomes the title of a new one; every other paragraph is handed to
        the open child. The input list is not modified.

        :param paragraphs: paragraphs to distribute
        :param level: unused, kept for interface compatibility (the level of a
                      child comes from its title paragraph)
        :param rank: forwarded unchanged as the ``index`` of every child
        :return: (attached paragraphs, child containers)
        """
        attached_paragraphs = []
        children = []
        pending_title = None
        pending_paragraphs = []
        current_level = INFINITE
        in_children = False

        for p in paragraphs:
            if not in_children and not p.is_structure:
                attached_paragraphs.append(p)
                continue
            in_children = True
            if p.is_structure and p.level <= current_level:
                # Close the child being collected before opening the next one.
                if pending_paragraphs or pending_title:
                    children.append(
                        Container(pending_paragraphs, pending_title, current_level, rank, self, len(children)))
                pending_paragraphs = []
                pending_title = p
                current_level = p.level
            else:
                pending_paragraphs.append(p)

        if pending_paragraphs or pending_title:
            children.append(Container(pending_paragraphs, pending_title, current_level, rank, self, len(children)))

        return attached_paragraphs, children

    @property
    def structure(self):
        """
        Flat description of the subtree.

        :return: list whose first item is a single-entry dict keyed by
                 ``str(self.id_)`` describing this container, followed by the
                 ``structure`` of each own paragraph, then the ``structure``
                 lists of the children
        """
        self_structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': True,
            'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
            'canRename': True,
            'data': {},
            'level': self.level,
            'title': self.title.text if self.title else 'root'
        }}
        structure = [self_structure]
        structure += [p.structure for p in self.paragraphs]
        for child in self.children:
            structure += child.structure
        return structure

    def get_lang(self):
        """
        Placeholder for returning the main language of the document.

        :return: None (not implemented)
        """

    def get_structure(self, level=2):
        """
        Placeholder for returning the structure of the document.

        :return: None (not implemented)
        """

    def create_embeddings(self):
        """
        Placeholder, not implemented.

        :return: None
        """

    def get_blocks(self):
        """
        Build the blocks of the subtree.

        One block is created for this container from its non-blank paragraphs:
        paragraphs starting with the special marker are stored (marker removed)
        in ``block.specials``, the others are added to ``block.content``. The
        block is kept only if it has content or specials; the blocks of the
        children follow.

        :return: list of blocks
        """
        block = Block(level=self.level, index=self.index)
        if self.title:
            block.title = self.title.text
        for p in self.paragraphs:
            if p.blank:
                continue
            if p.text.startswith(self._SPECIAL_PREFIX):
                block.specials.append(self._special_action(p.text))
            else:
                block.content += p.text
        blocks = [block] if block.content or block.specials else []
        for child in self.children:
            blocks += child.blocks
        return blocks

    def get_fulltask(self, doc_one_liner):
        """
        Gather this container's task together with its surrounding context.

        :param doc_one_liner: description of the whole document, passed through
        :return: dict with keys 'description', 'about', 'doc_description',
                 'above' (father's one-liner), 'before' and 'after' (one-liners
                 of the preceding / following siblings). Without a father,
                 'above' is '' and both sibling lists are empty.
        """
        if self.father is None:
            siblings, above = [self], ''
        else:
            siblings, above = self.father.children, self.father.one_liner
        position = siblings.index(self)

        return {'description': self.task,
                'about': self.one_liner,
                'doc_description': doc_one_liner,
                'above': above,
                'before': [sibling.one_liner for sibling in siblings[:position]],
                'after': [sibling.one_liner for sibling in siblings[position + 1:]]}

    def sort_paragraphs(self) -> "tuple[str, str, str, str]":
        """
        Concatenate the parsed text of the own paragraphs, grouped by type.

        Each text is prefixed with a single space.

        :return: (normal, comment, task, title) strings
        :raises KeyError: if a paragraph has a type outside those four
        """
        grouped = {'normal': [], 'comment': [], 'task': [], 'title': []}
        for p in self.paragraphs:
            grouped[p.type].append(' ' + p.parsed_text)
        return (''.join(grouped['normal']), ''.join(grouped['comment']),
                ''.join(grouped['task']), ''.join(grouped['title']))
|
|
|
|
|
|