Gosse Minnema
Add sociofillmore code, load dataset via private dataset repo
b11ac48
raw
history blame contribute delete
454 Bytes
import os

import pandas as pd
def extract_texts(
    xlsx_path="data/femicides/rai/EventiFemminicidio_from2015to2017_fonti.xlsx",
    output_dir="output/femicides/extract_text",
):
    """Dump every row of an Excel sheet to its own UTF-8 text file.

    The sheet is read with ``pandas.read_excel`` and is expected to have the
    columns ``ID``, ``title`` and ``text`` (a missing column raises
    ``KeyError``). For each row a file ``source_<ID>.txt`` is written into
    *output_dir*, containing the title, a blank line, and the text.

    Args:
        xlsx_path: Path of the Excel file to read. Defaults to the
            location the script originally hard-coded.
        output_dir: Directory that receives the ``source_<ID>.txt`` files.
            It is created (including parents) if it does not exist yet.
    """
    df = pd.read_excel(xlsx_path)
    print(df)

    # open(..., "w") does not create missing parent directories, so make sure
    # the target directory exists before writing the first file.
    os.makedirs(output_dir, exist_ok=True)

    for _, row in df.iterrows():
        source_id = row["ID"]
        # NOTE(review): an empty cell would be formatted by the f-string as
        # its missing-value repr (presumably "nan") -- confirm the sheet has
        # no blank title/text cells.
        text = f"{row['title']}\n\n{row['text']}"
        out_path = os.path.join(output_dir, f"source_{source_id}.txt")
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(text)
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    extract_texts()