Spaces:

Karthikeyen92
/

Stock-Sentiment-Analysis

Running

App Files Files Community

Stock-Sentiment-Analysis / py /handle_files.py

Karthikeyen92

Update py/handle_files.py

439d5b6 verified 26 days ago

raw

history blame

4.12 kB



	from datetime import datetime
	import json
	import os
	import pickle
	from typing import List
	from langchain.schema import Document
	import pandas as pd

	def create_files(social_media_data, hugg=False):
	    """Persist raw social media records as JSON, CSV and pickle files.

	    The same records are written three times into one folder:
	    ``social_media_data.json`` (via ``json.dump``) plus
	    ``social_media_data.csv`` and ``social_media_data.pkl`` (both from a
	    pandas ``DataFrame`` built from the records).

	    Args:
	        social_media_data: Records to store. They must be JSON-serialisable
	            and accepted by ``pd.DataFrame``.
	        hugg: When true, write into the relative ``files`` folder instead of
	            ``Stock Sentiment Analysis/files``.
	    """
	    folder_path = 'files' if hugg else 'Stock Sentiment Analysis/files'

	    # exist_ok=True replaces the separate exists() check, which could race
	    # with another process creating the folder in between.
	    os.makedirs(folder_path, exist_ok=True)

	    # Save the records to a JSON file.
	    json_path = os.path.join(folder_path, 'social_media_data.json')
	    with open(json_path, 'w', encoding='utf-8') as f:
	        json.dump(social_media_data, f)

	    # The tabular exports share one DataFrame built from the same records.
	    df = pd.DataFrame(social_media_data)
	    df.to_csv(os.path.join(folder_path, 'social_media_data.csv'), index=False)
	    df.to_pickle(os.path.join(folder_path, 'social_media_data.pkl'))

	def fetch_social_media_data(hugg=False):
	    """Load ``social_media_data.json`` and convert its records to documents.

	    Args:
	        hugg: When true, read ``files/social_media_data.json``; otherwise
	            read ``Stock Sentiment Analysis/files/social_media_data.json``.

	    Returns:
	        The list of ``Document`` objects that ``to_documents`` builds from
	        the records stored in the file.

	    Raises:
	        FileNotFoundError: If the JSON file does not exist.
	    """
	    if hugg:
	        file_path = 'files/social_media_data.json'
	    else:
	        file_path = 'Stock Sentiment Analysis/files/social_media_data.json'

	    # JSON is defined as UTF-8, so do not depend on the locale's encoding.
	    with open(file_path, 'r', encoding='utf-8') as file:
	        data = json.load(file)

	    # The record-to-Document conversion was duplicated here; reuse the
	    # module's to_documents so the two code paths cannot drift apart.
	    return to_documents(data)

	def save_ingested_data(ingested_data, hugg=False):
	    """Pickle the ingested data to ``ingested_data.pkl``.

	    Args:
	        ingested_data: Any picklable object; it is written with
	            ``pickle.dump``.
	        hugg: When true, store the file in the relative ``files`` folder
	            (the folder ``create_files`` uses for ``hugg=True``) instead of
	            ``Stock Sentiment Analysis/files``. The default keeps the
	            previous location, so existing callers are unaffected.
	    """
	    folder_path = 'files' if hugg else 'Stock Sentiment Analysis/files'

	    # Create the folder on demand so saving does not fail when it is missing.
	    os.makedirs(folder_path, exist_ok=True)

	    with open(os.path.join(folder_path, 'ingested_data.pkl'), 'wb') as file:
	        pickle.dump(ingested_data, file)

	def save_analysed_data(analysed_data, hugg=False):
	    """Pickle the analysed data to ``analysed_data.pkl``.

	    Args:
	        analysed_data: Any picklable object; it is written with
	            ``pickle.dump``.
	        hugg: When true, store the file in the relative ``files`` folder
	            (the folder ``create_files`` uses for ``hugg=True``) instead of
	            ``Stock Sentiment Analysis/files``. The default keeps the
	            previous location, so existing callers are unaffected.
	    """
	    folder_path = 'files' if hugg else 'Stock Sentiment Analysis/files'

	    # Create the folder on demand so saving does not fail when it is missing.
	    os.makedirs(folder_path, exist_ok=True)

	    with open(os.path.join(folder_path, 'analysed_data.pkl'), 'wb') as file:
	        pickle.dump(analysed_data, file)

	def get_ingested_data(hugg=False):
	    """Load and return the object stored in ``ingested_data.pkl``.

	    Args:
	        hugg: When true, read the file from the relative ``files`` folder
	            (the folder ``create_files`` uses for ``hugg=True``) instead of
	            ``Stock Sentiment Analysis/files``. The default keeps the
	            previous location, so existing callers are unaffected.

	    Returns:
	        Whatever object was pickled into the file.

	    Raises:
	        FileNotFoundError: If the pickle file does not exist.
	    """
	    folder_path = 'files' if hugg else 'Stock Sentiment Analysis/files'

	    # NOTE: pickle.load can execute arbitrary code while unpickling; only
	    # load files that this application wrote itself.
	    with open(os.path.join(folder_path, 'ingested_data.pkl'), 'rb') as file:
	        return pickle.load(file)

	def get_analysed_data(hugg=False):
	    """Load and return the object stored in ``analysed_data.pkl``.

	    Args:
	        hugg: When true, read the file from the relative ``files`` folder
	            (the folder ``create_files`` uses for ``hugg=True``) instead of
	            ``Stock Sentiment Analysis/files``. The default keeps the
	            previous location, so existing callers are unaffected.

	    Returns:
	        Whatever object was pickled into the file.

	    Raises:
	        FileNotFoundError: If the pickle file does not exist.
	    """
	    folder_path = 'files' if hugg else 'Stock Sentiment Analysis/files'

	    # NOTE: pickle.load can execute arbitrary code while unpickling; only
	    # load files that this application wrote itself.
	    with open(os.path.join(folder_path, 'analysed_data.pkl'), 'rb') as file:
	        return pickle.load(file)

	def sample_documents(documents: List[Document], n: int) -> List[Document]:
	    """
	    Keep at most `n` documents for every distinct (`"platform"`, `"company"`) metadata pair.

	    Documents are taken in input order, so each pair keeps the first `n` it has;
	    the selection is deterministic, not random.

	    Args:
	        documents (List[Document]): Documents whose `metadata` holds `"platform"` and `"company"`.
	        n (int): Upper bound on the number of documents kept per pair.

	    Returns:
	        List[Document]: The kept documents, grouped by pair in order of each pair's first appearance.
	    """
	    # One bucket per (platform, company) pair; dict order preserves first appearance.
	    buckets = {}

	    for document in documents:
	        meta = document.metadata
	        bucket = buckets.setdefault((meta["platform"], meta["company"]), [])

	        # This pair already has its quota; skip the document.
	        if len(bucket) >= n:
	            continue
	        bucket.append(document)

	    # Concatenate the buckets into one flat result list.
	    kept = []
	    for bucket in buckets.values():
	        kept.extend(bucket)
	    return kept

	def to_documents(data) -> List[Document]:
	    """Convert raw social media records into ``Document`` objects.

	    Every record is indexed for ``"platform"``, ``"company"`` and
	    ``"page_content"``, and ``"page_content"`` is indexed for ``"content"``.
	    ``"link"`` is optional.

	    Args:
	        data: Iterable of records (mappings) as described above.

	    Returns:
	        List[Document]: One document per record, in input order. Its
	        ``page_content`` is ``str()`` of the record's ``"page_content"``.
	        Its metadata holds ``platform``, ``company``,
	        ``ingestion_timestamp`` (``datetime.now().isoformat()`` at
	        conversion time), ``word_count`` and ``link`` (``""`` when the
	        record has none).

	    Raises:
	        KeyError: If a record lacks one of the required keys.
	    """
	    social_media_document = []
	    for item in data:
	        content = item["page_content"]["content"]
	        # len() of a string counts characters, not words. Count
	        # whitespace-separated tokens so "word_count" matches its name;
	        # non-string content keeps the previous len() behaviour.
	        if isinstance(content, str):
	            word_count = len(content.split())
	        else:
	            word_count = len(content)

	        social_media_document.append(Document(
	            page_content=str(item["page_content"]),
	            metadata={
	                "platform": item["platform"],
	                "company": item["company"],
	                "ingestion_timestamp": datetime.now().isoformat(),
	                "word_count": word_count,
	                "link": item["link"] if "link" in item else "",
	            }))
	    return social_media_document