Spaces:
Sleeping
Sleeping
import json | |
import os | |
import gradio as gr | |
def process_srt(file_path, podcast_name, podcast_episode): | |
# Get the directory of the current script and go one level up | |
script_dir = os.path.dirname(__file__) | |
parent_dir = os.path.dirname(script_dir) | |
# Prepare the output path in the 'downloads' directory one level above script_dir | |
output_dir = os.path.join(parent_dir, "downloads") | |
base_name = os.path.basename(file_path).rsplit('.', 1)[0] | |
output_file = os.path.join(output_dir, f"{base_name}.json") | |
# Create the downloads directory if it doesn't exist | |
if not os.path.exists(output_dir): | |
os.makedirs(output_dir) | |
# Process the SRT file | |
with open(file_path, 'r', encoding='utf-8') as file: | |
srt_content = file.read() | |
entries = srt_content.strip().split('\n\n') | |
transcripts = [] | |
for entry in entries: | |
lines = entry.split('\n') | |
id = int(float(lines[0].replace('\ufeff', '').strip())) | |
timestamp = lines[1] | |
timestamp_start, timestamp_end = timestamp.split(" --> ") | |
transcript = ' '.join(lines[2:]) | |
transcripts.append({'podcast_name': podcast_name, 'podcast_episode': podcast_episode, 'line_id': id, 'timestamp_start': timestamp_start, 'timestamp_end': timestamp_end, 'content': transcript}) | |
json_data = transcripts | |
# Save the output to the specified JSON file | |
with open(output_file, 'w', encoding='utf-8') as outfile: | |
json.dump(json_data, outfile, indent=2) | |
return output_file | |
# Create the Gradio interface | |
interface = gr.Interface( | |
fn=process_srt, | |
inputs=[ | |
gr.File(type='filepath', label='Upload Transcript (.srt)'), | |
gr.Textbox(label='Podcast Name'), | |
gr.Textbox(label='Podcast Episode') | |
], | |
outputs='file', | |
title='SRT to JSON Converter', | |
description='Upload an SRT file and enter the podcast name and episode to convert it to JSON format.' | |
) | |
interface.launch(share=True) |