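# NOTE(review): "Spaces: Runtime error / Runtime error" appears to be page-status
# text captured along with this file, not program code — confirm and remove if so.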
# !pip install googletrans==3.1.0a0
# !pip install transformers sentencepiece
from functools import lru_cache

from googletrans import Translator
from transformers import MarianMTModel, MarianTokenizer  # transformer based pre-trained language translation model
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
def translate_hi2en_gtrans(sentence):
    """
    Translate Hindi text into English through googletrans.

    A new ``Translator`` client is created for each call.

    Args:
    - sentence: string in Hindi
    Returns:
    - English translated text string
    """
    result = Translator().translate(sentence, src='hi', dest='en')
    return result.text
def translate_en2hi_gtrans(sentence):
    """
    Translate English text into Hindi through googletrans.

    A new ``Translator`` client is created for each call.

    Args:
    - sentence: string in English
    Returns:
    - Hindi translated text string
    """
    result = Translator().translate(sentence, src='en', dest='hi')
    return result.text
@lru_cache(maxsize=1)
def _load_en_hi_marian():
    """Load the English-to-Hindi Marian tokenizer and model once and reuse them."""
    model_name_en_hi = "Helsinki-NLP/opus-mt-en-hi"  # English to Hindi translation model
    tokenizer = MarianTokenizer.from_pretrained(model_name_en_hi)
    model_en_hi = MarianMTModel.from_pretrained(model_name_en_hi)
    return tokenizer, model_en_hi


def translate_en_hi_transformer(text):
    """
    Translate English text to Hindi using the pre-trained Marian model.

    The tokenizer and model are loaded on the first call and cached, so
    repeated calls do not reload the checkpoint.

    Args:
    - text: string in English (no truncation argument is passed to the
      tokenizer, so the full input is encoded)
    Returns:
    - Hindi translated text string (the first decoded sequence)
    """
    tokenizer, model_en_hi = _load_en_hi_marian()
    encoded = tokenizer(text, return_tensors="pt")
    translated = model_en_hi.generate(**encoded)
    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
@lru_cache(maxsize=1)
def _load_hi_en_marian():
    """Load the Hindi-to-English Marian tokenizer and model once and reuse them."""
    model_name_hi_en = "Helsinki-NLP/opus-mt-hi-en"  # Hindi to English translation model
    tokenizer_hi = MarianTokenizer.from_pretrained(model_name_hi_en)
    model_hi_en = MarianMTModel.from_pretrained(model_name_hi_en)
    return tokenizer_hi, model_hi_en


def translate_hi_en_transformer(text):
    """
    Translate Hindi text to English using the pre-trained Marian model.

    The tokenizer and model are loaded on the first call and cached, so
    repeated calls do not reload the checkpoint.

    Args:
    - text: string in Hindi (no truncation argument is passed to the
      tokenizer, so the full input is encoded)
    Returns:
    - English translated text string (the first decoded sequence)
    """
    tokenizer_hi, model_hi_en = _load_hi_en_marian()
    encoded = tokenizer_hi(text, return_tensors="pt")
    translated = model_hi_en.generate(**encoded)
    return tokenizer_hi.batch_decode(translated, skip_special_tokens=True)[0]
@lru_cache(maxsize=1)
def _load_mbart50():
    """Load the mBART-50 many-to-many model and tokenizer once and reuse them."""
    model_name = "facebook/mbart-large-50-many-to-many-mmt"
    model = MBartForConditionalGeneration.from_pretrained(model_name)
    tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
    return model, tokenizer


def translate_mbart(text, source_lang, target_lang):
    """
    Translate text between two languages with the mBART-50 many-to-many model.

    The model and tokenizer are loaded on the first call and cached; later
    calls reuse them instead of reloading the checkpoint.

    Args:
    - text: string to translate
    - source_lang: mBART language code of the input, e.g. "hi_IN" or "en_XX"
    - target_lang: mBART language code of the output, e.g. "en_XX" or "hi_IN"
    Returns:
    - translated text string (the first decoded sequence)

    A target_lang that is not a key of ``tokenizer.lang_code_to_id`` fails at
    that lookup.
    """
    model, tokenizer = _load_mbart50()
    # The tokenizer is shared across calls, so the source language is set every time.
    tokenizer.src_lang = source_lang
    # Encode the text
    encoded_text = tokenizer(text, return_tensors="pt")
    # Force the first generated token to be the target-language code
    forced_bos_token_id = tokenizer.lang_code_to_id[target_lang]
    # Generate the translation
    generated_tokens = model.generate(**encoded_text, forced_bos_token_id=forced_bos_token_id)
    # Decode the translation
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
# Demo / batch driver: exercises each translation backend and converts the
# Q&A CSV from English to Hindi. Runs only when the file is executed as a script.
# NOTE(review): the original indentation was lost; all statements below are
# assumed to belong to the __main__ guard — confirm against the original file.
if __name__ == "__main__":
    # Quick googletrans check in both directions
    print(translate_hi2en_gtrans("मैं खुश हूँ!!!"))
    print(translate_en2hi_gtrans("I am happy!!!"))

    import pandas as pd

    # Read CSV file into a Pandas DataFrame
    df_en = pd.read_csv('Data_with_QnA.csv', usecols=['Question1', 'Answer1', 'Question2', 'Answer2', 'Question3', 'Answer3', 'Question4', 'Answer4'])

    # convert all the question answers from English to Hindi
    # NOTE: DataFrame.applymap is deprecated in recent pandas releases in favour
    # of DataFrame.map; kept here so older pandas versions continue to work.
    df_hi = df_en.applymap(translate_en2hi_gtrans)

    # Save the modified DataFrame to a CSV file
    df_hi.to_csv('Hindi_QnA.csv', index=False)

    # English to Hindi example
    english_text = " What is the material used to create the chess set?"
    hindi_translation = translate_en_hi_transformer(english_text)
    print(f"English: {english_text}")
    print(f"Hindi: {hindi_translation}")

    # Hindi to English example
    hindi_text = "आपका दिन कैसा चल रहा है?"  # How is your day going?
    english_translation = translate_hi_en_transformer(hindi_text)
    print(f"Hindi: {hindi_text}")
    print(f"English: {english_translation}")

    # Example usage: mBART, Hindi to English
    hindi_text = "हिन्दी साहित्य पर अगर समुचित परिप्रेक्ष्य में विचार किया जाए तो स्पष्ट होता है कि हिन्दी साहित्य का इतिहास अत्यन्त विस्तृत व प्राचीन है। सुप्रसिद्ध भाषा वैज्ञानिक डॉ० हरदेव बाहरी के शब्दों में, हिन्दी साहित्य का इतिहास वस्तुतः वैदिक काल से आरम्भ होता है। यह कहना ही ठीक होगा कि वैदिक भाषा ही हिन्दी है। इस भाषा का दुर्भाग्य रहा है कि युग-युग में इसका नाम परिवर्तित होता रहा है। कभी 'वैदिक', कभी 'संस्कृत', कभी 'प्राकृत', कभी'अपभ्रंश' और अब - हिन्दी।[1] आलोचक कह सकते हैं कि 'वैदिक संस्कृत' और 'हिन्दी' में तो जमीन-आसमान का अन्तर है। पर ध्यान देने योग्य है कि हिब्रू, रूसी, चीनी, जर्मन और तमिल आदि जिन भाषाओं को 'बहुत पुरानी' बताया जाता है, उनके भी प्राचीन और वर्तमान रूपों में जमीन-आसमान का अन्तर है; पर लोगों ने उन भाषाओं के नाम नहीं बदले और उनके परिवर्तित स्वरूपों को 'प्राचीन', 'मध्यकालीन', 'आधुनिक' आदि कहा गया, जबकि 'हिन्दी' के सन्दर्भ में प्रत्येक युग की भाषा का नया नाम रखा जाता रहा।"
    english_translation = translate_mbart(hindi_text, "hi_IN", "en_XX")
    print(english_translation)

    # Example usage: mBART, English to Hindi.
    # The literal was split across two physical lines, which a plain
    # double-quoted string cannot span; the two parts are rejoined here as
    # adjacent literals.
    english_text = (
        "English literature, the body of written works produced in the English language by inhabitants of the British Isles (including Ireland) from the 7th century to the present day. The major literatures written in English outside the British Isles are treated separately under American literature, Australian literature, Canadian literature, and New Zealand literature. English literature has sometimes been stigmatized as insular. It can be argued that no single English novel attains the universality of the Russian writer Leo Tolstoy’s War and Peace or the French writer Gustave Flaubert’s Madame Bovary. Yet in the Middle Ages the Old English literature of the subjugated Saxons was leavened by the Latin and Anglo-Norman writings, eminently foreign in origin, in which the churchmen and the Norman conquerors expressed themselves. From this combination emerged a flexible and subtle linguistic instrument exploited by Geoffrey Chaucer and brought to supreme application by William Shakespeare. During the Renaissance the renewed interest in Classical learning and values had an important effect on English literature, as on all the arts; and ideas of Augustan literary propriety in the 18th century and reverence in the 19th century for a less specific, though still selectively viewed, Classical antiquity continued to shape the literature. All three of these impulses derived from a foreign source, namely the Mediterranean basin. The Decadents of the late 19th century and the Modernists of the early 20th looked to continental European individuals and movements for inspiration. Nor was attraction toward European intellectualism dead in the late 20th century, for by the mid-1980s the approach known as structuralism, a phenomenon predominantly French and German in origin, infused the very study of English literature itself in a host of published critical studies and university departments. "
        "Additional influence was exercised by deconstructionist analysis, based largely on the work of French philosopher Jacques Derrida."
    )
    hindi_translation = translate_mbart(english_text, "en_XX", "hi_IN")
    print(hindi_translation)