Spaces:

santanus24
/

DL4NLP

Runtime error

App Files Files Community

DL4NLP / language_translation.py

santanus24

uploading all .py files

9b5fe77 verified 10 months ago

raw

history blame

8.63 kB


	# !pip install googletrans==3.1.0a0
	# !pip install transformers sentencepiece

	import functools

	from googletrans import Translator
	from transformers import MarianMTModel, MarianTokenizer # transformer based pre-trained language translation model
	from transformers import MBartForConditionalGeneration, MBart50TokenizerFast



	def translate_hi2en_gtrans(sentence):
	    """
	    Translate text from Hindi to English using googletrans.

	    Args:
	    - sentence: string in Hindi

	    Returns:
	    - English translated text string. A value that is not a string, or a
	      string that is empty or whitespace-only, is returned unchanged
	      without contacting the translation service.

	    """
	    # Nothing to translate: avoid a pointless network round-trip and avoid
	    # handing a non-text value (None, NaN, numbers) to the translator.
	    if not isinstance(sentence, str) or not sentence.strip():
	        return sentence

	    translator = Translator()
	    output = translator.translate(sentence, dest='en', src='hi')
	    return output.text

	def translate_en2hi_gtrans(sentence):
	    """
	    Translate text from English to Hindi using googletrans.

	    Args:
	    - sentence: string in English

	    Returns:
	    - Hindi translated text string. A value that is not a string, or a
	      string that is empty or whitespace-only, is returned unchanged
	      without contacting the translation service.

	    """
	    # This function is applied element-wise to a DataFrame by the script
	    # section of this file, where empty CSV cells show up as NaN floats;
	    # pass such values (and blank strings) through untouched.
	    if not isinstance(sentence, str) or not sentence.strip():
	        return sentence

	    translator = Translator()
	    output = translator.translate(sentence, dest='hi', src='en')
	    return output.text

	@functools.lru_cache(maxsize=1)
	def _load_en_hi_model():
	    """Load the English-to-Hindi Marian tokenizer and model once and cache them."""
	    model_name_en_hi = "Helsinki-NLP/opus-mt-en-hi" # English to Hindi translation model
	    tokenizer = MarianTokenizer.from_pretrained(model_name_en_hi)
	    model_en_hi = MarianMTModel.from_pretrained(model_name_en_hi)
	    return tokenizer, model_en_hi


	# Translates text from English to Hindi using the pre-trained model
	def translate_en_hi_transformer(text):
	    """
	    Translate text from English to Hindi with the Helsinki-NLP Marian model.

	    Args:
	    - text: string in English

	    Returns:
	    - Hindi translated text string (the first decoded sequence). An empty
	      or whitespace-only string is returned unchanged without loading or
	      running the model.

	    """
	    # Blank input has nothing to translate; skip model loading entirely.
	    if isinstance(text, str) and not text.strip():
	        return text

	    # The tokenizer/model pair is loaded on first use and reused afterwards
	    # instead of being re-created on every call.
	    tokenizer, model_en_hi = _load_en_hi_model()
	    encoded = tokenizer(text, return_tensors="pt")
	    translated = model_en_hi.generate(**encoded)
	    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]

	@functools.lru_cache(maxsize=1)
	def _load_hi_en_model():
	    """Load the Hindi-to-English Marian tokenizer and model once and cache them."""
	    model_name_hi_en = "Helsinki-NLP/opus-mt-hi-en" # Hindi to English translation model
	    tokenizer_hi = MarianTokenizer.from_pretrained(model_name_hi_en)
	    model_hi_en = MarianMTModel.from_pretrained(model_name_hi_en)
	    return tokenizer_hi, model_hi_en


	# Translates text from Hindi to English using the pre-trained model
	def translate_hi_en_transformer(text):
	    """
	    Translate text from Hindi to English with the Helsinki-NLP Marian model.

	    Args:
	    - text: string in Hindi

	    Returns:
	    - English translated text string (the first decoded sequence). An
	      empty or whitespace-only string is returned unchanged without
	      loading or running the model.

	    """
	    # Blank input has nothing to translate; skip model loading entirely.
	    if isinstance(text, str) and not text.strip():
	        return text

	    # The tokenizer/model pair is loaded on first use and reused afterwards
	    # instead of being re-created on every call.
	    tokenizer_hi, model_hi_en = _load_hi_en_model()
	    encoded = tokenizer_hi(text, return_tensors="pt")
	    translated = model_hi_en.generate(**encoded)
	    return tokenizer_hi.batch_decode(translated, skip_special_tokens=True)[0]

	@functools.lru_cache(maxsize=1)
	def _load_mbart_model():
	    """Load the mBART-50 many-to-many model and tokenizer once and cache them."""
	    model_name = "facebook/mbart-large-50-many-to-many-mmt"
	    model = MBartForConditionalGeneration.from_pretrained(model_name)
	    tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
	    return model, tokenizer


	def translate_mbart(text, source_lang, target_lang):
	    """
	    Translate text between two languages with the mBART-50 many-to-many model.

	    Args:
	    - text: string to translate
	    - source_lang: language code of the input, assigned to the tokenizer's
	      src_lang (the script section of this file passes "hi_IN" / "en_XX")
	    - target_lang: language code of the output, looked up in the
	      tokenizer's lang_code_to_id mapping

	    Returns:
	    - Translated text string (the first decoded sequence). An empty or
	      whitespace-only string is returned unchanged without loading or
	      running the model.

	    """
	    # Blank input has nothing to translate; skip model loading entirely.
	    if isinstance(text, str) and not text.strip():
	        return text

	    # Model and tokenizer are loaded outside this function body (once, on
	    # first use) rather than being re-created on every call.
	    model, tokenizer = _load_mbart_model()

	    # Set source language (re-assigned on every call because the cached
	    # tokenizer object is shared between calls)
	    tokenizer.src_lang = source_lang
	    # Encode the text
	    encoded_text = tokenizer(text, return_tensors="pt")
	    # Force target language token
	    forced_bos_token_id = tokenizer.lang_code_to_id[target_lang]
	    # Generate the translation
	    generated_tokens = model.generate(**encoded_text, forced_bos_token_id=forced_bos_token_id)
	    # Decode the translation
	    translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
	    return translation


	if __name__ == "__main__":
	    # Demo script: exercises each translation helper defined in this file.
	    # pandas is only needed for the CSV conversion below, so it is imported here.
	    import pandas as pd

	    # Quick smoke test of the googletrans helpers
	    print(translate_hi2en_gtrans("मैं खुश हूँ!!!"))
	    print(translate_en2hi_gtrans("I am happy!!!"))

	    # Read CSV file into a Pandas DataFrame
	    df_en = pd.read_csv('Data_with_QnA.csv', usecols=['Question1', 'Answer1', 'Question2', 'Answer2', 'Question3', 'Answer3', 'Question4', 'Answer4'])

	    # convert all the question answers from English to Hindi
	    # DataFrame.map replaces the deprecated applymap on newer pandas; fall
	    # back to applymap when map is not available. na_action="ignore" leaves
	    # missing (NaN) cells untouched instead of sending them to the translator.
	    if hasattr(pd.DataFrame, "map"):
	        df_hi = df_en.map(translate_en2hi_gtrans, na_action="ignore")
	    else:
	        df_hi = df_en.applymap(translate_en2hi_gtrans, na_action="ignore")

	    # Save the modified DataFrame to a CSV file
	    df_hi.to_csv('Hindi_QnA.csv', index=False)

	    # English to Hindi example
	    english_text = " What is the material used to create the chess set?"
	    hindi_translation = translate_en_hi_transformer(english_text)
	    print(f"English: {english_text}")
	    print(f"Hindi: {hindi_translation}")

	    # Hindi to English example
	    hindi_text = "आपका दिन कैसा चल रहा है?" # How is your day going?
	    english_translation = translate_hi_en_transformer(hindi_text)
	    print(f"Hindi: {hindi_text}")
	    print(f"English: {english_translation}")

	    # mBART example: long Hindi passage to English
	    hindi_text = "हिन्दी साहित्य पर अगर समुचित परिप्रेक्ष्य में विचार किया जाए तो स्पष्ट होता है कि हिन्दी साहित्य का इतिहास अत्यन्त विस्तृत व प्राचीन है। सुप्रसिद्ध भाषा वैज्ञानिक डॉ० हरदेव बाहरी के शब्दों में, हिन्दी साहित्य का इतिहास वस्तुतः वैदिक काल से आरम्भ होता है। यह कहना ही ठीक होगा कि वैदिक भाषा ही हिन्दी है। इस भाषा का दुर्भाग्य रहा है कि युग-युग में इसका नाम परिवर्तित होता रहा है। कभी 'वैदिक', कभी 'संस्कृत', कभी 'प्राकृत', कभी'अपभ्रंश' और अब - हिन्दी।[1] आलोचक कह सकते हैं कि 'वैदिक संस्कृत' और 'हिन्दी' में तो जमीन-आसमान का अन्तर है। पर ध्यान देने योग्य है कि हिब्रू, रूसी, चीनी, जर्मन और तमिल आदि जिन भाषाओं को 'बहुत पुरानी' बताया जाता है, उनके भी प्राचीन और वर्तमान रूपों में जमीन-आसमान का अन्तर है; पर लोगों ने उन भाषाओं के नाम नहीं बदले और उनके परिवर्तित स्वरूपों को 'प्राचीन', 'मध्यकालीन', 'आधुनिक' आदि कहा गया, जबकि 'हिन्दी' के सन्दर्भ में प्रत्येक युग की भाषा का नया नाम रखा जाता रहा।"
	    english_translation = translate_mbart(hindi_text, "hi_IN", "en_XX")
	    print(english_translation)

	    # mBART example: long English passage to Hindi.
	    # The passage contains a line break; it is written as an explicit "\n"
	    # because a raw newline inside a single-quoted string literal is a
	    # syntax error.
	    english_text = (
	        "English literature, the body of written works produced in the English language by inhabitants of the British Isles (including Ireland) from the 7th century to the present day. The major literatures written in English outside the British Isles are treated separately under American literature, Australian literature, Canadian literature, and New Zealand literature. English literature has sometimes been stigmatized as insular. It can be argued that no single English novel attains the universality of the Russian writer Leo Tolstoy’s War and Peace or the French writer Gustave Flaubert’s Madame Bovary. Yet in the Middle Ages the Old English literature of the subjugated Saxons was leavened by the Latin and Anglo-Norman writings, eminently foreign in origin, in which the churchmen and the Norman conquerors expressed themselves. From this combination emerged a flexible and subtle linguistic instrument exploited by Geoffrey Chaucer and brought to supreme application by William Shakespeare. During the Renaissance the renewed interest in Classical learning and values had an important effect on English literature, as on all the arts; and ideas of Augustan literary propriety in the 18th century and reverence in the 19th century for a less specific, though still selectively viewed, Classical antiquity continued to shape the literature. All three of these impulses derived from a foreign source, namely the Mediterranean basin. The Decadents of the late 19th century and the Modernists of the early 20th looked to continental European individuals and movements for inspiration. Nor was attraction toward European intellectualism dead in the late 20th century, for by the mid-1980s the approach known as structuralism, a phenomenon predominantly French and German in origin, infused the very study of English literature itself in a host of published critical studies and university departments. \n"
	        "Additional influence was exercised by deconstructionist analysis, based largely on the work of French philosopher Jacques Derrida."
	    )
	    hindi_translation = translate_mbart(english_text, "en_XX", "hi_IN")
	    print(hindi_translation)