# linuxscout/verbstamp.py

## verbstamp.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#  verbstamp.py
#
#  Copyright 2023 zerrouki <zerrouki@majd4>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#

import re
import pyarabic.araby as araby

# Character class matching every letter that verb_stamp() deletes from a word:
# alef, yeh, waw, alef maksura, hamza and shadda.
VERB_STAMP_PAT = re.compile(
    u"[%s%s%s%s%s%s]" % (
        araby.ALEF,
        araby.YEH,
        araby.WAW,
        araby.ALEF_MAKSURA,
        araby.HAMZA,
        araby.SHADDA,
    ),
    re.UNICODE,
)
def verb_stamp(word):
    """
    Generate the stamp of a verb.

    A verb stamp differs from a plain word stamp in that the hamza forms
    are normalized before the letters are removed.

    Steps, in order:
        1. strip the diacritics with araby.strip_tashkeel;
        2. unify the different hamza forms with araby.normalize_hamza;
        3. drop the initial hamza, if the word starts with one;
        4. drop the last letter when it repeats the letter before it;
        5. delete every character matched by VERB_STAMP_PAT, i.e. the
           letters whose form can change inside the word:
            - ALEF,
            - YEH,
            - WAW,
            - ALEF_MAKSURA,
            - HAMZA,
            - SHADDA.

    @param word: the verb to stamp
    @return: stamped word
    """
    # Diacritics go first, then hamza normalization, so that the checks
    # below only ever see araby.HAMZA.
    skeleton = araby.normalize_hamza(araby.strip_tashkeel(word))

    # The initial hamza is not part of the stamp.
    if skeleton.startswith(araby.HAMZA):
        skeleton = skeleton[1:]

    # Slices (rather than indexes) keep empty and one-letter words safe:
    # a single letter is compared with '' and is therefore kept.
    final, penultimate = skeleton[-1:], skeleton[-2:-1]
    if final == penultimate:
        skeleton = skeleton[:-1]

    return VERB_STAMP_PAT.sub('', skeleton)
def main(args):
    """
    Small demonstration: print the stamp of one sample verb.

    @param args: argument list handed over by the caller; it is not used.
    @return: always 0.
    """
    sample = "رمى"
    stamp = verb_stamp(sample)
    print(stamp)
    return 0

if __name__ == '__main__':
    # Script mode only: run the demo and use its return value as the
    # process exit status.
    import sys
    status = main(sys.argv)
    sys.exit(status)
	#!/usr/bin/env python
	# -- coding: utf-8 --
	#
	# verbstamp.py
	#
	# Copyright 2023 zerrouki <zerrouki@majd4>
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 2 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program; if not, write to the Free Software
	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
	# MA 02110-1301, USA.
	#
	#

	import re
	import pyarabic.araby as araby

	# Character class matching every letter that verb_stamp() deletes from a
	# word: alef, yeh, waw, alef maksura, hamza and shadda.
	VERB_STAMP_PAT = re.compile(
	    u"[%s%s%s%s%s%s]" % (
	        araby.ALEF,
	        araby.YEH,
	        araby.WAW,
	        araby.ALEF_MAKSURA,
	        araby.HAMZA,
	        araby.SHADDA,
	    ),
	    re.UNICODE,
	)
	def verb_stamp(word):
	    """
	    Generate a stamp for a verb.

	    The verb stamp differs from the word stamp by hamza normalization.
	    Processing order:
	        - strip the diacritics (araby.strip_tashkeel),
	        - unify the different hamza forms (araby.normalize_hamza),
	        - remove the initial hamza,
	        - remove the last letter if it is doubled,
	        - remove all letters which can change form in the word, as
	          listed in VERB_STAMP_PAT: ALEF, YEH, WAW, ALEF_MAKSURA,
	          HAMZA and SHADDA.

	    @param word: the verb to stamp
	    @return: stamped word
	    """
	    word = araby.strip_tashkeel(word)
	    # Hamza forms are unified so the startswith() test below is enough.
	    word = araby.normalize_hamza(word)
	    if word.startswith(araby.HAMZA):
	        # Strip the first hamza.
	        word = word[1:]
	    # Strip the last letter if it is doubled; slicing keeps empty and
	    # one-letter words from raising IndexError.
	    if word[-1:] == word[-2:-1]:
	        word = word[:-1]
	    return VERB_STAMP_PAT.sub('', word)
	def main(args):
	    """
	    Small demonstration: print the stamp of one sample verb.

	    @param args: argument list handed over by the caller; it is not used.
	    @return: always 0.
	    """
	    verb = "رمى"
	    print(verb_stamp(verb))
	    return 0

	if __name__ == '__main__':
	    # Script mode only: run the demo and use its return value as the
	    # process exit status.
	    import sys
	    sys.exit(main(sys.argv))