Skip to content

Instantly share code, notes, and snippets.

@linuxscout
Last active March 25, 2023 12:59
Show Gist options
  • Save linuxscout/aa7fd8e691c700ab5ca00108fda9c911 to your computer and use it in GitHub Desktop.
Save linuxscout/aa7fd8e691c700ab5ca00108fda9c911 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# verbstamp.py
#
# Copyright 2023 zerrouki <zerrouki@majd4>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
import re
import pyarabic.araby as araby
# Character class matching every character that verb_stamp() deletes in its
# final step: ALEF, YEH, WAW, ALEF_MAKSURA, HAMZA and SHADDA.
VERB_STAMP_PAT = re.compile(
    u"[%s%s%s%s%s%s]" % (
        araby.ALEF,
        araby.YEH,
        araby.WAW,
        araby.ALEF_MAKSURA,
        araby.HAMZA,
        araby.SHADDA,
    ),
    re.UNICODE,
)
def verb_stamp(word):
    """
    Build the stamp of a verb.

    A verb stamp differs from a plain word stamp in that hamza forms are
    normalized first.  The steps, in order:

    - strip the tashkeel (vowel marks) from the word;
    - unify the different hamza forms into the stand-alone hamza;
    - drop a leading hamza;
    - drop the last letter when it repeats the one before it;
    - delete every character matched by VERB_STAMP_PAT, i.e. the letters
      that can change form inside the word (ALEF, YEH, WAW, ALEF_MAKSURA),
      plus SHADDA and any remaining HAMZA.

    @param word: the verb to stamp
    @return: stamped word
    """
    # Vowel marks go first, then hamza normalization runs on the result.
    stem = araby.normalize_hamza(araby.strip_tashkeel(word))

    # Drop the initial hamza, if any.
    if stem.startswith(araby.HAMZA):
        stem = stem[1:]

    # Drop the final letter when it is doubled.  Slices are compared rather
    # than indexed characters, so strings shorter than two characters do not
    # raise IndexError.
    if stem[-1:] == stem[-2:-1]:
        stem = stem[:-1]

    return VERB_STAMP_PAT.sub('', stem)
def main(args):
    """
    Demo entry point: print the stamp of one sample verb.

    @param args: command-line arguments; accepted but not used
    @return: 0, which the caller passes to sys.exit as the exit status
    """
    sample_verb = "رمى"
    stamp = verb_stamp(sample_verb)
    print(stamp)
    return 0
# Run the demo only when executed as a script, and hand main()'s return
# value to the interpreter as the process exit status.
if __name__ == '__main__':
    import sys

    exit_status = main(sys.argv)
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment