##  This is the master file to generate the TECkit ArabTeX font mappings 
##     for use with XeTeX
##  The mappings are generated with a simple perl script (makemaps.pl)
##
##  Conventions used:
##  ---------------------------------------------------------------------------------
##  Lines starting with ## are comments that don't appear in the map files
##  
##  Lines that don't begin with # are common to all mappings
##  Otherwise the letters following '#' indicate specific mappings 
##  according to vocalization modes and languages
##  #...n -> non-vocalized 
##  #...v -> vocalized and fully voc.   
##  #...f -> fully voc.
##  @v -> voc only
##  The language codes are:
##  A ->  Arabic
##  B ->  Maghribi Arabic
##  P ->  Persian 
##  U ->  Urdu  
##  T ->  Pashto
##  S ->  Sindhi 
##  K ->  Kashmiri
##  O ->  Ottoman Turkish
##  M ->  Malay
##  KURDISH AND UIGHUR ARE CODED SEPARATELY <<<
##  The above can be negated with #-[ABPUTSKOM]
##  Lines starting with '@[AP...][nvf]' are specific to the mapping 
##                 indicated by the two-letter key, following the conventions above
## ----------------------------------------------------------------------------------
@An LHSName  "ArabTeX-novowels"
@Av LHSName  "ArabTeX-vocalized"
@Af LHSName  "ArabTeX-fullvocalized"
@Bn LHSName  "ArabTeX-maghribi-novowels"
@Bv LHSName  "ArabTeX-maghribi-vocalized"
@Bf LHSName  "ArabTeX-maghribi-fullvocalized"
@Pn LHSName  "ArabTeX-farsi-novowels"
@Pv LHSName  "ArabTeX-farsi-vocalized"
@Pf LHSName  "ArabTeX-farsi-fullvocalized"
@Un LHSName  "ArabTeX-urdu-novowels"
@Uv LHSName  "ArabTeX-urdu-vocalized"
@Uf LHSName  "ArabTeX-urdu-fullvocalized"
@Sn LHSName  "ArabTeX-sindhi-novowels"
@Sv LHSName  "ArabTeX-sindhi-vocalized"
@Sf LHSName  "ArabTeX-sindhi-fullvocalized"
@Kn LHSName  "ArabTeX-kashmiri-novowels"
@Kv LHSName  "ArabTeX-kashmiri-vocalized"
@Kf LHSName  "ArabTeX-kashmiri-fullvocalized"
@Tn LHSName  "ArabTeX-pashto-novowels"
@Tv LHSName  "ArabTeX-pashto-vocalized"
@Tf LHSName  "ArabTeX-pashto-fullvocalized"
## @Kn LHSName  "ArabTeX-kurdish-novowels"
## @Kv LHSName  "ArabTeX-kurdish-vocalized"
## @Kf LHSName  "ArabTeX-kurdish-fullvocalized"
@On LHSName  "ArabTeX-turk-novowels"
@Ov LHSName  "ArabTeX-turk-vocalized"
@Of LHSName  "ArabTeX-turk-fullvocalized"
@Mn LHSName  "ArabTeX-malay-novowels"
@Mv LHSName  "ArabTeX-malay-vocalized"
@Mf LHSName  "ArabTeX-malay-fullvocalized"
## @Rn LHSName  "ArabTeX-uighur-novowels"
## @Rv LHSName  "ArabTeX-uighur-vocalized"
## @Rf LHSName  "ArabTeX-uighur-fullvocalized"
## 
##
RHSName  "UNICODE"

Define a U+0061  ;a
Define b U+0062  ;b
Define c U+0063  ;c
Define d U+0064  ;d
Define e U+0065  ;e
Define f U+0066  ;f
Define g U+0067  ;g
Define h U+0068  ;h
Define i U+0069  ;i
Define j U+006A  ;j
Define k U+006B  ;k
Define l U+006C  ;l
Define m U+006D  ;m
Define n U+006E  ;n
Define o U+006F  ;o
Define p U+0070  ;p
Define q U+0071  ;q
Define r U+0072  ;r
Define s U+0073  ;s
Define t U+0074  ;t
Define u U+0075  ;u
Define v U+0076  ;v
Define w U+0077  ;w
Define x U+0078  ;x
Define y U+0079  ;y
Define z U+007A  ;z
Define A U+0041  ;A
Define B U+0042  ;B
Define E U+0045  ;E
Define G U+0047  ;G
Define I U+0049  ;I
Define N U+004E  ;N
Define O U+004F  ;O
Define T U+0054  ;T
Define U U+0055  ;U
Define W U+0057  ;W
Define Y U+0059  ;Y
Define C U+005E  ;^  << CARET
Define UL U+005F ;_  << UNDERLINE 
Define P U+002E  ;.  << DOT
Define V U+002C  ;,  << COMMA (_V_irgule)
Define CO U+003A ;:  << COLON
Define SC U+003B ;; << SEMICOLON
Define Ain U+0060 ;` 
Define H U+0027  ;'  << HAMZA 
Define Q U+0022  ;"  << QUOTE
Define Z U+002D  ;-  << HYPHEN
Define BAR U+007C ;|
Define AN  U+0061 U+004E ; aN
Define LLAH U+006C U+006C U+005F U+0061 U+0068 ; ll_ah
;Define NUL U+00A6;
Define Ha  U+0623
Define Hi  U+0625
Define Hy  U+0626
Define Hw  U+0624
Define Hh  U+06C0 ;; = U+0647 U+0654  
Define HA  U+0622
Define HB  U+0640 U+0654
Define HBAR U+0621 ; '| 

pass(Unicode)

U+E000 > ; eliminate special code for capitalization in transliterations

pass(Unicode)

## Character classes used as match/context sets by the rules of this pass.
Class [VWL]         = (U+0061  U+0065  U+0069  U+006F  U+0075 ) ;(a e i o u) 
Class [AVWL]    = (U+064B..U+0650); Arabic vowel marks U+064B..U+0650 (fathatan .. kasra)
Class [LTR]	    = (U+0061..U+007A U+0041 U+0045 U+0047 U+0049 U+004F U+0055 U+0042 U+004E U+0054 U+0059 U+005E U+002E U+005F U+0022 U+0027 U+0060) ;a..z A E G I O U B N T Y ^ . _ " ' `
Class [BRACKETS]    = (U+003C U+003E U+00AB U+00BB U+0028 U+0029 U+005B U+005D U+2018 U+2019 U+201C U+201D); < > « » ( ) [ ] ‘ ’ “ ”
Class [PUNCT] = (V SC P CO) ; comma, semicolon, dot, colon (see the Define lines for V SC P CO)
Class [Iy]	    = (U+0049 U+0079) ;(I y)
Class [Uw]          = (U+0055 U+0077) ;(U w)
Class [Uwu]          = (U+0055 U+0077 U+0075) ;(U w u)
;Class [ae]	    = (U+0061 U+0065) ;(a e)
;Class [ou]          = (U+0075 U+006F) ;(u o)
Class [Iyi]	    = (U+0069 U+0079 U+0049) ;(i y I)
Class [UI]          = (U+0049 U+0055) ; I U
Class [Digits] = (U+0030..U+0039) ; ASCII digits 0..9

## Word-boundary helpers used in rule contexts ('#' is the TECkit text-boundary marker).
Define BEG  (#|Z|[BRACKETS]) ; word start: text boundary, hyphen or bracket
Define END  (#|[BRACKETS]|[PUNCT] U+0020|[PUNCT] #) ; word end: text boundary, bracket, or punctuation followed by space/boundary
Define ENDZ (#|[BRACKETS]|[PUNCT] U+0020|[PUNCT] #|Z) ; same as END, but a hyphen also counts as an end

; special ligatures
;; 1. Standard ArabTeX input
;; FDF2 is just used as temporary holder, and is converted back to ligature lam lam hah later (to be taken care of by the font)
#Af Z (a|A) l Z LLAH > U+0671 U+FDF2 ;; '-al-ll_ah' or '-Al-ll_ah'
;; The BAR is used in the DMG transliteration to suppress the initial A
(a|A) BAR? l Z LLAH > U+0627 U+FDF2 ;; 'al-ll_ah' or 'Al-ll_ah'
LLAH > U+FDF2  ;; 'll_ah' -> 'l l h'
;; UPPERCASE mnemonics for various ligatures or special glyphs
U+004C U+004C U+0048 > U+FDF2  ;; 'LLH' -> 'l l h'
U+0046 U+0041 U+004C U+0049 U+004C U+004C U+0041 U+0048 > U+0641 U+064E U+0644 U+0650 U+0644 U+0647 ;; 'FALILLAH'
U+0041 U+004C U+004C U+0041 U+0048 > U+0627 U+0644 U+0644 U+0647 ; 'ALLAH'
U+004C U+004C U+0041 U+0048 > U+0644 U+0644 U+0647 ; 'LLAH'

U+0053 U+004C U+004D > U+FDFA ; 'SLM' : .sallY al-ll_ah `alayhi wa-sallam
U+0028 U+0028 > U+FD3F ;; '((' : ornate right par
U+0029 U+0029 > U+FD3E ;; '))' : ornate left par
U+0052 U+0049 U+0059 U+0041 U+004C > U+FDFC ;; 'RIYAL' : Saudi currency sign
;; most fonts won't have the following ones:
U+0053 U+0041 U+004C U+004C U+0041 U+0053 U+0054 U+004F U+0050 > U+FDF0 ;; 'SALLASTOP' (.sallY with "ye-barree", Quranic stop sign)
U+0051 U+0041 U+004C U+0041 > U+FDF1 ;; 'QALA' : qalY with ye-barree, Quranic stop sign
U+0041 U+004B U+0042 U+0041 U+0052 > U+FDF3 ;; 'AKBAR'
U+004D U+0055 U+0048 U+0041 U+004D U+004D U+0041 U+0044 > U+FDF4 ;; 'MUHAMMAD'
U+0053 U+0041 U+004C U+0041 U+004D > U+FDF5 ;; 'SALAM'
U+0052 U+0041 U+0053 U+004F ? U+0055 U+004C > U+FDF6 ;; 'RASUL' (also unicode 'RASOUL')
U+0041 U+004C U+0041 U+0059 U+0048 (U+0049|U+0045) > U+FDF7 ;; 'ALAYHI' (also unicode 'ALAYHE')
U+0057 U+0041 U+0053 U+0041 U+004C U+004C U+0041 U+004D > U+FDF8 ;; 'WASALLAM' 
U+0053 U+0041 U+004C U+004C U+0041 > U+FDF9 ;; 'SALLA'
U+004A U+0041 U+004C U+004C U+0041 > U+FDFB ;; 'JALLA' : "jalla jalAluhu"
U+0042 U+0041 U+0053 U+004D U+0041 U+004C U+0041 > U+FDFD ;; 'BASMALA' : very few fonts have this ligature afaik

## Quranic annotation signs (Unicode U+06D6..U+06ED), entered as UPPERCASE mnemonics.
## Mnemonics starting with '^' (U+005E) or '_' (U+005F) are spelled out code point by code point.
U+005E U+0053 U+004C U+0059 > U+06D6 ;; ^SLY
U+005E U+0051 U+004C U+0059 > U+06D7 ;; ^QLY
## '^MIM-' needs its trailing hyphen (U+002D) on the left-hand side: without it this rule
## had exactly the same input as '^MIM' below, so U+06E2 could never be produced.
U+005E U+004D U+0049 U+004D U+002D > U+06D8 ;; ^MIM-
U+005E U+004C U+0041 > U+06D9 ;; ^LA
U+005E U+004A U+0049 U+004D > U+06DA ;; ^JIM
U+005E U+0044 U+004F U+0054 U+0053 > U+06DB ;; ^DOTS
U+005E U+0053 U+0049 U+004E > U+06DC ;; ^SIN
U+0048 U+0049 U+005A U+0042 > U+06DE ;; HIZB
U+0043 U+0049 U+0052 U+0043 U+005A U+0045 U+0052 U+004F > U+06DF ;; CIRCZERO
U+0052 U+0045 U+0043 U+0054 U+005A U+0045 U+0052 U+004F > U+06E0 ;; RECTZERO
U+005E U+004A U+0041 U+005A U+004D > U+06E1 ;; ^JAZM
U+005E U+004D U+0049 U+004D > U+06E2 ;; ^MIM
U+005F U+0053 U+0049 U+004E > U+06E3 ;; _SIN
U+005E U+004D U+0041 U+0044 U+0044 U+0041 > U+06E4 ;; ^MADDA
U+0057 U+0041 U+0057 > U+06E5 ;; WAW
U+0059 U+0045 U+0048 > U+06E6 ;; YEH
U+005E U+0059 U+0045 U+0048 > U+06E7 ;; ^YEH
U+005E U+004E U+0055 U+004E > U+06E8 ;; ^NUN
U+0053 U+0041 U+004A U+0044 U+0041 > U+06E9 ;; SAJDA
U+005F U+0053 U+0054 U+004F U+0050 > U+06EA ;; _STOP
U+005E U+0053 U+0054 U+004F U+0050 > U+06EB ;; ^STOP
U+005E U+0052 U+0053 U+0054 U+004F U+0050 > U+06EC ;; ^RSTOP
U+005F U+004D U+0049 U+004D > U+06ED  ;; _MIM

U+0044 U+004F U+0054 U+0053 > U+061E ;; DOTS  (also possible by typing "::")

;; This is to enclose digits within glyph U+06DD
U+005B U+005B ([Digits]+)=dig U+005D U+005D > U+06DD @dig ; [[digits]]


; vowels
u u > U 
i i > I 
a a > A
#n [AVWL] > ; strip off vowels when writing UTF-8 Arabic with novoc mode
#T a e > a U+0626
#T E e > U+0659 U+06CD
#T e e > U+0659 U+0626
#T E  > U+0659 U+06D0
CO O > U+06FC ;
#T O  > U+0657 w
#T o  > U+0657 
#U H E > U+06D3 ;; hamza + yeh barree
#U E / _ #  > U+06D2
#U E > y
#U a e / _ # > a U+06D2
#-T a e > a y
a o > a w
#PUKS (o|u)? O / # _ > U+0627 w  
#PUKS (o|u)? O > w  
CO U > U+06C7 ;
#-TK e   > i ;; but not pashto > U+0659 and kashmiri > y + U+0658     (U+06CE is kurdish)
#T e > U+0659
#K e / # _ # > U+0627 U+06D2 U+0658 ; Alif + yeh barree + bow accent
#K e / _ # > U+06D2 U+0658 ; yeh barree + bow accent
#K e / # _ > U+0627 y U+0658  ; alif + ya + bow accent
#K e > y U+0658 
#K E / # _ # > U+0627 U+06D2 ; Alif + yeh barree 
#K E / _ # > U+06D2 ; yeh barree 
#K E / # _ > U+0627 y ; alif + ya 
#K E > y 
#K o / # _  > U+0627 U+06C6 
#K o   > U+06C6 
#-TK o   > u 
#AP E > I 
#A O  > U 
#K P A > U+0672
#K P U > U+0673  ; > this is the initial or lone form, but medial and final require wavy hamza below previous letter (not in Unicode)! Perhaps an OT font could solve this :: in Scheherazade the glyph needed is named _wavyHamza <<<
#K P a / # _ > U+0623  
#K P a > U+0654
#K P u > U+0655
#K P o / # _ > U+0627 U+06C4
#K P o > U+06C4
#K P O / # _ > U+0627 U+06C4 U+0627 U+064E
#K P O > U+06C4 U+0627 U+064E
#K I > y U+0656
#K I / _ # > y
#K U / _ # > U+0627 w U+0657
#K U > w U+0657
#-ASMK I / _ # > i y


#-A ;; Persian ezafe  (ALL BUT ARABIC AND UIGHUR)
#-A U+0048 Z UL? (Q? (i|e))=vv / _ END > U+06C0 @vv U+200C ;H-i -> heh+hamza final; what about 06C2 in Urdu ?<<<
#-A U+0048 Z Q?=qq (I|E)=vv / _ END > h U+0020 U+0627 @qq i @vv ; H-I
#-A Q?=q1 (I|E) Z UL? Q?=q2 (i|e) / _ END > @q1 i U+0626 @q2 i U+200C ; ...I-e
#-A Z Q?=qq I  / (A|U) _ ENDZ > U+0626 @qq i y ; ...A-I or ...U-I
#-A Z UL? (Q? (i|e))=vv / (A|U) _ END >  U+0649 @vv ; ...A-i or ...U-i
#-A Z Q?=q1 (e|i) / _ ENDZ > @q1 i ; ...-i
#-A Z / U+0048 _ > U+0020 ;
#-A b Q?=q1 (e|i) Q?=q2 U / # _ > b @q1 i U+0627 @q2 u w ;; special case of prep be without hyphen

;; initial characters ...
a l Z / BEG _ > U+0627 l Z
#An H l Z / BEG _ > U+0627 l Z
@Av H l Z / BEG _ > U+0627 l Z
@Bv H l Z / BEG _ > U+0627 l Z
#Af H l Z / BEG _ > U+0671 l Z ; THIS IS NOT AN ARABTEX CONVENTION! it's a work-around coz pattern-matching across word boundaries does not work in teckit , so min al- must be written mina 'l- in fullvocalize mode
;; wasla on initial alif : NEW! NOT TESTED!
#Af [VWL]=v1 Z / BEG _ > @v1 U+0671 ; THIS IS NOT AN ARABTEX CONVENTION! it's a work-around coz pattern-matching across word boundaries does not work in teckit , so huwa irtifA` must be written huwa i-rtifA` in fullvocalize mode
#n H / BEG _ l l > U+0627 ; 
#v H / BEG _ l l > U+0671 ; e.g. alla_dI -> 'lla_dI 
#n [VWL]=v1 / BEG _ > A @v1
#v [VWL]=v1 / BEG _ > U+0627 @v1
#An A / BEG _ > A a
#Av A / BEG _ > U+0627 a
#PU A / BEG _ > HA ;; ADD MORE ? <<<
#n Q?=q1 U / BEG _ > A @q1 u w ; -U
#v Q?=q1 U / BEG _ > U+0627 @q1 u w ; -U
#n ; Q?=q1 I / BEG _ > A @q1 i y  ; -I ; but not when I is final FIXME!!!
#v ; Q?=q1 I / BEG _ > U+0627 @q1 i y ; -I
#-Pn Q?=q1 I / BEG _ > A @q1 i y ; except for Persian, coz of ezafe rules
#-Pv Q?=q1 I / BEG _ > U+0627 @q1 i y
#Pn Q?=q1 I / BEG _ ^# > A @q1 i y ; -I...
#Pv Q?=q1 I / BEG _ ^# > U+0627 @q1 i y ; -I...
#P Z Q?=qq (I|E) / _ (#|Z) > @qq i y ;
#P UL Q?=q1 U > @q1 U
#P Q?=q1 U / # _ # > U+0627 @q1 u w
#P Q BAR > U+0020; "| to separate compounds >> IN TRANSLIT THIS WILL BE REMOVED


;; Words with anomalous orthography:

; mi'aT -> ماية  
;;; i H / m Q ? _ Q ? a ? (T|t) > i A Hy 
#n m Q i H Q a / _ (T|t)   > m Q i A Hy Q a
#n m Q i H a   / _ (T|t)   > m Q i A Hy
#n m i H Q a   / _ (T|t)   > m A Hy Q a
#n m i H a     / _ (T|t)   > m A Hy
#v m Q i H Q a / _ (T|t)   > m U+0627 Hy 
#v m Q i H a   / _ (T|t)   > m U+0627 Hy a
#v m i H Q a   / _ (T|t)   > m i U+0627 Hy 
#v m i H a     / _ (T|t)   > m i U+0627 Hy a

#-S 
#-S ;; contextual analysis of hamzas
#-S ;; THESE DO NOT APPLY FOR SINDHI
#-S 
#-S ; initial hamzas 
#-S H  / (#|^[LTR]) _ Q? [Iyi] >   Hi 
#-S H Q A / (#|^[LTR]) _       >   HA ; alif madda
#-S H  / (#|^[LTR]) _ Q? [Uwu] >   Ha 
#-S H  / (#|^[LTR]) _ Q? a     >   Ha  
#-S Q H / (#|^[LTR]) _         >   HB ; quoted isolated hamza at beginning of word
#-S 
#-S ; quoted hamzas in contextual mode, equivalent to verbatim mode
#-S a Q H >   Ha  
#-S i Q H >   Hi 
#-S y Q H >   Hy 
#-S w Q H >   Hw 
#-S h Q H >   Hh 
#-S A Q H >   HA 
#-S B Q H >   HB 
#-S BAR Q H > HBAR 
#-S 
#-S ; hamza + fathatan
#-S H / a _ Q? AN  > Ha
#-S ;;H / [Iyi] _ Q? AN  > Hy 
#-S H / _ Q? AN > Hy
#-S 
#-S ;final hamzas
#-S H / A _ Q? [VWL]? N? (#|^[LTR]) > HBAR
#-S H / [Uw] _ Q? [VWL]? N? (#|^[LTR]) > HBAR
#-S H / [Iy] _ Q? (i N?|u N?)? (#|^[LTR]) > HBAR ; not AN ...
#-S H / a _ Q? (#|^[LTR]) > Ha ; not AN... 
#-S H / a _ Q? (u|i) N? (#|^[LTR]) > Ha ; not AN... 
#-S H / i _ Q? [VWL]? N? (#|^[LTR]) > Hy
#-S H / u _ Q? [VWL]? N? (#|^[LTR]) > Hw
#-S 
#-S ;;and more hamzas ...
#-S 
#-S ; n°1: kasra/ya
#-S H / [LTR] _ Q? [Iyi] ^N > Hy
#-S H / [Iyi] _ > Hy
#-S 
#-S ; n°2: damma/waw 
#-S H / [LTR] _ Q? (U|u) ^N > Hw
#-S H / (U|u) _ Q? ^[Iyi] > Hw
#-S H / U _ > HBAR
#-S 
#-S ; n°3: fatha/alif
#-S H Q? A > HA
#-S H / A _ Q? a > HBAR
#-S H / ^[UI] _ Q? a > Ha
#-S H / a _ Q? ^[UI] > Ha

##
##;; take care of all shaddas at once? NOT WORKING, have to give them one by one
##;; ((V|C|P|UL)=l1 (b|c|d|f|g|h|j|k|l|m|n|p|q|r|s|t|w|x|y|z|Ain|T)=l2){2,2} > @l1 @l2 U+0651 ;
##;; ((b|c|d|f|g|h|j|k|l|m|n|p|q|r|s|t|w|x|y|z|Ain|T)=l1){2,2} > @l1 U+0651 ;
##;; #U ;special rules for Urdu : NO! tashdid realized as ^c^ch instead of ^ch^ch
##;; #U (C (c|g)=x h){2,2}  > C @x h U+0651
##;; #U (V (t|d|r)=x h){2,2}  > V @x h U+0651;
##;; #U ((b|p|t|j|d|r|k|g|l|m|n)=x h){2,2} > @x h U+0651

pass(Unicode)

## Character classes for the second pass (classes are declared again after each pass(...) line).
Class [Digits] = (U+0030..U+0039)
Class [ArDigits] = (U+0660..U+0669)
Class [ArEasternDigits] = (U+06F0..U+06F9)
Class [VWL]         = (U+0061  U+0065  U+0069  U+006F  U+0075 ) ;(a e i o u) 
## [LTR] now lists G (U+0047) like the first-pass [LTR]: G is still an input letter in this
## pass (it is mapped to U+06AB below), so it must count as a letter in the bracket contexts.
Class [LTR]	    = (U+0061..U+007A U+0041 U+0045 U+0047 U+0049 U+004F U+0055 U+0042 U+004E U+0054 U+0059 U+005E U+002E U+005F U+0022 U+0027 U+0060) ;(a..z A E G I O U B N T Y ^ . _ " ' `)
Class [Iy]	    = (U+0049 U+0079) ;(I y)
Class [Uw]          = (U+0055 U+0077) ;(U w)


##; all hamzas are now in verbatim mode
##;;;H a > U+0623
##;;;H i > U+0625
##;;;H y > U+0626
##;;;H w > U+0624
##;;;H h > U+0647 U+0654
##;;;H A > U+0622
##;;;H B > U+0640 U+0654
##;;;H BAR > U+0621 ; '| 


;normal code
##  add code for V S M K .k .l (.p=.f) (.v=.b) .y
##  .A .U (.T=Y) 
##  CHECK ,a ,c ,d ,e ,g ,h ,n ,r ,s ,t ,z ,A
##  ^ alone=alif with hamza above, ^a=alif hamza fatha, ^i = alif hamza kasra, ^u=alif damma waw (no hamza) 
##  CHECK ^d ^e ^i ^l ^n ^z
##  CHECK (^A='A) 
##  CHECK (_k=.g) (_s=_t) (_z=_d) 
##  IMPLEMENT (:a :e :i :o :u = alif + vowel)
##  IMPLEMENT :b :d :g :j :n :s (:t=.t?) (:z=.d?) :O :U

#S ;; special code for sindhi
#S ;; U+004D U+0049 U+004E > U+06FE ; define macro \MIN in sindhi env
#S P m I N > U+06FE ; .mIN
#S ;; U+0049 U+004E > U+06FD ; IN     define macro \IN in sindhi env
#S H BAR I N > U+06FD ; '|IN

#K CO z CO z > U+0636 U+0651  ;;SHADDA
#K CO z	     > U+0636 
(P z|U+1E93) (P z|U+1E93)  > U+0638 U+0651  ;;SHADDA
(P z|U+1E93)  > U+0638 
C z C z	  > U+0698 U+0651  ;;SHADDA
C z	  > U+0698 
#-O V z V z   > U+0696 U+0651 ;;SHADDA
#-O V z       > U+0696 ;pashto
#O V z V z   > U+0636 U+0651  ;;SHADDA
#O V z       > U+0636 
z z	  > U+0632 U+0651  ;;SHADDA
z	  > U+0632 
#n Y Y	  > U+0649 U+0651  ;;SHADDA
#n Y	  > U+0649 
#v Q Y Y  > U+0649 U+0651  ;;SHADDA
#v Q Y	  > U+0649 
#v Y Y	  > U+064E U+0649 U+0651  ;;SHADDA
#v Y	  > U+064E U+0649 
#K ;; .b with ring below : FIXME not in Unicode! but U+06EA is a poor replacement:
#K P y / # _ # > U+0627 U+06D2 U+06EA
#K P y / _ # > U+0649
#K P y > U+066E U+06EA
#-ASM ;; y / _ # > U+0649 ; y final in persian,ottoman,urdu,pashto,kashmiri has no dots 
#-ASM y y > U+06CC U+0651 ;;SHADDA
#-ASM y   > U+06CC ; yeh in persian etc. > iso and final shapes have no dots
#ASMv P I P I / _ # > U+0650 U+06CC U+0651 ;; SHADDA
#ASMv P I / _ # > U+0650 U+06CC ;; workaround to have ya without dots in Arabic
#ASMn P I P I / _ # > U+06CC U+0651 ;; SHADDA
#ASMn P I / _ # > U+06CC ;; workaround to have ya without dots in Arabic
#ASM y y  > U+064A U+0651  ;;SHADDA
#ASM y	  > U+064A 
#B v v > U+06A5 U+0651 ;;SHADDA 
#B v > U+06A5 ;; maghribi
#M v v	> U+06CF U+0651  ;;SHADDA
#M v > U+06CF
#-MB v v  > U+06A4 U+0651  ;;SHADDA
#-MB v	  > U+06A4 
W 	  > U+0648 U+0627 ; waw + alif 
C U       > U+064F U+0648 U+0653; historical spelling : madda on waw
#n Q U [Uw]  > U+064F U+0648 U+0651  ;;SHADDA
#n Q U       > U+064F U+0648 
#n [Uw] [Uw] > U+0648 U+0651  ;;SHADDA
#n [Uw]	  > U+0648
## '^w' and ':w' -> U+06C9 and U+06CA; the doubled input adds a shadda (U+0651).
C w C w > U+06C9 U+0651 ;;SHADDA
C w > U+06C9
CO w CO w > U+06CA U+0651 ;;SHADDA
CO w > U+06CA
#n Q u N	  > U+064C 
#n u N       > 
#n Q u	  > U+064F 
#v U A / _ # > U+064F U+0648 U+0627 
#v Q U [Uw]  > U+0648 U+0651  ;;SHADDA
#v Q U       > U+0648 
#v U [Uw]    > U+064F U+0648 U+0651  ;;SHADDA
#v U         > U+064F U+0648 
#v w w	  > U+0648 U+0651  ;;SHADDA
#v w	  > U+0648 
#v Q u N?    > 
#v u N	  > U+064C 
#v u	  > U+064F 
#S V t h V t h > U+067A U+0651 ;;SHADDA
#S V t h > U+067A
#S t h t h > U+067F U+0651 ;;SHADDA
#S t h > U+067F
#S V t V t > U+067D U+0651 ;;SHADDA
#S V t > U+067D
#T V t V t > U+067C U+0651 ;;SHADDA
#T V t    > U+067C ; pashto
#-TS V t V t   > U+0679 U+0651 ;;SHADDA
#-TS V t 	  > U+0679 ;urdu
UL (t|s) UL (t|s) > U+062B U+0651  ;;SHADDA
UL (t|s)  > U+062B 
(P t|U+1E6D) (P t|U+1E6D)  > U+0637 U+0651  ;;SHADDA
(P t|U+1E6D)	  > U+0637 
t t	  > U+062A U+0651  ;;SHADDA
t	  > U+062A 
#n T T Q AN  > U+0629 U+0651 U+064B   ;;SHADDA
#n T Q AN    > U+0629 U+064B  
#n T AN      > U+0629 
#v T T Q AN  > U+0629 U+0651   ;;SHADDA
#v T Q AN    > U+0629   
#v T AN      > U+0629 U+064B
T T	  > U+0629 U+0651  ;;SHADDA
T	  > U+0629 
C s C s   > U+0634 U+0651  ;;SHADDA
C s	  > U+0634 
(P s|U+1E63) (P s|U+1E63)  > U+0635 U+0651  ;;SHADDA
(P s|U+1E63)  > U+0635 
#-O V s V s   > U+069A U+0651 ;;SHADDA
#-O V s       > U+069A ;Pashto
#O V s V s > U+0634 U+0651  ;;SHADDA   
#O V s     > U+0634 
CO s CO s > U+069B U+0651 ;;SHADDA
CO s      > U+069B ; sin with 3 dots below
## ;; ADD 069C-069F <<<<<<<<<<
s s	  > U+0633 U+0651  ;;SHADDA
s	  > U+0633 
C r C r	  > U+06EF U+0651 ;;SHADDA ; missing in ArabTeX  <<<
C r	  > U+06EF 
CO r CO r > U+0697 U+0651 ;;SHADDA ; missing in ArabTeX <<<
CO r      > U+0697 ; Dargwa language, Dagestan
P r P r	  > U+0694 U+0651 ;;SHADDA
P r	  > U+0694 ; r with dot below; in Kurdish .r -> 0694 
#S V r V r > U+0699 U+0651 ;;SHADDA
#S V r > U+0699
#T V r V r  > U+0693 U+0651 ;;SHADDA
#T V r 	  > U+0693 ;Pashto
#-ST V r V r  > U+0691 U+0651 ;;SHADDA
#-ST V r 	  > U+0691 ;Urdu 
r r	  > U+0631 U+0651  ;;SHADDA
r	  > U+0631 
P q P q	  > U+066F U+0651  ;;SHADDA
P q	  > U+066F 
#-B q q	  > U+0642 U+0651  ;;SHADDA
#-B q	  > U+0642 
#B q q > U+06A7 U+0651 ;; maghribi
#B q > U+06A7 ;; maghribi
#S p h p h > U+06A6 U+0651 ;;SHADDA
#S p h > U+06A6
#M p p > U+06A8 U+0651  ;;SHADDA
#M p > U+06A8 ;Old malay
#-M p p	  > U+067E U+0651  ;;SHADDA
#-M p	  > U+067E 
CO n CO n > U+06B1 U+0651 ;;SHADDA
CO n > U+06B1
#M C n C n > U+06BD U+0651 ;;SHADDA 
#M C n > U+06BD ; <<< ArabTeX yields nūn with three dots below, which is not in Unicode!
#S C n C n > U+0683 U+0651 ;;SHADDA
#S C n > U+0683
#-SM C n C n  > U+06AD U+0651  ;;SHADDA
#-SM C n      > U+06AD 
## U+06AE Berber <<<
#S P P n P P n > U+06B2 U+0651 ;;SHADDA 
#S P P n > U+06B2 ;; old sindhi? (not in ArabTeX) <<<
P n P n   > U+06BA U+0651  ;;SHADDA
P n 	  > U+06BA 	   ; urdu
#S V n V n > U+06BB U+0651 ;;SHADDA
#S V n > U+06BB
#-S V n V n   > U+06BC U+0651 ;;SHADDA 
#-S V n       > U+06BC ; Pashto
n n	  > U+0646 U+0651  ;;SHADDA
n	  > U+0646 
m m	  > U+0645 U+0651  ;;SHADDA
m	  > U+0645 
## '.l' and '^l' -> U+06B6 and U+06B5; the doubled input adds a shadda (U+0651).
P l P l	  > U+06B6 U+0651 ;;SHADDA
P l 	  > U+06B6 ; ADDED <<< 
C l C l	  > U+06B5 U+0651 ;;SHADDA
C l	  > U+06B5 ;kurdish 
## U+06B7 U+06B8 U+06B9 missing <<<
l l	  > U+0644 U+0651  ;;SHADDA
l	  > U+0644 
#S k h k h > U+06A9 U+0651 ;;SHADDA
#S k h > U+06A9
#-O P k P k > U+06A9 U+0651 ;;SHADDA 
#-O P k > U+06A9 ; pashto urdu 
#O P k P k > U+0642 U+0651 ;;SHADDA
#O P k > U+0642 ; Ottoman qaf
UL k UL k > U+063A U+0651 ;;SHADDA
UL k  > U+063A
#S k k > U+06AA U+0651 ;;SHADDA
#S k > U+06AA  ; kaf swash 
#-S k k	  > U+0643 U+0651  ;;SHADDA
#-S k	  > U+0643 
#-MO C g C g   > U+062C U+0651  ;;SHADDA
#-MO C g	  > U+062C 
#MO C g C g > U+06A0 U+0651 ;;SHADDA
#MO C g > U+06A0
CO j CO j > U+0684 U+0651 ;;SHADDA
CO j > U+0684
#O j j	  > U+0698 U+0651  ;;SHADDA
#O j	  > U+0698 
#-O j j	  > U+062C U+0651  ;;SHADDA
#-O j	  > U+062C 
#n Q I	     > U+0650 U+064A
#v Q I       > U+064A
#v I         > U+0650 U+064A
C I       > U+0650 U+064A U+0653 ; historical spelling : madda on ya
UL I 	  >  ; suggestion of O. Smrz (arabtex-plus project) URL = ###
#n I         > U+064A
#n Q i N     > U+064D 
#n i N       >  ;
#n Q i       > U+0650 
#v Q i N?    >  ;
#v i N       > U+064D 
#v i         > U+0650 
x x	  > U+062E U+0651  ;;SHADDA
x	  > U+062E 
#PT U+0048 Z > U+0647 U+200C U+0020 ;eH-suffix  > eH suffix
#PT U+0048 > U+0647 U+200C ; H is always heh final
#U U+0048 >  U+06C3 
UL h UL h > U+062E U+0651  ;;SHADDA
UL h	  > U+062E 
(P h|U+1E25) (P h|U+1E25)  > U+062D U+0651  ;;SHADDA
(P h|U+1E25) > U+062D 
V h V h   > U+06C1 U+0651 ;;SHADDA
V h       > U+06C1
#U H H   > U+06C1 U+0651 ;;SHADDA
#U H     > U+06C1
#U h  h	  > U+06BE U+0651 ;;SHADDA
#U h  	  > U+06BE ; urdu  
#U ;; fix letter+h letter+h > letter+h shadda in URDU <<<
#S h h > U+06BE U+0651 ;;SHADDA
#S h > U+06BE
#-US h h  > U+0647 U+0651  ;;SHADDA
#-US h	  > U+0647 
#S P CO g P CO g > U+06B4 U+0651 ;;SHADDA
#S P CO g > U+06B4 ;; old sindhi? (not in ArabTeX) <<<
CO g CO g > U+06B3 U+0651 ;;SHADDA
CO g > U+06B3
V g V g > U+06AC U+0651 ;;SHADDA
V g > U+06AC ; kaf with dot above
P g P g	  > U+063A U+0651  ;;SHADDA
P g	  > U+063A 
#M g g > U+0762 U+0651 ;;SHADDA
#M g > U+0762 
#-M g g	  > U+06AF U+0651  ;;SHADDA
#-M g	  > U+06AF 
G G       > U+06AB U+0651  ;;SHADDA
G         > U+06AB ;pashto
## U+06B0 western punjabi <<<
P f P f	  > U+06A1 U+0651  ;;SHADDA
P f	  > U+06A1 
#B f f > U+06A2 U+0651 ;; SHADDA
#B f > U+06A2 ;; maghribi
## ingushi CO f > U+06A3 <<<
#-B f f	  > U+0641 U+0651  ;;SHADDA
#-B f	  > U+0641 
#S V d h V d h > U+068D U+0651 ;;SHADDA
#S V d h > U+068D
#-TU V d V d > U+068A U+0651 ;;SHADDA
#-TU V d > U+068A
#T V d V d > U+0689 U+0651 ;;SHADDA
#T V d 	  > U+0689  ;pashto
#U V d V d  > U+0688 U+0651 ;;SHADDA
#U V d	  > U+0688 ;urdu
P V d P V d > U+068B U+0651 ;;SHADDA
P V d > U+068B ;; (like U+0688 with dot below, for Lahnda = Western Punjabi) 
#S d h d h > U+068C U+0651 ;;SHADDA
#S d h > U+068C
## '^d': U+068E in Sindhi, U+06EE in all other mappings.
## The doubled Sindhi form now emits the shadda (U+0651) like every other doubled consonant;
## previously '^d^d' collapsed to a single U+068E and the gemination was lost.
#S C d C d > U+068E U+0651 ;;SHADDA
#S C d > U+068E  ; old sindhi: not in ArabTeX
#-S C d C d > U+06EE U+0651 ;;SHADDA
#-S C d > U+06EE ; not defined in ArabTeX
CO d CO d > U+068F U+0651 ;;SHADDA
CO d > U+068F
#U CO CO d CO CO d > U+0690 U+0651 ;;SHADDA
#U CO CO d > U+0690 ; old urdu: not in ArabTeX
UL (d|z) UL (d|z) > U+0630 U+0651  ;;SHADDA
UL (d|z)  > U+0630 
(P d|U+1E0D) (P d|U+1E0D)  > U+0636 U+0651  ;;SHADDA
(P d|U+1E0D)  > U+0636 
d d	  > U+062F U+0651  ;;SHADDA
d	  > U+062F 
#S C c h C c h > U+0687 U+0651 ;;SHADDA
#S C c h > U+0687
#O V c V c > U+0686 U+0651 ;;SHADDA
#O V c > U+0686 ; Ottoman ç
#-O V c V c   > U+0685 U+0651 ;;SHADDA
#-O V c	  > U+0685 ;pashto 
C c C c	  > U+0686 U+0651  ;;SHADDA
C c	  > U+0686
P C c P C c > U+06BF U+0651 ;;SHADDA
P C c > U+06BF ;; cheh with dot above <<<
#M c c > U+0686  U+0651 ;;SHADDA
#M c > U+0686 
#O c c > U+062C U+0651 ;;SHADDA
#O c > U+062C
#-MO c c  > U+0681 U+0651 ;;SHADDA
#-MO c	  > U+0681 ; ح with hamza above
CO c CO c > U+0682 U+0651 ;;SHADDA
CO c > U+0682 ; old pashto 
#S b h b h > U+0680 U+0651 ;;SHADDA
#S b h > U+0680
CO b CO b > U+067B U+0651 ;;SHADDA
CO b > U+067B
P b P b   > U+066E U+0651  ;;SHADDA
P b       > U+066E 
b b	  > U+0628 U+0651  ;;SHADDA
b	  > U+0628 
H A       > U+0622 
C A       > U+064E U+0622 ; historical spelling
#n UL A      > U+0649 
#v UL A      > U+064E U+0649 
UL a	  > U+0670
#A UL u   > U+0657 ; inverted damma
#A UL i   > U+0656 ; subscript alif
#-An UL u   > ; 
#-An UL i   > ; 
#-Av UL u   > U+064F; 
#-Av UL i   > U+0650; 
#n Q AN / (A H|Ha) _ > U+064B
#n AN / (A H|Ha) _ > ;
#v Q AN / (A H|Ha) _ > ; 
#v AN / (A H|Ha) _ > U+064B
#n Q AN   > U+064B U+0627
#n AN  	  > U+0627 
#n Q a	  > U+064E 
#n Q A	  > U+064E U+0627 
#n A	  > U+0627 
#v Q AN   > U+0627 
#v AN 	  > U+064B U+0627
#v Q a	  > 
#v a 	  > U+064E 
#v Q A	  > U+0627 
#v A	  > U+064E U+0627 
Ain Ain	  > U+0639 U+0651  ;;SHADDA
Ain	  > U+0639 
H H	  > U+0621 U+0651 ;;SHADDA
H	  > U+0621 
U+002C / [Digits] _ [Digits] > U+002C  ; comma in numerical context not an Arabic comma
Z Z / [Digits] _ [Digits] > U+2013  ; two hyphens in numerical context -> endash
(Z|U+005D|U+005B|U+0028|U+0029)=xx / [Digits] _ [Digits] > U+202D @xx U+202C ; hyphen or brackets in numerical context: surrounded by LRO & PDF marks
#-PUSKT [Digits]  > [ArDigits]
#PUSKT [Digits] > [ArEasternDigits]
Z Z Z / # _ #    > U+2014 
Z Z / # _ #    > U+2013 
Z Z       > U+0640
B         > U+0640
Z / # _   > U+200D  ; -x  > force initial form with "zero-width joiner"
Z / _ #   > U+200D  ;  x- > force final form    
U+003C U+003C  > U+00BB ;<<
U+003E U+003E  > U+00AB ;>>
U+00AB    > U+00BB 
U+00BB    > U+00AB
UL        > U+0640  ;_
U+003F    > U+061F  ;?
U+003B    > U+061B  ;;
U+003A U+003A > U+061E ;; '::' ligature 'DOTS' also defined above...
U+002C    > U+060C  ;,
U+005D / [LTR] _ [LTR]   > U+200D U+005D U+200D ; keep shaping when inserting ...[...]... ... 
U+005B / [LTR] _ [LTR]   > U+200D U+005B U+200D ; (but of course this screws up contextual 
                                                ; analysis, so quoting of hamzas etc may be necessary
;U+005D    > U+005B  ;]  these are automatically mirrored, following Unicode rules
;U+005B    > U+005D  ;[
#n [VWL]    >   ;stripoff vowels
#v Q [VWL]   >   ;stripoff quoted vowels
#n Q	  > U+0652 
@v Q 	  > U+0652
U+00B0	  > U+0652 ; °
N         >   ;
C         >   ;^
#n Z      >   ;-
BAR       >   ;| 
#U P P > U+06D4 ; urdu full stop << check

#v pass(Unicode)
#v  
#v Class [CONS] = (U+0621 U+0623 U+0624 U+0626 U+0628 U+062A..U+063A U+0641..U+0647 U+066E U+066F U+0672 U+0675 U+0676 U+0678..U+06BF U+06EE U+06EF U+06FA..U+06FF U+0750..U+076D) ; to be improved: not sure that all glyphs listed (esp for Urdu,Sindhi,etc are really "consonants" in the sense that they can carry a sukun, not to mention that some of these languages may not have a sukun at all ;-)
#v Class [VWLX] = (U+0627 U+0648..U+065E U+0670) ; long vowels, vowel signs, shadda, sukun etc.
#v Class [VWL]  = (U+0627 U+0648..U+0650 U+0670);
#v
#f [CONS]=k1 / _ Z? [CONS] ^U+0651 > @k1 U+0652  ; hack: 2nd consonant has no shadda
#f U+064E (U+064A|U+0648)=hc / _ (^[VWLX]|#) > U+064E @hc U+0652 ; ay and aw diphtongs
@Av U+0627 [VWL]?  / [VWL] Z _ > U+0627 ; e.g. fa-isti_hraj -> initial alif has no vowel 
@Bv U+0627 [VWL]?  / [VWL] Z _ > U+0627 ; e.g. fa-isti_hraj -> initial alif has no vowel 
#Af U+0627 [VWL]?  / [VWL] Z _ > U+0671 ; e.g. fa-isti_hraj -> wasla on initial alif 
#f;; U+0627 / [VWL] U+0020 _ U+0644 > U+0671 ; wasla on initial alif-lam >>>THIS DOES NOT WORK<<< implemented in first pass instead...
#f U+0627 / # _ U+0644 > U+0627 U+064E ; otherwise initial alif takes fatha
#f [CONS]=k1 / _ ([CONS]|#) > @k1 U+0652  ; consonant + (consonant or final)
#f ;; [CONS]=k1 / _ # > @k1 U+0652 ; final consonant
U+FDF2 > U+0644 U+0644 U+0647 ;
#f Q > ;
#v Z > ;