U
    mhYJ                     @   s   d Z ddlZddlZddlmZmZmZ G dd deZdd Z	G d	d
 d
eZ
G dd deZdddZedkrddlZe  dS )u  Unicode utility functions

>>> from .import unicode_util
>>> from .util import u
>>> u1 = '1'  # DIGIT ONE
>>> u2 = u('a')  # LATIN SMALL LETTER A
>>> u3 = u('２')  # FULLWIDTH DIGIT TWO
>>> u4 = u('Ā')  # LATIN CAPITAL LETTER A WITH MACRON
>>> unicode_util.Category.get(u1) == u('Nd')
True
>>> unicode_util.Category.get(u2) == u('Ll')
True
>>> unicode_util.Category.get(u3) == u('Nd')
True
>>> unicode_util.Category.get(u4) == u('Lu')
True
>>> unicode_util.Category.get(u2) == unicode_util.Category.LOWERCASE_LETTER
True
>>> try:
...     beyond_bmp = u('𐄀')  # AEGEAN WORD SEPARATOR LINE
... except Exception:
...     beyond_bmp = u('')
>>> if len(beyond_bmp) == 1:  # We have a UCS4 build of Python
...     cat_po = unicode_util.Category.get(beyond_bmp)
... else:  # UCS2 build of Python; no non-BMP chars available
...     cat_po = unicode_util.Category.OTHER_PUNCTUATION
>>> cat_po == u('Po')
True
>>> unicode_util.is_letter(u1)
False
>>> unicode_util.is_letter(u2)
True
>>> unicode_util.is_letter(u3)
False
>>> unicode_util.is_letter(u4)
True
>>> b1 = unicode_util.Block.get(u1)
>>> str(b1)
'Block[0000, 007f]'
>>> b1 == unicode_util.Block.BASIC_LATIN
True
>>> b1 == [0x0000, 0x0075]
False
>>> b2 = unicode_util.Block.get(u2)
>>> b2 == unicode_util.Block.BASIC_LATIN
True
>>> b3 = unicode_util.Block.get(u3)
>>> b3 != unicode_util.Block.BASIC_LATIN
True
>>> b3 == unicode_util.Block.HALFWIDTH_AND_FULLWIDTH_FORMS
True
>>> b4 = unicode_util.Block.get(u4)
>>> b4 == unicode_util.Block.LATIN_EXTENDED_A
True
>>> unicode_util.Block.get(u('ࡠ')) == unicode_util.Block.UNKNOWN
True
>>> try:
...     unknown_block = u('𓐰')
... except Exception:
...     unknown_block = u('')
>>> if len(unknown_block) == 1:  # We have a UCS4 build of Python
...     unicode_util.Block.get(u('𓐰')) == unicode_util.Block.UNKNOWN
... else:  # UCS2 build of Python; no unknown characters available
...     True
True
>>> unicode_util.digit(u1)
1
>>> unicode_util.digit(u2, -1)
-1
>>> unicode_util.digit(u3, -1)
2
>>> str(hash(b3))  # doctest: +ELLIPSIS
'...'
    N   )UnicodeMixinunicoduc                   @   sD  e Zd ZdZedZedZedZedZedZ	edZ
edZed	Zed
ZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZedZ edZ!edZ"ed Z#ed!Z$ed"Z%ed#Z&ed$Z'ed%Z(ed&Z)e*d'd( Z+d)S )*CategoryzaGeneral category of a Unicode character.

    See http://www.unicode.org/reports/tr18/#CategoriesLZLuZLlZLtZLmZLoMZMnZMcZMeNZNdZNlZNoSZSmZScZSkZSoPZPcZPdPsZPePiZPfZPoZZZsZZlZZpCZCcZCfZCsZCoZCnc                 C   s   t |}t t|S )zTReturn the general category code (as Unicode string) for the given Unicode character)r   unicodedatacategory)clsuni_char r   a/var/www/html/peyman_registration/venvv2/lib/python3.8/site-packages/phonenumbers/unicode_util.pyget{   s    zCategory.getN),__name__
__module____qualname____doc__r   ZLETTERUPPERCASE_LETTERLOWERCASE_LETTERTITLECASE_LETTERMODIFIER_LETTEROTHER_LETTERMARKZNON_SPACING_MARKZSPACING_COMBINING_MARKZENCLOSING_MARKNUMBERZDECIMAL_DIGIT_NUMBERZLETTER_NUMBERZOTHER_NUMBERZSYMBOLZMATH_SYMBOLZCURRENCY_SYMBOLZMODIFIER_SYMBOLZOTHER_SYMBOLZPUNCTUATIONZCONNECTOR_PUNCTUATIONZDASH_PUNCTUATIONZOPEN_PUNCTUATIONZCLOSE_PUNCTUATIONZINITIAL_PUNCTUATIONZFINAL_PUNCTUATIONZOTHER_PUNCTUATIONZ	SEPARATORZSPACE_SEPARATORZLINE_SEPARATORZPARAGRAPH_SEPARATORZOTHERZCONTROLZFORMATZ	SURROGATEZPRIVATE_USEZNOT_ASSIGNEDclassmethodr   r   r   r   r   r   Q   sP   r   c                 C   s<   t | }|t jkp:|t jkp:|t jkp:|t jkp:|t jkS )zADetermine whether the given Unicode character is a Unicode letter)r   r   r   r   r   r   r   )r   r   r   r   r   	is_letter   s    

r#   c                   @   s:   e Zd ZdZdddZdd Zdd Zd	d
 Zdd ZdS )_BlockRangez?Describe the range of characters encompassed by a Unicode blockNc                 C   s    || _ || _|d k	r| ||< d S N)startend)selfr&   r'   Zregdictr   r   r   __init__   s    z_BlockRange.__init__c                 C   s&   t |tstS | j|jko$| j|jkS r%   )
isinstancer$   NotImplementedr&   r'   r(   otherr   r   r   __eq__   s    
z_BlockRange.__eq__c                 C   s
   | |k S r%   r   r,   r   r   r   __ne__   s    z_BlockRange.__ne__c                 C   s   t | j| jfS r%   )hashr&   r'   r(   r   r   r   __hash__   s    z_BlockRange.__hash__c                 C   s   t d| j| jf S )NzBlock[%04x, %04x])r   r&   r'   r1   r   r   r   __unicode__   s    z_BlockRange.__unicode__)N)	r   r   r   r   r)   r.   r/   r2   r3   r   r   r   r   r$      s   
r$   c                   @   sL  e Zd ZdZi ZdZeddeZeddeZeddeZ	ed	d
eZ
eddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZedd eZed!d"eZed#d$eZed%d&eZed'd(eZed)d*eZed+d,eZed-d.eZed/d0eZed1d2eZed3d4eZed5d6eZ ed7d8eZ!ed9d:eZ"ed;d<eZ#ed=d>eZ$ed?d@eZ%edAdBeZ&edCdDeZ'edEdFeZ(edGdHeZ)edIdJeZ*edKdLeZ+edMdNeZ,edOdPeZ-edQdReZ.edSdTeZ/edUdVeZ0edWdXeZ1edYdZeZ2ed[d\eZ3ed]d^eZ4ed_d`eZ5edadbeZ6edcddeZ7ededfeZ8edgdheZ9edidjeZ:edkdleZ;edmdneZ<edodpeZ=edqdreZ>edsdteZ?edudveZ@edwdxeZAedydzeZBed{d|eZCed}d~eZDeddeZEeddeZFeddeZGeddeZHeddeZIeddeZJeddeZKeddeZLeddeZMeddeZNeddeZOeddeZPeddeZQeddeZReddeZSeddeZTeddeZUeddeZVeddeZWeddeZXeddeZYeddeZZeddeZ[eddeZ\eddeZ]eddeZ^eddeZ_eddeZ`eddeZaeddeZbeddeZceddeZdeddeZeeddeZfeddeZgeddeZheddeZieddeZjeddeZkeddeZleddeZmeddeZneddeZoeddeZpeddeZqeddeZreddeZseddeZteddeZueddeZveddeZweddeZxeddeZyeddeZzeddeZ{eddeZ|eddeZ}eddeZ~eddeZeddeZeddeZeddeZeddeZeddeZedd eZeddeZeddeZeddeZeddeZed	d
eZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZedd eZed!d"eZed#d$eZed%d&eZed'd(eZed)d*eZed+d,eZed-d.eZed/d0eZed1d2eZed3d4eZed5d6eZed7d8eZed9d:eZed;d<eZed=d>eZed?d@eZedAdBeZedCdDeZedEdFeZedGdHeZedIdJeZedKdLeZedMdNeZedOdPeZedQdReZedSdTeZedUdVeZedWdXeZedYdZeZed[d\eZed]d^eZed_d`eZedadbeZedcddeZededfeZedgdheZedidjeZedkdleZedmdneZedodpeZedqdreZedsdteZedudveZedwdxeZedydzeZed{d|eZed}d~eZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddeZeddZeِdd ZdS (  Blockz*Description of the possible Unicode blocksNr               i  i  iO  iP  i  i  i  i   io  ip  i  i   i  i   i/  i0  i  i  i  i   i  i   iO  iP  i  i  i  i  i  i   i?  i@  i_  i 	  i	  i	  i	  i 
  i
  i
  i
  i   i  i  i  i   i  i  i  i   i  i  i  i   i  i  i  i   i  i   i  i  i  i   i  i   i  i  i  i  i  i   i  i  i  i  i  i   i  i   i?  i@  i_  i`  i  i  i  i   i  i  i  i   iO  iP  i  i  i  i  i  i   i  i   i  i   i  i  i  i  i  i   iO  iP  i  i  i  i   i  i  i  i  i  i   i  i   i  i    io   ip   i   i   i   i   i   i !  iO!  iP!  i!  i!  i!  i "  i"  i #  i#  i $  i?$  i@$  i_$  i`$  i$  i %  i%  i%  i%  i%  i%  i &  i&  i '  i'  i'  i'  i'  i'  i (  i(  i )  i)  i)  i)  i *  i*  i +  i+  i ,  i_,  i`,  i,  i,  i,  i -  i/-  i0-  i-  i-  i-  i-  i-  i .  i.  i.  i.  i /  i/  i/  i/  i 0  i?0  i@0  i0  i0  i0  i 1  i/1  i01  i1  i1  i1  i1  i1  i1  i1  i1  i1  i 2  i2  i 3  i3  i 4  iM  iM  iM  i N  i  i   i  i  iϤ  iФ  i  i   i?  i@  i  i  i  i   i  i   i  i   i/  i0  i?  i@  i  i  iߨ  i  i  i   i/  i0  i_  i`  i  i  iߩ  i   i_  i`  i  i  iߪ  i   i/  i  i  i   i  i  i  i   i  i  i  i   i  i   i  i   i  i   iO  iP  i  i   i  i  i  i   i/  i0  iO  iP  io  ip  i  i   i  i  i  i   i  i  i  i  i? i@ i i i i i i i i i i  i/ i0 iO i i i i i  iO iP i i i i  i? i@ i_ i 	 i	 i 	 i?	 i 
 i_
 i`
 i
 i  i? i@ i_ i` i i  iO i` i i  i i i i   i# i $ i$ i 0 i/4 i h i?j i  i i  i i  i i  iO i  i_ i` i i  i i  i/ i0 i i i i  i i  i i  i i  iO i i i  i i   iߦ i  i? i@ i i  i i   i  i  i i   i i   i c                 C   s   t |}t|}tjdkr*ttj t_ttj|}|dkr|tjtj|d   j	kr|tjtj|d   j
krtjtj|d   S |ttjk r|tjtj|  j	kr|tjtj|  j
krtjtj|  S tjS dS )z7Return the Unicode block of the given Unicode characterNr   r   )r   ordr4   _RANGE_KEYSsorted_RANGESkeysbisectbisect_leftr&   r'   lenUNKNOWN)r   r   
code_pointidxr   r   r   r   }  s$    
z	Block.get)r   r   r   r   r=   r;   r$   ZBASIC_LATINZLATIN_1_SUPPLEMENTZLATIN_EXTENDED_AZLATIN_EXTENDED_BZIPA_EXTENSIONSZSPACING_MODIFIER_LETTERSZCOMBINING_DIACRITICAL_MARKSZGREEK_AND_COPTICZCYRILLICZCYRILLIC_SUPPLEMENTZARMENIANZHEBREWZARABICZSYRIACZARABIC_SUPPLEMENTZTHAANAZNKOZ	SAMARITANZMANDAICZ
DEVANAGARIZBENGALIZGURMUKHIZGUJARATIZORIYAZTAMILZTELUGUZKANNADAZ	MALAYALAMZSINHALAZTHAILAOZTIBETANZMYANMARZGEORGIANZHANGUL_JAMOZETHIOPICZETHIOPIC_SUPPLEMENTZCHEROKEEZ%UNIFIED_CANADIAN_ABORIGINAL_SYLLABICSZOGHAMZRUNICZTAGALOGZHANUNOOZBUHIDZTAGBANWAZKHMERZ	MONGOLIANZ.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDEDZLIMBUZTAI_LEZNEW_TAI_LUEZKHMER_SYMBOLSZBUGINESEZTAI_THAMZBALINESEZ	SUNDANESEZBATAKZLEPCHAZOL_CHIKIZVEDIC_EXTENSIONSZPHONETIC_EXTENSIONSZPHONETIC_EXTENSIONS_SUPPLEMENTZ&COMBINING_DIACRITICAL_MARKS_SUPPLEMENTZLATIN_EXTENDED_ADDITIONALZGREEK_EXTENDEDZGENERAL_PUNCTUATIONZSUPERSCRIPTS_AND_SUBSCRIPTSZCURRENCY_SYMBOLSZ'COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLSZLETTERLIKE_SYMBOLSZNUMBER_FORMSZARROWSZMATHEMATICAL_OPERATORSZMISCELLANEOUS_TECHNICALZCONTROL_PICTURESZOPTICAL_CHARACTER_RECOGNITIONZENCLOSED_ALPHANUMERICSZBOX_DRAWINGZBLOCK_ELEMENTSZGEOMETRIC_SHAPESZMISCELLANEOUS_SYMBOLSZDINGBATSZ$MISCELLANEOUS_MATHEMATICAL_SYMBOLS_AZSUPPLEMENTAL_ARROWS_AZBRAILLE_PATTERNSZSUPPLEMENTAL_ARROWS_BZ$MISCELLANEOUS_MATHEMATICAL_SYMBOLS_BZ#SUPPLEMENTAL_MATHEMATICAL_OPERATORSZ MISCELLANEOUS_SYMBOLS_AND_ARROWSZ
GLAGOLITICZLATIN_EXTENDED_CZCOPTICZGEORGIAN_SUPPLEMENTZTIFINAGHZETHIOPIC_EXTENDEDZCYRILLIC_EXTENDED_AZSUPPLEMENTAL_PUNCTUATIONZCJK_RADICALS_SUPPLEMENTZKANGXI_RADICALSZ"IDEOGRAPHIC_DESCRIPTION_CHARACTERSZCJK_SYMBOLS_AND_PUNCTUATIONZHIRAGANAZKATAKANAZBOPOMOFOZHANGUL_COMPATIBILITY_JAMOZKANBUNZBOPOMOFO_EXTENDEDZCJK_STROKESZKATAKANA_PHONETIC_EXTENSIONSZENCLOSED_CJK_LETTERS_AND_MONTHSZCJK_COMPATIBILITYZ"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_AZYIJING_HEXAGRAM_SYMBOLSZCJK_UNIFIED_IDEOGRAPHSZYI_SYLLABLESZYI_RADICALSZLISUZVAIZCYRILLIC_EXTENDED_BZBAMUMZMODIFIER_TONE_LETTERSZLATIN_EXTENDED_DZSYLOTI_NAGRIZCOMMON_INDIC_NUMBER_FORMSZPHAGS_PAZ
SAURASHTRAZDEVANAGARI_EXTENDEDZKAYAH_LIZREJANGZHANGUL_JAMO_EXTENDED_AZJAVANESEZCHAMZMYANMAR_EXTENDED_AZTAI_VIETZETHIOPIC_EXTENDED_AZMEETEI_MAYEKZHANGUL_SYLLABLESZHANGUL_JAMO_EXTENDED_BZHIGH_SURROGATESZHIGH_PRIVATE_USE_SURROGATESZLOW_SURROGATESZPRIVATE_USE_AREAZCJK_COMPATIBILITY_IDEOGRAPHSZALPHABETIC_PRESENTATION_FORMSZARABIC_PRESENTATION_FORMS_AZVARIATION_SELECTORSZVERTICAL_FORMSZCOMBINING_HALF_MARKSZCJK_COMPATIBILITY_FORMSZSMALL_FORM_VARIANTSZARABIC_PRESENTATION_FORMS_BZHALFWIDTH_AND_FULLWIDTH_FORMSZSPECIALSZLINEAR_B_SYLLABARYZLINEAR_B_IDEOGRAMSZAEGEAN_NUMBERSZANCIENT_GREEK_NUMBERSZANCIENT_SYMBOLSZPHAISTOS_DISCZLYCIANZCARIANZ
OLD_ITALICZGOTHICZUGARITICZOLD_PERSIANZDESERETZSHAVIANZOSMANYAZCYPRIOT_SYLLABARYZIMPERIAL_ARAMAICZ
PHOENICIANZLYDIANZ
KHAROSHTHIZOLD_SOUTH_ARABIANZAVESTANZINSCRIPTIONAL_PARTHIANZINSCRIPTIONAL_PAHLAVIZ
OLD_TURKICZRUMI_NUMERAL_SYMBOLSZBRAHMIZKAITHIZ	CUNEIFORMZ!CUNEIFORM_NUMBERS_AND_PUNCTUATIONZEGYPTIAN_HIEROGLYPHSZBAMUM_SUPPLEMENTZKANA_SUPPLEMENTZBYZANTINE_MUSICAL_SYMBOLSZMUSICAL_SYMBOLSZANCIENT_GREEK_MUSICAL_NOTATIONZTAI_XUAN_JING_SYMBOLSZCOUNTING_ROD_NUMERALSZ!MATHEMATICAL_ALPHANUMERIC_SYMBOLSZMAHJONG_TILESZDOMINO_TILESZPLAYING_CARDSZ ENCLOSED_ALPHANUMERIC_SUPPLEMENTZENCLOSED_IDEOGRAPHIC_SUPPLEMENTZ%MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHSZ	EMOTICONSZTRANSPORT_AND_MAP_SYMBOLSZALCHEMICAL_SYMBOLSZ"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_BZ"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_CZ"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_DZ'CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENTZTAGSZVARIATION_SELECTORS_SUPPLEMENTZ SUPPLEMENTARY_PRIVATE_USE_AREA_AZ SUPPLEMENTARY_PRIVATE_USE_AREA_BrB   r"   r   r   r   r   r   r4      s  r4   c                 C   s*   t | } |dk	rt| |S t| S dS )zReturns the digit value assigned to the Unicode character uni_char as
    integer. If no such value is defined, default is returned, or, if not
    given, ValueError is raised.N)r   r   digit)r   default_valuer   r   r   rF     s    rF   __main__)N)r   r?   r   utilr   r   r   objectr   r#   r$   r4   rF   r   doctesttestmodr   r   r   r   <module>   s   J1
 o
