Commits

Artur Barseghyan committed cc95ece Draft

adding bulgarian pack

Comments (0)

Files changed (11)

 - Giorgos Georgiadis for pointing to a lack of documentation for registering custom language 
   packs and a nice example of using `transliterate` to get rid of accented characters in Greek,
   having other characters left intact (see example/example/foo/greekunaccented.py).
+- Petar Chakarov (@petarchakarov) for the Bulgarian language pack.
 
 Bug reporters
 ~~~~~~~~~~~~~~~~~~~~~~
 The following people contributed to the project by reporting bugs.
 
-- Malinich <miltrix@yandex.ru>: reported a bug with reversed translation of ъ and ь characters.
+- Malinich <miltrix@yandex.ru>: reported a bug with reversed translation of ъ and ь characters.
 Comes with language packs for the following languages (listed in alphabetical order):
 
 - Armenian
+- Bulgarian (beta)
 - Georgian (beta)
 - Greek (beta)
 - Russian
 
 Registering a custom language pack
 ----------------------------------
-Basics
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Make sure to call the `autodiscover` function before registering your own language packs if you want to
 use the bundled language packs along with your own custom ones.
 
 
 Forced language packs can't be replaced or unregistered.
 
-Mappings explained
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-There are 7 class properties that you could/should be using in your language pack, of which 4 are various
-sorts of mappings.
-
-- ``mapping`` (tuple): A tuple of two strings, that simply represent the characters from the source language
-  to be converted to the target language. For example, if your source language is Latin and you want to
-  convert "a", "b", "c", "d" and "e" characters to appropriate characters in the Russian Cyrillic, your
-  mapping would look as follows:
-
-  >>> mapping = (u"abcde", u"абцде")
-
-
-
 Using the lorem ipsum generator
 ----------------------------------
 Note, that due to incompatibility of the original `lorem-ipsum-generator` package with Python 3, when used

src/transliterate/base.py

         to source script (reversed transliteration).
     ``pre_processor_mapping``: Pre processor mapping (optional). A dictionary mapping for letters that can't be
         represented by a single latin letter.
-    ՝՝reversed_specific_pre_processor_mapping՝՝: Pre processor mapping (optional). A dictionary mapping for letters
-        that can't be represented by a single latin letter (reversed transliteration).
 
     :example:
 >>>    class ArmenianLanguagePack(TranslitLanguagePack):
 >>>        u"rR"
 >>>    )
 >>>    pre_processor_mapping = {
->>>        # lowercase
 >>>        u"e'": u"է",
 >>>        u"y": u"ը",
 >>>        u"th": u"թ",
 >>>        u"dj": u"ջ",
 >>>        u"ph": u"փ",
 >>>        u"u": u"ու",
->>>
->>>        # uppercase
->>>        u"E'": u"Է",
->>>        u"Y": u"Ը",
->>>        u"Th": u"Թ",
->>>        u"Jh": u"Ժ",
->>>        u"Ts": u"Ծ",
->>>        u"Dz": u"Ձ",
->>>        u"Gh": u"Ղ",
->>>        u"Tch": u"Ճ",
->>>        u"Sh": u"Շ",
->>>        u"Vo": u"Ո",
->>>        u"Ch": u"Չ",
->>>        u"Dj": u"Ջ",
->>>        u"Ph": u"Փ",
->>>        u"U": u"Ու"
->>>    }
->>>    reversed_specific_pre_processor_mapping = {
->>>        u"ու": u"u",
->>>        u"Ու": u"U"
 >>>    }
 
-    Note, that in Python 3 you won't be using u prefix before the strings.
+    Note that in Python 3 you won't be using the u prefix before the strings.
     """
     language_code = None
     language_name = None
     character_ranges = None
     mapping = None
     reversed_specific_mapping = None
-
     reversed_pre_processor_mapping_keys = []
-
     reversed_specific_pre_processor_mapping = None
     reversed_specific_pre_processor_mapping_keys = []
-
     pre_processor_mapping = None
     pre_processor_mapping_keys = []
-
     detectable = False
     characters = None
     reversed_characters = None
         :param int limit: Limit number of suggested variants.
         :return list:
         """
-        # TODO
 
     def detect(text, num_words=None):
         """

src/transliterate/contrib/languages/bg/__init__.py

Empty file added.

src/transliterate/contrib/languages/bg/data/__init__.py

+from transliterate.helpers import PY32
+
+if PY32:
+    from transliterate.contrib.languages.bg.data.python32 import *
+else:
+    from transliterate.contrib.languages.bg.data.standard import *

src/transliterate/contrib/languages/bg/data/python32.py

+# -*- coding: utf-8 -*-
+
+mapping = (
+    "abvgdeziyklmnoprstufhABVGDEZIYKLMNOPRSTUFH",
+    "абвгдезийклмнопрстуфхАБВГДЕЗИЙКЛМНОПРСТУФХ",
+)
+
+reversed_specific_mapping = (
+    "ьъЪ",
+    "yaA"
+)
+
+pre_processor_mapping = {
+    "zh": "ж",
+    "ts": "ц",
+    "ch": "ч",
+    "sh": "ш",
+    "sht": "щ",
+    "yu": "ю",
+    "ya": "я",
+    "Zh": "Ж",
+    "Ts": "Ц",
+    "Ch": "Ч",
+    "Sh": "Ш",
+    "Sht": "Щ",
+    "Yu": "Ю",
+    "Ya": "Я"
+}

src/transliterate/contrib/languages/bg/data/standard.py

+# -*- coding: utf-8 -*-
+
+mapping = (
+    u"abvgdeziyklmnoprstufhABVGDEZIYKLMNOPRSTUFH",
+    u"абвгдезийклмнопрстуфхАБВГДЕЗИЙКЛМНОПРСТУФХ",
+)
+
+reversed_specific_mapping = (
+    u"ьъЪ",
+    u"yaA"
+)
+
+pre_processor_mapping = {
+    u"zh": u"ж",
+    u"ts": u"ц",
+    u"ch": u"ч",
+    u"sh": u"ш",
+    u"sht": u"щ",
+    u"yu": u"ю",
+    u"ya": u"я",
+    u"Zh": u"Ж",
+    u"Ts": u"Ц",
+    u"Ch": u"Ч",
+    u"Sh": u"Ш",
+    u"Sht": u"Щ",
+    u"Yu": u"Ю",
+    u"Ya": u"Я"
+}

src/transliterate/contrib/languages/bg/translit_language_pack.py

+# -*- coding: utf-8 -*-
+
+__title__ = 'transliterate.contrib.languages.bg.translit_language_pack'
+__author__ = 'Petar Chakarov'
+__copyright__ = 'Copyright (c) 2014 Petar Chakarov'
+__license__ = 'GPL 2.0/LGPL 2.1'
+__all__ = ('BulgarianLanguagePack',)
+
+from transliterate.base import TranslitLanguagePack, registry
+from transliterate.contrib.languages.bg import data
+
+class BulgarianLanguagePack(TranslitLanguagePack):
+    """
+    Language pack for Bulgarian language. See http://en.wikipedia.org/wiki/Romanization_of_Bulgarian for details.
+    """
+    language_code = "bg"
+    language_name = "Bulgarian"
+    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))
+    mapping = data.mapping
+    reversed_specific_mapping = data.reversed_specific_mapping
+    pre_processor_mapping = data.pre_processor_mapping
+    detectable = False
+
+
+registry.register(BulgarianLanguagePack)

src/transliterate/tests/data/normal.py

 armenian_text = u'Լօրեմ իպսում դօլօր սիտ ամետ'
 cyrillic_text = u'Лорем ипсум долор сит амет'
 ukrainian_cyrillic_text = u'Лорем іпсум долор сіт амет'
+bulgarian_cyrillic_text = u'Лорем ипсум долор сит амет'
 georgian_text = u'Ⴊორემ იფსუმ დოლორ სით ამეთ'
 greek_text = u'Λορεμ ιψθμ δολορ σιτ αμετ'
 hebrew_text = u'Lורeמ יpסuמ דולור סית אמeת'

src/transliterate/tests/data/python32.py

 armenian_text = 'Լօրեմ իպսում դօլօր սիտ ամետ'
 cyrillic_text = 'Лорем ипсум долор сит амет'
 ukrainian_cyrillic_text = 'Лорем іпсум долор сіт амет'
+bulgarian_cyrillic_text = 'Лорем ипсум долор сит амет'
 georgian_text = 'Ⴊორემ იფსუმ დოლორ სით ამეთ'
 greek_text = 'Λορεμ ιψθμ δολορ σιτ αμετ'
 hebrew_text = 'Lורeמ יpסuמ דולור סית אמeת'

src/transliterate/tests/test_transliterate.py

         self.armenian_text = data.armenian_text
         self.cyrillic_text = data.cyrillic_text
         self.ukrainian_cyrillic_text = data.ukrainian_cyrillic_text
+        self.bulgarian_cyrillic_text = data.bulgarian_cyrillic_text
         self.georgian_text = data.georgian_text
         self.greek_text = data.greek_text
         self.hebrew_text = data.hebrew_text
         """
         res = get_available_language_codes()
         res.sort()
-        c = ['el', 'hy', 'ka', 'ru', 'uk'] #'he',
+        c = ['el', 'hy', 'ka', 'ru', 'uk', 'bg'] #'he',
         c.sort()
         self.assertEqual(res, c)
         return res
         res = translit(self.latin_text, 'uk')
         self.assertEqual(res, self.ukrainian_cyrillic_text)
         return res
+      
+    @print_info
+    def test_06_translit_latin_to_bulgarian_cyrillic(self):
+        """
+        Test transliteration from Latin to Bulgarian Cyrillic.
+        """
+        res = translit(self.latin_text, 'bg')
+        self.assertEqual(res, self.bulgarian_cyrillic_text)
+        return res
 
     @print_info
     def test_07_translit_armenian_to_latin(self):
     @print_info
     def test_11_translit_cyrillic_to_latin(self):
         """
-        Test transliteration from Cyrillic to Latun.
+        Test transliteration from Cyrillic to Latin.
         """
         res = translit(self.cyrillic_text, 'ru', reversed=True)
         self.assertEqual(res, self.latin_text)
     @print_info
     def test_11_translit_ukrainian_cyrillic_to_latin(self):
         """
-        Test transliteration from Ukrainian Cyrillic to Latun.
+        Test transliteration from Ukrainian Cyrillic to Latin.
         """
         res = translit(self.ukrainian_cyrillic_text, 'uk', reversed=True)
         self.assertEqual(res, self.latin_text)
         return res
+      
+    @print_info
+    def test_11_translit_bulgarian_cyrillic_to_latin(self):
+        """
+        Test transliteration from Bulgarian Cyrillic to Latin.
+        """
+        res = translit(self.bulgarian_cyrillic_text, 'bg', reversed=True)
+        self.assertEqual(res, self.latin_text)
+        return res
 
     @print_info
     def test_12_function_decorator(self):
         res = g_uk.generate_sentence()
         assert res
         return res
+      
+    @print_info
+    def test_20_translipsum_generator_bulgarian_cyrillic(self):
+        """
+        Testing the translipsum generator. Generating lorem ipsum sentence in Bulgarian Cyrillic.
+        """
+        g_bg = TranslipsumGenerator(language_code='bg')
+        res = g_bg.generate_sentence()
+        assert res
+        return res
 
     @print_info
     def test_21_language_detection_armenian(self):
         res = detect_language(self.ukrainian_cyrillic_text)
         self.assertEqual(res, 'uk')
         return res
+      
+    @print_info
+    def __test_25_language_detection_bulgarian_cyrillic(self):
+        """
+        Testing language detection. Detecting Bulgarian (Cyrillic).
+        """
+        res = detect_language(self.bulgarian_cyrillic_text)
+        self.assertEqual(res, 'bg')
+        return res
 
     @print_info
     def test_26_slugify_armenian(self):
         res = slugify(self.ukrainian_cyrillic_text, language_code='uk')
         self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
         return res
+      
+    @print_info
+    def test_30_slugify_bulgarian_cyrillic(self):
+        """
+        Testing slugify from Bulgarian Cyrillic.
+        """
+        res = slugify(self.bulgarian_cyrillic_text, language_code='bg')
+        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
+        return res
 
     @print_info
     def test_31_override_settings(self):