Skip to content

Commit 6fa5464

Browse files
authored
Merge pull request #13 from neocl/dev
Bump master version to 0.1a6
2 parents e70124c + d94fd05 commit 6fa5464

10 files changed

+224
-5
lines changed

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ include LICENSE
44
include requirements*.txt
55
recursive-include jamdict/data/ *.sql
66
recursive-include jamdict/data/ *.json
7+
recursive-include jamdict/data/ *.gz

README.md

-2
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,5 @@ result = jmd.lookup('食べる')
9797
...
9898
[id#1358280] たべる (食べる) : 1. to eat ((Ichidan verb|transitive verb)) 2. to live on (e.g. a salary)/to live off/to subsist on
9999
```
100-
>>> for c in result.chars:
101-
... print(repr(c))
102100

103101
See `jamdict_demo.py` and `jamdict/tools.py` for more information.

jamdict/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@
1010
__url__ = "https://github.com/neocl/jamdict"
1111
__maintainer__ = "Le Tuan Anh"
1212
__version_major__ = "0.1"
13-
__version__ = "{}a5".format(__version_major__)
13+
__version__ = "{}a6".format(__version_major__)
1414
__version_long__ = "{} - Alpha".format(__version_major__)
1515
__status__ = "Prototype"

jamdict/data/kradfile-u.gz

106 KB
Binary file not shown.

jamdict/data/radkfile.gz

39.9 KB
Binary file not shown.

jamdict/kanjidic2.py

+21
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,17 @@
5252
from lxml import etree
5353

5454
from chirptext import chio
55+
from chirptext.sino import Radical as KangxiRadical
5556

57+
from .krad import KRad
5658

5759
# ------------------------------------------------------------------------------
5860
# Configuration
5961
# ------------------------------------------------------------------------------
6062

63+
krad = KRad()
64+
65+
6166
def getLogger():
    """Return the logger registered under this module's ``__name__``."""
    module_logger = logging.getLogger(__name__)
    return module_logger
6368

@@ -112,6 +117,7 @@ def __init__(self):
112117
self.literal = '' # <!ELEMENT literal (#PCDATA)> The character itself in UTF8 coding.
113118
self.codepoints = [] # <!ELEMENT codepoint (cp_value+)>
114119
self.radicals = [] # <!ELEMENT radical (rad_value+)>
120+
self.__canon_radical = None
115121
self.stroke_count = None # first stroke_count in misc
116122
self.grade = None # <misc>/<grade>
117123
self.stroke_miscounts = [] # <misc>/stroke_count[1:]
@@ -135,6 +141,21 @@ def __repr__(self):
135141
def __str__(self):
136142
return self.literal
137143

144+
@property
def components(self):
    """Components recorded for this character in the module-level ``krad`` table.

    Looks up ``self.literal`` in ``krad.krad`` and returns the stored value,
    or a new empty list when the character has no entry.
    """
    return krad.krad.get(self.literal, [])
150+
151+
@property
def radical(self):
    """Canonical radical of this character, resolved lazily and cached.

    Scans ``self.radicals`` for entries whose ``rad_type`` is ``'classical'``
    and looks each one up via ``KangxiRadical.kangxi()[entry.value]``. When
    several classical entries exist the last one wins. Returns ``None`` when
    there is no classical entry; in that case nothing is cached and the scan
    runs again on the next access.
    """
    if self.__canon_radical is not None:
        return self.__canon_radical
    for entry in self.radicals:
        if entry.rad_type != 'classical':
            continue
        # NOTE(review): presumably kangxi() maps radical numbers to Radical
        # objects -- confirm against chirptext.sino.
        self.__canon_radical = KangxiRadical.kangxi()[entry.value]
    return self.__canon_radical
158+
138159
def to_json(self):
139160
return {'literal': self.literal,
140161
'codepoints': [cp.to_json() for cp in self.codepoints],

jamdict/krad.py

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# -*- coding: utf-8 -*-
2+
3+
'''
4+
Module for retrieving kanji components (i.e. radicals)
5+
Latest version can be found at https://github.com/neocl/jamdict
6+
7+
This package uses the RADKFILE/KRADFILE[1] file.
8+
These files are the property of the [Electronic Dictionary Research and Development Group][2], and are used in conformance with the Group's [licence][3].
9+
10+
[1]: http://www.edrdg.org/krad/kradinf.html
11+
[2]: http://www.edrdg.org/
12+
[3]: http://www.edrdg.org/edrdg/licence.html
13+
14+
References:
15+
JMDict website:
16+
http://www.csse.monash.edu.au/~jwb/edict.html
17+
Python documentation:
18+
https://docs.python.org/
19+
PEP 257 - Python Docstring Conventions:
20+
https://www.python.org/dev/peps/pep-0257/
21+
22+
@author: Le Tuan Anh <tuananh.ke@gmail.com>
23+
@license: MIT
24+
'''
25+
26+
# Copyright (c) 2016, Le Tuan Anh <tuananh.ke@gmail.com>
27+
#
28+
# Permission is hereby granted, free of charge, to any person obtaining a copy
29+
# of this software and associated documentation files (the "Software"), to deal
30+
# in the Software without restriction, including without limitation the rights
31+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
32+
# copies of the Software, and to permit persons to whom the Software is
33+
# furnished to do so, subject to the following conditions:
34+
#
35+
# The above copyright notice and this permission notice shall be included in
36+
# all copies or substantial portions of the Software.
37+
#
38+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
39+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
40+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
41+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
42+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
43+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
44+
# THE SOFTWARE.
45+
46+
########################################################################
47+
48+
import os
49+
import logging
50+
import threading
51+
from collections import OrderedDict
52+
from collections import defaultdict as dd
53+
from lxml import etree
54+
55+
from chirptext import chio
56+
from chirptext.sino import Radical
57+
58+
# ------------------------------------------------------------------------------
59+
# Configuration
60+
# ------------------------------------------------------------------------------
61+
MY_FOLDER = os.path.dirname(os.path.abspath(__file__))
62+
DATA_FOLDER = os.path.join(MY_FOLDER, 'data')
63+
KRADFILE = os.path.join(DATA_FOLDER, 'kradfile-u.gz')
64+
RADKFILE = os.path.join(DATA_FOLDER, 'radkfile.gz')
65+
66+
logger = logging.getLogger(__name__)
67+
68+
69+
########################################################################
70+
71+
class KRad:
    """Kanji Radical management.

    Lazily loads the KRADFILE data (``KRADFILE``) on first access and exposes
    two lookup tables:

    * ``krad`` -- character literal -> list of component strings
    * ``radk`` -- component string -> set of character literals using it
    """

    def __init__(self, **kwargs):
        """Create an empty manager; no file is read until a map is requested.

        Keyword arguments are accepted but not used by this constructor.
        """
        self.__krad_map = None
        self.__radk_map = None
        self.__rads = {}  # not used by any method of this class
        self.lock = threading.Lock()

    @staticmethod
    def _parse_krad_lines(lines):
        """Parse KRADFILE text lines into ``(krad_map, radk_map)``.

        Lines starting with ``#`` are skipped. Every other line is split on
        its first ``:``; the left side (stripped) is the character and the
        right side is split on whitespace into components. Lines without a
        ``:`` are ignored.
        """
        krad_map = {}
        radk_map = dd(set)
        for line in lines:
            if line.startswith("#"):
                continue
            parts = line.split(':', maxsplit=1)
            if len(parts) != 2:
                continue
            char_literal = parts[0].strip()
            # str.split() without arguments already discards surrounding
            # whitespace, so no per-item strip is needed.
            rads = parts[1].split()
            krad_map[char_literal] = rads
            for rad in rads:
                radk_map[rad].add(char_literal)
        return krad_map, radk_map

    def _build_krad_map(self):
        """(Re)load both lookup tables from ``KRADFILE``.

        The tables are built in local variables and only assigned to the
        instance once complete. The ``krad``/``radk`` properties test
        ``is None`` without holding the lock, so publishing an empty dict
        first would let a concurrent reader see a partially filled map.
        """
        with self.lock:
            lines = chio.read_file(KRADFILE, mode='rt').splitlines()
            krad_map, radk_map = self._parse_krad_lines(lines)
            # Publish only fully populated maps.
            self.__radk_map = radk_map
            self.__krad_map = krad_map

    @property
    def radk(self):
        """Map from component to the set of characters containing it."""
        if self.__radk_map is None:
            self._build_krad_map()
        return self.__radk_map

    @property
    def krad(self):
        """Map from character literal to its list of components."""
        if self.__krad_map is None:
            self._build_krad_map()
        return self.__krad_map

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
lxml
22
chirptext >= 0.1a18
3-
puchikarui
3+
puchikarui >= 0.1a3

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def read(*filenames, **kwargs):
5555
long_description=long_description,
5656
long_description_content_type='text/markdown',
5757
packages=['jamdict'],
58-
package_data={'jamdict': ['data/*.sql', 'data/*.json']},
58+
package_data={'jamdict': ['data/*.sql', 'data/*.json', 'data/*.gz']},
5959
include_package_data=True,
6060
platforms='any',
6161
test_suite='test',

test/test_krad.py

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
'''
5+
Script for testing KRad module library
6+
Latest version can be found at https://github.com/neocl/jamdict
7+
8+
References:
9+
Python documentation:
10+
https://docs.python.org/
11+
Python unittest
12+
https://docs.python.org/3/library/unittest.html
13+
--
14+
argparse module:
15+
https://docs.python.org/3/howto/argparse.html
16+
PEP 257 - Python Docstring Conventions:
17+
https://www.python.org/dev/peps/pep-0257/
18+
19+
@author: Le Tuan Anh <tuananh.ke@gmail.com>
20+
'''
21+
22+
# Copyright (c) 2016, Le Tuan Anh <tuananh.ke@gmail.com>
23+
#
24+
# Permission is hereby granted, free of charge, to any person obtaining a copy
25+
# of this software and associated documentation files (the "Software"), to deal
26+
# in the Software without restriction, including without limitation the rights
27+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
28+
# copies of the Software, and to permit persons to whom the Software is
29+
# furnished to do so, subject to the following conditions:
30+
#
31+
# The above copyright notice and this permission notice shall be included in
32+
# all copies or substantial portions of the Software.
33+
#
34+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
35+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
36+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
37+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
38+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
39+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40+
# THE SOFTWARE.
41+
42+
__author__ = "Le Tuan Anh <tuananh.ke@gmail.com>"
43+
__copyright__ = "Copyright 2016, jamdict"
44+
__license__ = "MIT"
45+
46+
########################################################################
47+
48+
import os
49+
import logging
50+
import unittest
51+
from jamdict import config
52+
from jamdict.jmdict import JMDictXMLParser
53+
from jamdict.kanjidic2 import Kanjidic2XMLParser
54+
from jamdict.krad import KRad
55+
56+
########################################################################
57+
58+
MY_DIR = os.path.abspath(os.path.dirname(__file__))
59+
TEST_DATA = os.path.join(MY_DIR, 'data')
60+
MINI_JMD = os.path.join(TEST_DATA, 'JMdict_mini.xml')
61+
MINI_KD2 = os.path.join(TEST_DATA, 'kanjidic2_mini.xml')
62+
TEST_DB = os.path.join(TEST_DATA, 'jamdict_test.db')
63+
64+
65+
def getLogger():
    """Return the logger named after this test module."""
    logger_name = __name__
    return logging.getLogger(logger_name)
67+
68+
69+
class TestConfig(unittest.TestCase):
    """Checks on the jamdict configuration."""

    def test_config(self):
        """The config must contain ``KD2_XML`` and resolve it to a truthy value."""
        settings = config.read_config()
        self.assertIn('KD2_XML', settings)
        kd2_location = config.get_file('KD2_XML')
        self.assertTrue(kd2_location)
        config_location = config._get_config_manager().locate_config()
        getLogger().info("jamdict log file location: {}".format(config_location))
76+
77+
78+
class TestModels(unittest.TestCase):
    """Checks on the KRad lookup tables."""

    def test_read_krad(self):
        """Spot-check character->components and component->characters lookups."""
        kanji_radicals = KRad()
        expected_components = (
            ('㘅', ['亅', '二', '口', '彳', '金']),
            ('𪚲', ['乙', '勹', '月', '田', '亀']),
        )
        for kanji, parts in expected_components:
            self.assertEqual(kanji_radicals.krad[kanji], parts)
        expected_kanji = {'籥', '鸙', '龢', '龠', '龡', '籲', '瀹', '龥', '禴', '鑰', '爚', '龣'}
        self.assertEqual(kanji_radicals.radk['龠'], expected_kanji)
85+
86+
87+
########################################################################
88+
89+
if __name__ == "__main__":
90+
logging.getLogger('jamdict').setLevel(logging.DEBUG)
91+
unittest.main()

0 commit comments

Comments
 (0)