cdimascio · sangeethjayaprakash · Jul 13, 2023 · Apr 3, 2024 · Apr 3, 2024 · Apr 3, 2024
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 ![Travis Build](https://travis-ci.org/cdimascio/py-readability-metrics.svg?branch=master) ![Python](https://img.shields.io/badge/python-3.x-blue.svg) [![Documentation Status](https://readthedocs.org/projects/py-readability-metrics/badge/?version=latest)](https://py-readability-metrics.readthedocs.io/en/latest/?badge=latest) [![wheel](https://img.shields.io/badge/wheel-yes-ff00c9.svg)](https://pypi.org/project/py-readability-metrics/) [![](https://img.shields.io/gitter/room/cdimascio-oss/community?color=%23eb205a)](https://gitter.im/cdimascio-oss/community) [![All Contributors](https://img.shields.io/badge/all_contributors-1-orange.svg?style=flat-square)](#contributors-)
  [![MIT license](https://img.shields.io/badge/License-MIT-green.svg)](https://lbesson.mit-license.org/)
 
-Score the _readability_ of text using popular readability formulas and metrics including: [Flesch Kincaid Grade Level](#flesch-kincaid-grade-level), [Flesch Reading Ease](#flesch-reading-ease), [Gunning Fog Index](#gunning-fog), [Dale Chall Readability](#dale-chall-readability), [Automated Readability Index (ARI)](#automated-readability-index-ari), [Coleman Liau Index](#coleman-liau-index), [Linsear Write](#linsear-write), [SMOG](#smog), and [SPACHE](#spache). 📗
+Score the _readability_ of text using popular readability formulas and metrics including: [Flesch Kincaid Grade Level](#flesch-kincaid-grade-level), [Flesch Reading Ease](#flesch-reading-ease), [Gunning Fog Index](#gunning-fog), [Dale Chall Readability](#dale-chall-readability), [Automated Readability Index (ARI)](#automated-readability-index-ari), [Coleman Liau Index](#coleman-liau-index), [Linsear Write](#linsear-write), [SMOG](#smog), [SPACHE](#spache), and [Lix](#lix). 📗
 
 [![GitHub stars](https://img.shields.io/github/stars/cdimascio/py-readability-metrics.svg?style=social&label=Star&maxAge=2592000)](https://GitHub.com/cdimascio/py-readability-metrics/stargazers/) [![Twitter URL](https://img.shields.io/twitter/url/https/github.com/cdimascio/py-readability-metrics.svg?style=social)](https://twitter.com/intent/tweet?text=Check%20out%20py-readability-metrics%20by%20%40CarmineDiMascio%20https%3A%2F%2Fgithub.com%2Fcdimascio%2Fpy-readability-metrics%20%F0%9F%91%8D)
 
@@ -34,6 +34,7 @@ r.ari()
 r.linsear_write()
 r.smog()
 r.spache()
+r.lix()
 ```
 
 **\*Note:** `text` must contain >= 100 words\*
@@ -49,6 +50,7 @@ r.spache()
 - [SMOG](#smog)
 - [Spache](#spache)
 - [Linsear Write](#linsear-write)
+- [Lix](#lix)
 
 ## Readability Metric Details and Properties
 
@@ -240,6 +242,24 @@ print(lw.score)
 print(lw.grade_level)
 ```
 
+### Lix
+
+Lix (short for the Swedish _läsbarhetsindex_, "readability index") is a readability measure for Scandinavian and Western European languages, developed by Carl-Hugo Björnsson. It is defined as the sum of the average sentence length (in words) and the percentage of words with more than six letters.
+
+**_call:_**
+
+```python
+r.lix()
+```
+
+**_example:_**
+
+```python
+s = r.lix()
+print(s.score)
+print(s.ease)
+```
+
 ## [Contributing](CONTRIBUTING.md)
 
 Contributions are welcome!

diff --git a/docs/source/.Rhistory b/docs/source/.Rhistory
diff --git a/docs/source/lix.rst b/docs/source/lix.rst
@@ -0,0 +1,25 @@
+Läsbarhetsindex
+===============
+
+About
+^^^^^
+
+A readability index for Swedish and other European languages. [reference]_
+
+Usage
+^^^^^
+
+.. code-block:: python
+
+    r = Readability(text)
+
+    f = r.lix()
+
+    print(f.score)
+    print(f.ease)
+
+
+References
+----------
+
+.. [reference] `Lix (readability test) <https://en.wikipedia.org/wiki/Lix_(readability_test)>`_
diff --git a/readability/readability.py b/readability/readability.py
@@ -1,7 +1,6 @@
 from .text import Analyzer
 from .scorers import ARI, ColemanLiau, DaleChall, Flesch, \
-    FleschKincaid, GunningFog, LinsearWrite, Smog, Spache
-import warnings
+    FleschKincaid, GunningFog, LinsearWrite, Smog, Spache, Lix
 
 class Readability:
     def __init__(self, text, min_words=100):
@@ -50,11 +49,16 @@ def smog(self,all_sentences=False, ignore_length=False):
     def spache(self):
         """Spache Index."""
         return Spache(self._statistics, self._min_words).score()
+
+    def lix(self):
+        """Lix (läsbarhetsindex) readability score."""
+        return Lix(self._statistics, self._min_words).score()
 
     def statistics(self):
         return {
             'num_letters': self._statistics.num_letters,
             'num_words': self._statistics.num_words,
+            'num_long_words': self._statistics.num_long_words,
             'num_sentences': self._statistics.num_sentences,
             'num_polysyllabic_words': self._statistics.num_poly_syllable_words,
             'avg_words_per_sentence': self._statistics.avg_words_per_sentence,

diff --git a/readability/scorers/__init__.py b/readability/scorers/__init__.py
@@ -1,5 +1,6 @@
 
 from .flesch import Flesch
+from .lix import Lix
 from .flesch_kincaid import FleschKincaid
 from .gunning_fog import GunningFog
 from .coleman_liau import ColemanLiau

diff --git a/readability/scorers/lix.py b/readability/scorers/lix.py
@@ -0,0 +1,42 @@
+from readability.exceptions import ReadabilityException
+
+
+class Result:
+    """Value object pairing a Lix score with its ease-of-reading label."""
+
+    def __init__(self, score, ease):
+        self.score, self.ease = score, ease
+
+    def __str__(self):
+        return "score: {}, ease: '{}'".format(self.score, self.ease)
+
+
+class Lix:
+    """Lix (läsbarhetsindex) scorer over precomputed text statistics."""
+
+    def __init__(self, stats, min_words=100):
+        """Keep `stats`; raise ReadabilityException if it has too few words."""
+        self._stats = stats
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
+
+    def score(self):
+        """Return a Result holding the Lix score and its ease label."""
+        value = self._score()
+        return Result(score=value, ease=self._ease(value))
+
+    def _score(self):
+        """Average words per sentence plus percentage of long words."""
+        s = self._stats
+        avg_sentence_len = s.num_words / s.num_sentences
+        long_word_pct = s.num_long_words / s.num_words * 100
+        return avg_sentence_len + long_word_pct
+
+    def _ease(self, score):
+        """Map a score to a label; a band applies only strictly above its bound."""
+        bands = ((60, 'very_difficult'), (50, 'difficult'),
+                 (40, 'medium difficulty'), (30, 'easy reading'))
+        for lower_bound, label in bands:
+            if score > lower_bound:
+                return label
+        return 'very easy'
diff --git a/readability/text/analyzer.py b/readability/text/analyzer.py
@@ -25,6 +25,10 @@ def num_letters(self):
     def num_words(self):
         return self.stats['num_words']
 
+    @property
+    def num_long_words(self):
+        return self.stats['num_long_words']  # words longer than 6 characters
+
     @property
     def num_sentences(self):
         return self.stats['num_sentences']
@@ -71,6 +75,7 @@ def _statistics(self, text):
         syllable_count = 0
         poly_syllable_count = 0
         word_count = 0
+        long_word_count = 0
         letters_count = 0
         gunning_complex_count = 0
         dale_chall_complex_count = 0
@@ -92,11 +97,14 @@ def is_spache_complex(t):
 
         for t in tokens:
 
+            num_word_letters = 0
             if not self._is_punctuation(t):
                 word_count += 1
                 word_syllable_count = count_syllables(t)
                 syllable_count += word_syllable_count
                 letters_count += len(t)
+                word_num_letters = len(t)
+                long_word_count += 1 if word_num_letters > 6 else 0
                 poly_syllable_count += 1 if word_syllable_count >= 3 else 0
                 gunning_complex_count += \
                     1 if is_gunning_complex(t, word_syllable_count) \
@@ -113,6 +121,7 @@ def is_spache_complex(t):
             'num_syllables': syllable_count,
             'num_poly_syllable_words': poly_syllable_count,
             'num_words': word_count,
+            'num_long_words': long_word_count,
             'num_sentences': sentence_count,
             'num_letters': letters_count,
             'num_gunning_complex': gunning_complex_count,

diff --git a/test/test_readability.py b/test/test_readability.py
@@ -36,6 +36,16 @@ def test_flesch(self):
         self.assertEqual(['10', '11', '12'], r.grade_levels)
         self.assertEqual('fairly_difficult', r.ease)
 
+    def test_lix(self):
+        text = """Läsbarhetsindex (LIX) kan användas för att få uppfattning om hur lätt eller svår en text är att läsa. LIX är baserat på medeltalet ord per mening och andelen långa ord (ord med fler än 6 bokstäver) uttryckt i procent. Det finns flera olika läsbarhetsindex, men i 		Sverige är LIX det mest använda. LIX utvecklades på 1960-talet av pedagogikforskaren Carl-Hugo Björnsson. 
+	Läsbarhetsindex (LIX) kan användas för att få uppfattning om hur lätt eller svår en text är att läsa. LIX är baserat på medeltalet ord per mening och andelen långa ord (ord med fler än 6 bokstäver) uttryckt i procent. Det finns flera olika läsbarhetsindex, men i Sverige är LIX det mest använda. LIX utvecklades på 1960-talet av pedagogikforskaren Carl-Hugo Björnsson. 
+    	"""
+        readability = Readability(text)  # dedicated Swedish sample, not the shared fixture
+        r = readability.lix()
+        print(r)
+        self.assertAlmostEqual(41.47950819672131, r.score)  # computed float: avoid exact equality
+        self.assertEqual('medium difficulty', r.ease)
+
     def test_flesch_kincaid(self):
         r = self.readability.flesch_kincaid()
         print(r)
@@ -90,3 +100,7 @@ def test_print_stats(self):
         self.assertEqual(117, stats['num_words'])
         self.assertEqual(7, stats['num_sentences'])
         self.assertEqual(20, stats['num_polysyllabic_words'])
+
+
+
+