Skip to content

Commit 252f6c8

Browse files
committed
Implemented tests for the n-gram statistic measures
Implemented tests for the statistic measures of the n-grams
1 parent 33b86c9 commit 252f6c8

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
<?php
2+
3+
namespace Tests\TextAnalysis\NGrams;
4+
5+
use TextAnalysis\NGrams\StatisticFacade;
6+
use TextAnalysis\Tokenizers\RegexTokenizer;
7+
use TextAnalysis\NGrams\NGramFactory;
8+
9+
/**
10+
* Tests the n-gram statistic measures computed by StatisticFacade
11+
*
12+
* @author yooper <yooper>
13+
*/
14+
class StatisticFacadeTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Raw contents of the text_ngrams.txt fixture.
     *
     * @var string
     */
    private $text;

    /**
     * Result of normalize_tokens() applied to the tokenized fixture text.
     *
     * @var mixed
     */
    private $tokens;

    /**
     * Loads the fixture text and tokenizes it before every test.
     *
     * The fixture is built here rather than in an overridden constructor:
     * replacing the constructor without calling parent::__construct() skips
     * the base test case's own initialisation, whereas setUp() is the hook
     * PHPUnit provides for per-test fixtures.
     */
    protected function setUp()
    {
        $path = TEST_DATA_DIR . DS . 'Text' . DS . 'Analysis' . DS . 'text_ngrams.txt';

        $contents = file_get_contents($path);
        if ($contents === false) {
            // Fail loudly instead of silently tokenizing `false`.
            $this->fail('Unable to read n-gram fixture file: ' . $path);
        }
        $this->text = $contents;

        $tokenizer = new RegexTokenizer('/([\p{L}]+[\/\-_\']?[\p{L}]+)+|[\p{L}]+/iu');
        $this->tokens = normalize_tokens($tokenizer->tokenize($this->text));
    }

    /**
     * Checks the frequency entry and every supported statistic measure
     * for the bigram "know<>something".
     */
    public function testBigrams()
    {
        $key = 'know<>something';

        $ngrams = NGramFactory::create($this->tokens, 2, '<>');
        $ngrams = NGramFactory::getFreq($ngrams, '<>');

        // Frequency entry for the bigram (expected value first, actual second).
        $this->assertEquals(array(0 => 2, 1 => 3, 2 => 2), $ngrams[$key]);

        // Measure name => expected value rounded to four decimals.
        $expectedByMeasure = array(
            'tmi'         => 0.1612,
            'll'          => 13.8516,
            'pmi'         => 4.3692,
            'dice'        => 0.8000,
            'x2'          => 40.6444,
            'tscore'      => 1.3458,
            'phi'         => 0.6556,
            'odds'        => 118.0000,
            'leftFisher'  => 1.0000,
            'rightFisher' => 0.0016,
        );

        foreach ($expectedByMeasure as $measure => $expected) {
            $this->assertMeasure($expected, $ngrams, $measure, 2, $key);
        }
    }

    /**
     * Checks the frequency entry and the tmi / ll measures
     * for the trigram "the<>know<>something".
     */
    public function testTrigrams()
    {
        $key = 'the<>know<>something';

        $ngrams = NGramFactory::create($this->tokens, 3, '<>');
        $ngrams = NGramFactory::getFreq($ngrams, '<>');

        // Frequency entry for the trigram (expected value first, actual second).
        $this->assertEquals(
            array(0 => 1, 1 => 4, 2 => 3, 3 => 2, 4 => 1, 5 => 1, 6 => 2),
            $ngrams[$key]
        );

        // Measure name => expected value rounded to four decimals.
        $expectedByMeasure = array(
            'tmi' => 0.2002,
            'll'  => 16.9283,
        );

        foreach ($expectedByMeasure as $measure => $expected) {
            $this->assertMeasure($expected, $ngrams, $measure, 3, $key);
        }
    }

    /**
     * Asserts that one statistic measure, rounded to four decimals,
     * equals the expected value for the given n-gram key.
     *
     * @param float  $expected  Expected measure value, already rounded to four decimals.
     * @param mixed  $ngrams    Frequency data returned by NGramFactory::getFreq().
     * @param string $measure   Measure identifier passed to StatisticFacade::calculate().
     * @param int    $ngramSize Size of the n-grams (2 for bigrams, 3 for trigrams).
     * @param string $key       N-gram key to look up in the calculated statistics.
     */
    private function assertMeasure($expected, $ngrams, $measure, $ngramSize, $key)
    {
        $stats = StatisticFacade::calculate($ngrams, $measure, $ngramSize);

        $this->assertEquals(
            $expected,
            round($stats[$key], 4),
            "Unexpected '" . $measure . "' value for n-gram '" . $key . "'"
        );
    }
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
something interesting the mock turtle heavy sobslastly she pictured to herself as she could the know something interesting Dormouse is asleep again said the hatterand he wasn t going to shrink any further she felt certain it must be shutting up like telescopesthis time with one fingeras he fumbled over the listfeeling very curious to know what it means i know something interesting

0 commit comments

Comments
 (0)