@@ -63,19 +63,23 @@ static public function getFreq(array $ngrams, string $sep = ' ') : array
63
63
//creates an array of tokens per ngram
64
64
$ ngramsArray = self ::ngramsAsArray ($ sep , $ ngrams );
65
65
66
+ $ ngramSize = count ($ ngramsArray [0 ]);
67
+
68
+ $ tokens_frequencies = self ::readFreq ($ sep , $ ngrams );
69
+ $ combo_frequencies = self ::readCombFreq ($ sep , $ ngrams );
70
+
66
71
//iterate the array with no repeated ngrams
67
72
foreach ($ ngramsUnique as $ ngramString => $ ngramFrequency ) {
68
73
$ ngramsFinal [$ ngramString ] = array ($ ngramFrequency ); //putting into the final array an array of frequencies (first, the ngram frequency)
69
74
70
75
$ ngramArray = explode ($ sep , $ ngramString ); //getting an array of tokens of the ngram
71
- $ ngramSize = count ($ ngramArray ); //getting the size of ngram
72
76
foreach ($ ngramArray as $ kToken => $ token ) { //iterating the array of tokens of the ngram
73
- $ ngramsFinal [$ ngramString ][$ kToken +1 ] = self :: countFreq ( $ ngramsArray , $ token, $ kToken) ; //getting the frequency of the token
77
+ $ ngramsFinal [$ ngramString ][$ kToken +1 ] = $ tokens_frequencies [ $ token][ $ kToken] ; //getting the frequency of the token
74
78
75
79
if ($ ngramSize > 2 ) {
76
80
//getting the combined frequency of the tokens
77
81
for ($ i = $ kToken +1 ; $ i < $ ngramSize ; $ i ++) {
78
- $ ngramsFinal [$ ngramString ][$ ngramSize +$ kToken +$ i ] = self :: countFreq ( $ ngramsArray , $ token, $ kToken , $ ngramArray [$ i ], $ i ) ;
82
+ $ ngramsFinal [$ ngramString ][$ ngramSize +$ kToken +$ i ] = $ combo_frequencies [ $ token. $ sep . $ ngramArray [$ i ]][ $ kToken ][ $ i ] ;
79
83
}
80
84
}
81
85
}
@@ -86,32 +90,50 @@ static public function getFreq(array $ngrams, string $sep = ' ') : array
86
90
}
87
91
88
92
/**
 * Builds a per-position frequency table for the tokens of the given ngrams.
 *
 * The ngrams are first converted with self::ngramsAsArray(); every entry of
 * that result is walked and each (token, position) occurrence is tallied.
 *
 * @param string $sep    Separator forwarded to self::ngramsAsArray()
 * @param array  $ngrams Ngrams forwarded to self::ngramsAsArray()
 * @return array $frequencies Counts addressed as $frequencies[token][position]
 */
static public function readFreq(string $sep, array $ngrams) : array
{
    $frequencies = array();
    $tokenLists = self::ngramsAsArray($sep, $ngrams);

    foreach ($tokenLists as $tokenList) {
        foreach ($tokenList as $position => $word) {
            // A (token, position) pair not seen before starts from zero.
            $seenSoFar = $frequencies[$word][$position] ?? 0;
            $frequencies[$word][$position] = $seenSoFar + 1;
        }
    }

    return $frequencies;
}
113
+
114
/**
 * Builds a frequency table for pairs of tokens that occur together in an ngram.
 *
 * Each token is paired with every token that follows it in the same ngram.
 * The pair is keyed by "first token + separator + second token" and counted
 * per combination of the two positions.
 *
 * @param string $sep    Separator forwarded to self::ngramsAsArray() and used to join the pair key
 * @param array  $ngrams Ngrams forwarded to self::ngramsAsArray()
 * @return array $frequencies Counts addressed as $frequencies[pairKey][firstPosition][secondPosition]
 */
static public function readCombFreq(string $sep, array $ngrams) : array
{
    $frequencies = array();

    foreach (self::ngramsAsArray($sep, $ngrams) as $tokens) {
        foreach ($tokens as $firstPos => $firstToken) {
            // Only look forward so every pair of positions is visited once.
            for ($secondPos = $firstPos + 1; $secondPos < count($tokens); $secondPos++) {
                $pairKey = $firstToken . $sep . $tokens[$secondPos];
                $previous = $frequencies[$pairKey][$firstPos][$secondPos] ?? 0;
                $frequencies[$pairKey][$firstPos][$secondPos] = $previous + 1;
            }
        }
    }

    return $frequencies;
}
116
138
117
139
/**
0 commit comments