Skip to content

Instantly share code, notes, and snippets.

@ridhof
Created April 20, 2019 03:49
Show Gist options
  • Save ridhof/313d5a522ea758d3460c4f29e0c23829 to your computer and use it in GitHub Desktop.
Save ridhof/313d5a522ea758d3460c4f29e0c23829 to your computer and use it in GitHub Desktop.
<?php
require_once __DIR__.'/vendor/autoload.php';
use Phpml\FeatureExtraction\TokenCountVectorizer;
use Phpml\Tokenization\WhitespaceTokenizer;
use Phpml\FeatureExtraction\TfIdfTransformer;
use Phpml\Math\Distance\Manhattan;
use Phpml\Math\Distance\Minkowski;
// Corpus to analyse. The last entry is treated as the query (labelled "Q"
// in the output below); the entries before it are documents D1..Dn.
$sample_data = [
    'dolar naik harga naik hasil turun',
    'harga naik harus gaji naik',
    'premium tidak pengaruh dolar',
    'harga laptop naik',
    'naik harga'
];

// Learn the vocabulary from whitespace-separated tokens, then convert the
// corpus to token-count vectors. transform() receives $sample_data by
// reference, so from here on each entry is a numeric vector rather than
// the original sentence.
$tokenizer = new WhitespaceTokenizer();
$vectorizer = new TokenCountVectorizer($tokenizer);
$vectorizer->fit($sample_data);
$vectorizer->transform($sample_data);

// Vocabulary terms, used as the column headers of the tables below.
$vocabulary = $vectorizer->getVocabulary();
// Render the Term Frequency (TF) matrix as an HTML table:
// one column per vocabulary term, one row per entry of $sample_data.
$tfRowTotal = count($sample_data);

echo "<b>TERM FREQUENCY</b><br><br>";
echo "<table border='1'>";

// Header row: an empty corner cell followed by the vocabulary terms.
$tfHeader = "<tr><th align='center'></th>";
foreach ($vocabulary as $word) {
    $tfHeader .= "<th align='center'>{$word}</th>";
}
echo $tfHeader . "</tr>";

// Body rows: the final row is the query ("Q"), all others are "D<n>".
$tfRowNumber = 0;
foreach ($sample_data as $counts) {
    $tfRowNumber++;
    $rowLabel = ($tfRowNumber === $tfRowTotal) ? "Q" : "D{$tfRowNumber}";

    echo "<tr><td>{$rowLabel}</td>";
    foreach ($counts as $count) {
        echo "<td>{$count}</td>";
    }
    echo "</tr>";
}

echo "</table><br><br>";
// Compute TF-IDF: the transformer is fitted on the token-count vectors and
// then applied to them. transform() takes $sample_data by reference, so the
// rows hold the re-weighted values afterwards (the distance sections below
// read these values).
$tfIdfTransformer = new TfIdfTransformer($sample_data);
$tfIdfTransformer->transform($sample_data);

$tfIdfRowTotal = count($sample_data);

echo "<b>TF-IDF</b><br><br>";
echo "<table border='1'>";

// Header row: an empty corner cell followed by the vocabulary terms.
$tfIdfHeader = "<tr><th align='center'></th>";
foreach ($vocabulary as $word) {
    $tfIdfHeader .= "<th align='center'>{$word}</th>";
}
echo $tfIdfHeader . "</tr>";

// Body rows: the final row is the query ("Q"), all others are "D<n>".
// Weights are rounded to one decimal place for display only.
$tfIdfRowNumber = 0;
foreach ($sample_data as $weights) {
    $tfIdfRowNumber++;
    $rowLabel = ($tfIdfRowNumber === $tfIdfRowTotal) ? "Q" : "D{$tfIdfRowNumber}";

    echo "<tr><td>{$rowLabel}</td>";
    foreach ($weights as $weight) {
        $shown = round($weight, 1);
        echo "<td>{$shown}</td>";
    }
    echo "</tr>";
}

echo "</table><br><br>";
// Compute the Manhattan distance by way of Minkowski with lambda = 1.0:
// distance from the query (last vector) to every document vector.
$minkowskiMetric = new Minkowski(1.0);
$vectorTotal = count($sample_data);
$queryPosition = $vectorTotal - 1;

echo "<b>Manhattan dengan Minkowski(lambda = 1.0)</b><br><br>";

$docNumber = 0;
foreach ($sample_data as $vector) {
    $docNumber++;

    // The last vector is the query itself; it is not compared with itself.
    if ($docNumber === $vectorTotal) {
        continue;
    }

    $distanceToQuery = $minkowskiMetric->distance($sample_data[$queryPosition], $vector);
    echo "D{$docNumber} dan Q = " . round($distanceToQuery, 2) . "<br>";
}

echo "<br><br>";
// Compute the Manhattan distance with the dedicated Manhattan class:
// distance from the query (last vector) to every document vector.
$manhattanMetric = new Manhattan();
$vectorTotal = count($sample_data);
$queryPosition = $vectorTotal - 1;

echo "<b>Manhattan dengan Manhattan()</b><br><br>";

$docNumber = 0;
foreach ($sample_data as $vector) {
    $docNumber++;

    // The last vector is the query itself; it is not compared with itself.
    if ($docNumber === $vectorTotal) {
        continue;
    }

    $distanceToQuery = $manhattanMetric->distance($sample_data[$queryPosition], $vector);
    echo "D{$docNumber} dan Q = " . round($distanceToQuery, 2) . "<br>";
}

echo "<br><br>";
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment