Created
April 20, 2019 03:49
-
-
Save ridhof/313d5a522ea758d3460c4f29e0c23829 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
require_once __DIR__.'/vendor/autoload.php';

use Phpml\FeatureExtraction\TokenCountVectorizer;
use Phpml\Tokenization\WhitespaceTokenizer;
use Phpml\FeatureExtraction\TfIdfTransformer;
use Phpml\Math\Distance\Manhattan;
use Phpml\Math\Distance\Minkowski;

/**
 * Prints a titled HTML table with one column per vocabulary term and one row per sample.
 *
 * Rows are labelled D1, D2, ... in iteration order, except the last row, which is labelled Q.
 *
 * @param string   $title      Heading text, emitted as-is inside <b>…</b>.
 * @param array    $vocabulary Terms used as column headers (HTML-escaped on output).
 * @param array    $rows       One array of numeric cell values per sample.
 * @param int|null $precision  Decimal places passed to round() for each cell; null prints raw values.
 */
function renderMatrixTable(string $title, array $vocabulary, array $rows, ?int $precision = null): void
{
    echo "<b>".$title."</b><br><br>";
    echo "<table border='1'>";

    // Header row: an empty corner cell followed by one cell per term.
    echo "<tr><th align='center'></th>";
    foreach ($vocabulary as $term) {
        // Tokens come from splitting raw text, so they may contain HTML metacharacters.
        echo "<th align='center'>".htmlspecialchars((string) $term, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')."</th>";
    }
    echo "</tr>";

    $total = count($rows);
    $position = 1;
    foreach ($rows as $row) {
        // The last sample is the query; every other sample is a numbered document.
        $label = ($position === $total) ? "Q" : "D".$position;
        echo "<tr><td>".$label."</td>";
        foreach ($row as $value) {
            $cell = ($precision === null) ? $value : round($value, $precision);
            echo "<td>".$cell."</td>";
        }
        echo "</tr>";
        $position++;
    }

    echo "</table><br><br>";
}

/**
 * Prints the distance between the last vector (the query Q) and every preceding vector (D1, D2, ...).
 *
 * @param string              $title   Heading text, emitted as-is inside <b>…</b>.
 * @param Manhattan|Minkowski $metric  Object exposing distance(array $a, array $b).
 * @param array               $vectors Feature vectors; the last one is treated as the query.
 */
function printDistancesToQuery(string $title, $metric, array $vectors): void
{
    echo "<b>".$title."</b><br><br>";

    // Split the query off once instead of re-indexing the array on every iteration.
    $documents = array_values($vectors);
    $query = array_pop($documents);

    foreach ($documents as $offset => $document) {
        $distance = $metric->distance($query, $document);
        echo "D".($offset + 1)." dan Q = ".round($distance, 2)."<br>";
    }

    echo "<br><br>";
}

// Input texts. The tables and distance listings below treat the last entry as the
// query (Q) and the preceding entries as documents D1..D4.
$sample_data = [
    'dolar naik harga naik hasil turun',
    'harga naik harus gaji naik',
    'premium tidak pengaruh dolar',
    'harga laptop naik',
    'naik harga',
];

// Compute term frequency (TF). transform() is called for its effect on $sample_data:
// from here on each entry is iterated as an array of per-term values.
$tf = new TokenCountVectorizer(new WhitespaceTokenizer());
$tf->fit($sample_data);
$tf->transform($sample_data);
$vocabulary = $tf->getVocabulary();

renderMatrixTable('TERM FREQUENCY', $vocabulary, $sample_data);

// Compute TF-IDF on top of the term counts; values are shown rounded to one decimal.
$tfidf = new TfIdfTransformer($sample_data);
$tfidf->transform($sample_data);

renderMatrixTable('TF-IDF', $vocabulary, $sample_data, 1);

// Manhattan distance computed via the Minkowski metric with lambda = 1.0.
$minkowski = new Minkowski(1.0);
printDistancesToQuery('Manhattan dengan Minkowski(lambda = 1.0)', $minkowski, $sample_data);

// Manhattan distance computed via the dedicated Manhattan metric.
$manhattan = new Manhattan();
printDistancesToQuery('Manhattan dengan Manhattan()', $manhattan, $sample_data);
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment