Created
February 15, 2012 23:27
-
-
Save iaindooley/1840016 to your computer and use it in GitHub Desktop.
Code for generating broad, exact and phrase match keywords, with optional modified broad combinations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Reads keyphrases (one per line) from the file named by the first command
// line argument and prints, for each phrase, its broad form (the phrase as
// is), its phrase match form ("phrase") and its exact match form ([phrase]).
// When a truthy second argument is given, the modified broad variants are
// printed as well: one variant for every contiguous run of words, with each
// word of that run prefixed by '+'.

// Note that PHP treats the strings '' and '0' as false, so those values do
// NOT enable the modified broad output.
define('MODIFIED_BROAD', empty($argv[2]) ? 0 : 1);

if (!isset($argv[1]) || $argv[1] === '')
{
    $script = isset($argv[0]) ? $argv[0] : 'mung_keywords.php';
    fwrite(STDERR, 'Usage: php '.$script.' <keyphrase-file> [modified-broad]'.PHP_EOL);
    exit(1);
}

$lines = file($argv[1], FILE_IGNORE_NEW_LINES);

// file() returns false (and raises a warning) when the file cannot be read.
if ($lines === false)
{
    fwrite(STDERR, 'Unable to read keyphrase file: '.$argv[1].PHP_EOL);
    exit(1);
}

$res = array();

foreach ($lines as $line)
{
    // Split on any run of whitespace so that stray leading, trailing or
    // doubled spaces never produce empty "words".
    $split = preg_split('/\s+/', trim($line), -1, PREG_SPLIT_NO_EMPTY);

    // Blank lines carry no keyphrase; skip them instead of emitting "" and [].
    if (!$split)
        continue;

    $line = implode(' ', $split);

    $res[] = $line;
    $res[] = '"'.$line.'"';
    $res[] = '['.$line.']';

    if (MODIFIED_BROAD)
    {
        $length = count($split);
        $windows = array();

        // Slice the word POSITIONS (0, 1, 2, ...) rather than the word text,
        // for every run length from 1 up to the number of words. Working on
        // positions keeps the '+' prefix on exactly the intended words even
        // when a word is repeated or is a substring of another word
        // (a textual search and replace would turn "is this" into
        // "+is th+is").
        $positions = array_keys($split);
        for ($i = 1; $i <= $length; $i++)
            sliceLengths($positions, $windows, $i);

        foreach ($windows as $window)
        {
            // Each window is a space separated list of word positions.
            $marked = $split;
            foreach (explode(' ', $window) as $position)
                $marked[(int) $position] = '+'.$marked[(int) $position];

            $res[] = implode(' ', $marked);
        }
    }
}

// Print the result without a trailing newline and stop here with a success
// status, exactly as the original die(string) call did.
echo implode(PHP_EOL, $res);
exit(0);
/**
 * Appends every contiguous run of $length elements of $source to
 * $destination, each run joined into a single space separated string.
 *
 * Runs are appended in order of their starting offset, so for the source
 * array('a', 'b', 'c') and a length of 2 the strings 'a b' and 'b c' are
 * appended. Existing entries of $destination are kept.
 *
 * @param array $source      The elements to take the runs from.
 * @param array $destination Receives the joined runs (passed by reference).
 * @param int   $length      Number of elements per run. Nothing is appended
 *                           when it is smaller than 1 or larger than the
 *                           number of elements in $source.
 * @return void
 */
function sliceLengths($source,&$destination,$length)
{
    $overall_length = count($source);

    // A run of zero or negative length is meaningless: array_slice() would
    // return empty arrays (or trim from the end), yielding bogus entries.
    if ($length < 1 || $length > $overall_length)
        return;

    // Number of distinct starting offsets a run of this length can have.
    $moves = ($overall_length - $length) + 1;

    for ($i = 0; $i < $moves; $i++)
        $destination[] = implode(' ', array_slice($source, $i, $length));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you save that to a file mung_keywords.php you can call it from the command line like this:
php mung_keywords.php filename.txt
where filename.txt contains your keyphrases, one per line, for example the file:
this
is my
research data
would produce the output:
this
"this"
[this]
is my
"is my"
[is my]
research data
"research data"
[research data]
if you pass in the optional 2nd parameter like this:
php mung_keywords.php filename.txt true
then the output also includes the broad match modified variants, one for every contiguous run of words in the phrase (each word of the run is prefixed with +):
this
"this"
[this]
+this
is my
"is my"
[is my]
+is my
is +my
+is +my
research data
"research data"
[research data]
+research data
research +data
+research +data