Skip to content

Instantly share code, notes, and snippets.

View gibrown's full-sized avatar

Greg Ichneumon Brown gibrown

View GitHub Profile
@gibrown
gibrown / bulk_test_queries.py
Created April 10, 2017 17:01
Querying wp.org plugin search
#!/bin/python
import sys
import org_search as org
import pprint
import csv
#Configuration
lang='en_US'
min_count=1000
@gibrown
gibrown / sim_score.py
Created March 13, 2017 22:55
Simple graphing of Elasticsearch scoring functions.
from __future__ import division
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pylab import *
import math
from scipy.stats import beta, norm, uniform
from scipy.special import betaln
from random import random, normalvariate
import numpy as np
@gibrown
gibrown / index-data.sh
Created February 15, 2017 16:48
Populate an Elasticsearch index from bash and json files
#!/bin/bash
# To prep a file for this script:
# - take a list of docs orig.json with one json doc per line
# - run: split -l 1000 orig.json orig-split
export ESINDEX="$1" #ES index name
export ESTYPE="$2" #ES document type name
JSONFILE="$3" #JSON file path name. One doc per line.
<?php
// This uses the wpes-lib framework to build the index: https://github.com/automattic/wpes-lib
class WPOrg_Plugins_Index_Builder extends VIP_Index_Builder {
//override to add support for all analyzers
public function get_settings( $args ) {
$defaults = array(
);
$args = wp_parse_args( $args, $defaults );
<?php
class Theme_Support_Index_Builder extends WPES_Abstract_Index_Builder {
public function get_config( $args ) {
$defaults = array(
'lang' => 'en',
);
$args = wp_parse_args( $args, $defaults );
@gibrown
gibrown / gist:23dc6ee65f5bcd41cd09
Created January 6, 2016 18:49
Query WP posts and weight recent posts more heavily
//These are parameters I've used elsewhere; you may want to try adjusting them
$date_scale = '5d';
$date_decay = 0.99999;
$date_origin = date( 'Y-m-d' );
$query = array(
'query' => array(
"function_score" => array(
'query' => array( 'filtered' => array(
'query' => array( 'multi_match' => array(
'query' => $query,
diff --git a/src/common/class.wpes-analyzer-builder.php b/src/common/class.wpes-analyzer-builder.php
index 0357ea4..5787479 100644
--- a/src/common/class.wpes-analyzer-builder.php
+++ b/src/common/class.wpes-analyzer-builder.php
@@ -340,6 +340,29 @@ class WPES_Analyzer_Builder {
continue;
}
+ if ( 'de' == $lang ) {
+ ////From: http://gibrown.wordpress.com/2013/05/01/three-principles-for-multilingal-indexing-in-elasticsearch/#comment-857
@gibrown
gibrown / gist:9419022
Created March 7, 2014 20:11
WP.com related posts query building
public static function post( $blog_id, $post_id, $mlt_fields = array( 'mlt_content' ), $analyzer = false ) {
switch_to_blog( $blog_id );
$post = get_post( $post_id );
$fld_bldr = new ES_WP_Field_Builder();
$tax_data = $fld_bldr->taxonomy( $post );
$mlt_content = $fld_bldr->mlt_content( array(
'title' => $fld_bldr->clean_string( $post->post_title ),
'content' => $fld_bldr->clean_string( $post->post_content ),
function es_api_detect_lang( $text ) {
$lang = false;
//if we can't detect the language from the first 5000 characters, more text probably won't help
$text = mb_substr( $text, 0, 5000 );
//replace non-breaking spaces so they don't match the \p{L} char class
$text = preg_replace( '/[\x{00A0}\x{2007}\x{202F}]/u', ' ', $text );
//replace unicode symbols: see: http://www.utf8-chartable.de/unicode-utf8-table.pl
Մեր հÕ րևÕ Õ բÕ կում երկու եղբÕ յր Õ Õ Õ Õ պրում ՄÕ օր եղբÕ յրÕ երը որոÕ եցÕ Õ բÕ րձրÕ Õ Õ լ Õ րեÕ ց Õ Õ Õ Õ Õ Õ Õ քը ՔÕ Õ եցÕ Õ բերեցÕ Õ ծÕ Õ ր սÕ Õ դուղքը ու բÕ րձրÕ ցÕ Õ մութ Õ Õ Õ Õ քը Փոքր եղբÕ յրը Õ Õ Õ վÕ խկոÕ Õ ր Õ սկ մեծ եղբÕ յրը Õ Õ ըÕ դհÕ Õ Õ րÕ Õ հÕ մոզում Õ ր որ Õ մեÕ Õ Õ չ լÕ վ կլÕ Õ Õ որ չվÕ խեÕ Õ ՀÕ զÕ վ Õ Õ Õ բÕ րձրÕ ցել վերև երբ փոքր եղբÕ յրը չÕ կÕ Õ եց գեÕ Õ Õ Õ դրվÕ ծ Õ Õ խÕ Õ կÕ երը ոÕ քով դÕ պÕ վ դրÕ Õ ց ու վÕ յր ըÕ կÕ վ ՔÕ Õ Õ որ փոքր Õ ր Õ սկույÕ սկսեց լÕ ց լÕ Õ ել Õ սկ մեծ եղբÕ յրը Õ րÕ Õ հÕ Õ գսÕ Õ ցրեց Õ սելով որ դÕ կլÕ Õ Õ Õ րեÕ ց գÕ ղÕ Õ Õ քը և ոչ ոք Õ յդ մÕ սÕ Õ չÕ Õ մÕ Õ Õ
WordPress.com Debug
11:42 AM (32 minutes ago)
to greg
Text: