Skip to content

Instantly share code, notes, and snippets.

View gibrown's full-sized avatar

Greg Ichneumon Brown gibrown

View GitHub Profile
function es_api_detect_lang( $text ) {
$lang = false;
//if we can't tell the language from the first 5000 characters, we probably can't tell it at all
$text = mb_substr( $text, 0, 5000 );
//replace non-breaking spaces so they don't match the \p{L} char class
$text = preg_replace( '/[\x{00A0}\x{2007}\x{202F}]/u', ' ', $text );
//replace unicode symbols; see http://www.utf8-chartable.de/unicode-utf8-table.pl
@gibrown
gibrown / gist:23dc6ee65f5bcd41cd09
Created January 6, 2016 18:49
Query WP posts and weight recent posts more heavily
//These are some params I've used elsewhere; you may want to try adjusting them
$date_scale = '5d';
$date_decay = 0.99999;
$date_origin = date( 'Y-m-d' );
$query = array(
'query' => array(
"function_score" => array(
'query' => array( 'filtered' => array(
'query' => array( 'multi_match' => array(
'query' => $query,
<?php
class Theme_Support_Index_Builder extends WPES_Abstract_Index_Builder {
public function get_config( $args ) {
$defaults = array(
'lang' => 'en',
);
$args = wp_parse_args( $args, $defaults );
@gibrown
gibrown / bulk_test_queries.py
Created April 10, 2017 17:01
Querying wp.org plugin search
#!/bin/python
import sys
import org_search as org
import pprint
import csv
#Configuration
lang='en_US'
min_count=1000
@gibrown
gibrown / index-data.sh
Created February 15, 2017 16:48
Populate an Elasticsearch index from bash and json files
#!/bin/bash
# To prep a file for this script:
# - take a list of docs orig.json with one json doc per line
# - run: split -l 1000 orig.json orig-split
export ESINDEX="$1" #ES index name
export ESTYPE="$2" #ES document type name
JSONFILE="$3" #JSON file path name. One doc per line.
@gibrown
gibrown / sim_score.py
Created March 13, 2017 22:55
Simple graphing of Elasticsearch scoring functions.
from __future__ import division
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pylab import *
import math
from scipy.stats import beta, norm, uniform
from scipy.special import betaln
from random import random, normalvariate
import numpy as np
<?php
// This uses the wpes-lib framework to build the index: https://github.com/automattic/wpes-lib
class WPOrg_Plugins_Index_Builder extends VIP_Index_Builder {
//override to add support for all analyzers
public function get_settings( $args ) {
$defaults = array(
);
$args = wp_parse_args( $args, $defaults );