Skip to content

Instantly share code, notes, and snippets.

@cubedtear
Created December 31, 2016 14:49
Show Gist options
  • Save cubedtear/331dd82207627c734bcff31f990bdfe8 to your computer and use it in GitHub Desktop.
Save cubedtear/331dd82207627c734bcff31f990bdfe8 to your computer and use it in GitHub Desktop.
import subprocess
import time
from multiprocessing.pool import ThreadPool
from threading import Lock
print_lock = Lock()  # Serializes writes to standard output
progress_lock = Lock()  # Guards updates to the shared progress counter
p = print  # Keep a handle on the built-in before it is shadowed below


def print(*args, **kwargs):
    """Forward to the built-in print while holding ``print_lock``.

    Keeps output produced by different threads from being interleaved.
    Accepts exactly the same arguments as the built-in ``print``.
    """
    print_lock.acquire()
    try:
        p(*args, **kwargs)
    finally:
        print_lock.release()


progress = 0  # Shared counter of finished evaluations
def worker(data):
    """Evaluate one candidate attribute subset with Weka's 1-NN classifier.

    Args:
        data: A sequence ``(iteration, past, current)`` holding the iteration
            number (used only in the progress message), the attribute indices
            already selected, and the candidate attribute index to add.

    Returns:
        A ``(current, score)`` tuple, where ``score`` is the integer that
        follows "Instances" on the "Correct..." line of Weka's output
        (presumably the number of correctly classified instances).

    Raises:
        ValueError: If no integer score could be extracted from Weka's output
            (for example because one of the Weka invocations failed).
    """
    global progress
    iteration = data[0]  # Iteration number, only used for printing
    past = data[1]  # Attributes selected in previous iterations
    current = data[2]  # Candidate attribute being evaluated

    # Attributes to keep: the previous ones, the candidate and the class
    # attribute (index 3676), comma separated.
    everything = ",".join(map(str, [*past, current, 3676]))

    # Both Weka calls run with an environment containing only CLASSPATH.
    weka_env = dict(CLASSPATH='/home/aritz/Escritorio/weka-3-8-0/weka.jar')

    # Ask Weka to write an ARFF file containing only the wanted attributes
    # (-V inverts the Remove filter, so the listed attributes are kept).
    subprocess.run(
        "java weka.filters.unsupervised.attribute.Remove -R %s -V -i input.arff -o %d.arff"
        % (everything, current),
        shell=True,
        env=weka_env,
    )

    # Classify the generated ARFF with 1-NN and extract the score with grep.
    # Raw strings keep the backslashes of the shell quoting and of the grep
    # patterns literal, instead of relying on invalid escapes such as "\s".
    classify_cmd = (
        r'java weka.classifiers.lazy.IBk -K 1 -W 0'
        r' -A "weka.core.neighboursearch.LinearNNSearch'
        r' -A \"weka.core.EuclideanDistance -R first-last\""'
        r' -v -o -t %d.arff'
        r' | grep Correct'
        r' | grep -P -o "Instances\s*\d+"'
        r' | grep -P -o "\d+"'
    ) % current
    out = subprocess.run(
        classify_cmd,
        universal_newlines=True,
        stdout=subprocess.PIPE,
        shell=True,
        env=weka_env,
    ).stdout
    accuracy = out.strip()

    # Update the shared counter and report under the lock so that the printed
    # percentage matches the counter value. 3675 is the hard-coded number of
    # candidate attributes.
    with progress_lock:
        progress += 1
        print("{:>6.2f}% of iteration: {:<2d} - Current variable: {:<4d} - Accuracy: {:<3s} - All variables taken: {:s}".format(progress/3675*100, iteration, current, accuracy, everything))

    try:
        score = int(accuracy)
    except ValueError:
        # Same exception type as before, but with enough context to debug.
        raise ValueError(
            "Could not parse a score from Weka's output for variable %d "
            "(attributes: %s); got %r" % (current, everything, out)
        ) from None
    return current, score
# Greedy forward selection: in every iteration each remaining variable is
# evaluated together with the variables fixed so far, and the best-scoring
# one is fixed. The search stops as soon as the best candidate of an
# iteration scores worse than the best of the previous iteration, or when
# there are no candidates left.
done = False  # Whether the search has finished
so_far = []  # Variables fixed so far
remaining = list(range(1, 3676))  # Variables that can still be added
i = 1  # Iteration number
best_result = -1  # Best score accepted so far
while remaining:
    # Restart the shared counter so the percentage printed by the workers
    # begins at zero in every iteration (no worker runs at this point).
    progress = 0
    pool = ThreadPool(processes=8)  # Create 8 threads
    # Parameters handed to the worker function, one tuple per candidate
    jobs = [(i, so_far, var) for var in remaining]
    try:
        results = pool.map(worker, jobs)  # Run all the evaluations
    finally:
        pool.close()  # No more work for this pool
        pool.join()  # Wait until every thread has finished
    best = max(results, key=lambda r: r[1])  # Candidate with the top score
    # Report the outcome of the iteration
    print("Iteration finished. Best of iteration: %d. Result: %d. So far: %s" % (best[0], best[1], ','.join(map(str, so_far))))
    if best[1] < best_result:  # Adding any variable makes things worse
        break
    # Remember the accepted score; without this the comparison above would
    # always be made against the initial -1 and could never stop the search.
    best_result = best[1]
    remaining.remove(best[0])  # The best variable is no longer a candidate
    so_far.append(best[0])  # Fix the best variable
    i += 1  # Next iteration
print("Done. Best result: %d. Achieved with vars: %s" % (best_result, so_far))
done = True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment