Skip to content

Instantly share code, notes, and snippets.

View AlexDel's full-sized avatar

Alexander Kalinin AlexDel

View GitHub Profile
from scrapy.xlib.pydispatch import dispatcher
from scrapy import signals
from scrapy.http import FormRequest
from scrapy.selector import Selector
from spider.spiders.basic import StudentSpider
from spider.items import StudentItem
from scrapy import log
class XPATHS:
// On every keyup inside the .note-editable element, copy its plain text
// into the form field named "fromSummernote".
$('.note-editable').on('keyup', function () {
  var editorText = $(this).text();
  $('[name="fromSummernote"]').val(editorText);
});
AlexDel / gist:c3b713c9ffde0f7589e7
Created December 30, 2014 13:23
contracted popover
//choose the button by id to add "Add type popover" with need parameters
//popover header
popOverHeader: 'Add Type',
//this is the input form id from which new type name is taken
popOverFormInputId: 'NewType',
//this is the input form name from which new type name is taken
popOverFormInputName: 'NewType',
people_list = [
{name: 'A',
friends: 'B','C'},
{name: 'D',
friends: 'E','A'},
friends_groups = 0
for person1 in people_list:
//appDir: '../js/apps',
baseUrl: "../js", // Define our base URL - all module paths are relative to this base directory
dir: '../www-built/js',
//dir: '../../server-side/assets/SPAS',
mainConfigFile: '../src_modules/config-require.js',
optimizeCss: "none",
optimize: "none", //"uglify",
findNestedDependencies: true,
AlexDel / 1
Last active August 29, 2015 14:22
dataset = [
(vector1, True),
(vector2, False),
(vector3, False),
train_data = [i[0] for i in dataset]
train_labels = [i[1] for i in dataset], train_labels)
AlexDel / gist:1588878
Created January 10, 2012 12:45
Genre diversity score. Compute the lexical diversity of each genre in the Brown Corpus and print the results. NLTK Ex. 2.16
# __future__ imports must come before any other import, otherwise Python
# rejects the module with a SyntaxError.
from __future__ import division, print_function

import nltk

# Print the lexical diversity (distinct words / total words) of every genre
# in the Brown Corpus as a percentage.
for genre in nltk.corpus.brown.categories():
    words = nltk.corpus.brown.words(categories=genre)
    # Scale to a percentage *before* rounding so binary float noise
    # (e.g. 13.490600000000001) does not leak into the printed value.
    diversity = round(len(set(words)) / len(words) * 100, 4)
    print(genre + ' - ' + str(diversity) + '%')
AlexDel / gist:1589515
Created January 10, 2012 15:10
NLTK. Ex 2.18 Write a program to print the 50 most frequent bigrams (pairs of adjacent words) of a text, omitting bigrams that contain stopwords.
def top_bigrams(text, limit=None):
    """Return the stopword-free bigrams of *text*, most frequent first.

    Args:
        text: sequence of word tokens.
        limit: optional maximum number of bigrams to return (for example 50);
            ``None`` (the default) returns every matching bigram.

    Returns:
        A list of ``(word1, word2)`` tuples in which both members are
        alphabetic and neither is an English stopword.
    """
    # Count every pair of adjacent tokens.
    fdist = nltk.probability.FreqDist(nltk.bigrams(text))
    # A set gives O(1) membership tests; the stoplist is checked twice per bigram.
    stopwords = set(nltk.corpus.stopwords.words('english'))
    # Sort explicitly by count: only old NLTK releases return keys() already
    # ordered by frequency, so relying on keys() silently loses the ranking.
    ranked = sorted(fdist.items(), key=lambda item: item[1], reverse=True)

    def is_content_word(token):
        # The stoplist is lowercase, so compare case-insensitively
        # ("The" must be rejected just like "the").
        return token.isalpha() and token.lower() not in stopwords

    top_list = [
        (x, y)
        for (x, y), _count in ranked
        if is_content_word(x) and is_content_word(y)
    ]
    # Slicing with None keeps the whole list.
    return top_list[:limit]
AlexDel / gist:1593588
Created January 11, 2012 07:39
NLTK EX 2.20 Write a function word_freq() that takes a word and the name of a section of the Brown Corpus as arguments, and computes the frequency of the word in that section of the corpus.
def word_freq(word, section):
    """Return the FreqDist count of *word* within a Brown Corpus section.

    Args:
        word: the token to look up.
        section: Brown Corpus category name passed as ``categories``.
    """
    section_words = nltk.corpus.brown.words(categories=section)
    return nltk.probability.FreqDist(section_words)[word]
AlexDel / gist:1593818
Created January 11, 2012 09:05
NLTK Ex 2.21 Write a program to guess the number of syllables contained in a text, making use of the CMU Pronouncing Dictionary
# CMU Pronouncing Dictionary loaded as a plain dict for convenient lookup:
# word -> list of pronunciations, each pronunciation a list of phonemes.
d = nltk.corpus.cmudict.dict()


def count_syllables(text):
    """Guess the number of syllables in *text* using the CMU dictionary.

    Args:
        text: the text as a list of word tokens.

    Returns:
        The total syllable count, using the first listed pronunciation of
        each word when several exist.

    Raises:
        KeyError: if a word is not present in the pronouncing dictionary.
    """
    total = 0
    for word in text:
        # Dictionary entries are lowercase, so normalise the token first.
        first_pronunciation = d[word.lower()][0]
        # Only vowel phonemes carry a trailing stress digit (0, 1 or 2), and
        # each vowel is one syllable. Counting every phoneme would
        # over-count by including the consonants.
        total += sum(1 for phoneme in first_pronunciation if phoneme[-1].isdigit())
    return total