
@aneesha
aneesha / SiameseBERT_SemanticSearch.ipynb
Last active August 9, 2023 00:48
Semantic Search with Sentence-BERT
(The notebook preview could not be rendered.)
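Since the notebook itself is not rendered above, here is a minimal sketch of semantic search with Sentence-BERT embeddings via the sentence-transformers package; the model name, corpus, and query below are illustrative assumptions, not the notebook's actual code.

# Minimal sketch of semantic search with Sentence-BERT embeddings.
# Model name, corpus, and query are illustrative assumptions.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

corpus = ['A man is eating food.',
          'A monkey is playing drums.',
          'Someone is riding a horse.']
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

query_embedding = model.encode('What is the horse doing?', convert_to_tensor=True)

# Rank corpus sentences by cosine similarity to the query
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=3)[0]
for hit in hits:
    print(corpus[hit['corpus_id']], hit['score'])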
@aneesha
aneesha / display_closestwords_tsnescatterplot.ipynb
Last active January 31, 2021 20:11
Use t-SNE to plot only the words most similar to a given word in a Word2Vec model
(The notebook preview could not be rendered.)
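This notebook is not rendered either, so here is a rough sketch of the idea under stated assumptions: given a trained gensim Word2Vec model and a query word, reduce the word and its nearest neighbours to 2-D with t-SNE and scatter-plot them.

# Rough sketch (not the notebook's code): plot a word and its most similar
# words in 2-D via t-SNE.  `model` is assumed to be a trained gensim Word2Vec.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def display_closestwords_tsnescatterplot(model, word, topn=10):
    # The query word plus its nearest neighbours in the embedding space
    close_words = [word] + [w for w, _ in model.wv.most_similar(word, topn=topn)]
    vectors = np.array([model.wv[w] for w in close_words])

    # Perplexity must be smaller than the number of points being embedded
    coords = TSNE(n_components=2, perplexity=5, random_state=0).fit_transform(vectors)

    plt.scatter(coords[:, 0], coords[:, 1])
    for (x, y), label in zip(coords, close_words):
        plt.annotate(label, xy=(x, y), xytext=(2, 2), textcoords='offset points')
    plt.show()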
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.datasets import fetch_20newsgroups
from sklearn.decomposition import NMF, LatentDirichletAllocation
def display_topics(model, feature_names, no_top_words):
    # Print the highest-weighted words for each topic of a fitted NMF/LDA model
    for topic_idx, topic in enumerate(model.components_):
        print("Topic %d:" % topic_idx)
        print(" ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))
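A short usage sketch for the function above (the corpus, feature count, and topic count are illustrative assumptions): vectorize the 20 newsgroups corpus, fit LDA and/or NMF, and pass the fitted model and vocabulary to display_topics.

# Illustrative usage -- feature and topic counts are assumptions
documents = fetch_20newsgroups(remove=('headers', 'footers', 'quotes')).data

tf_vectorizer = CountVectorizer(max_features=1000, stop_words='english')
tf = tf_vectorizer.fit_transform(documents)
lda = LatentDirichletAllocation(n_components=10, random_state=0).fit(tf)
# Use get_feature_names() on scikit-learn versions older than 1.0
display_topics(lda, tf_vectorizer.get_feature_names_out(), 10)

tfidf_vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
tfidf = tfidf_vectorizer.fit_transform(documents)
nmf = NMF(n_components=10, random_state=1).fit(tfidf)
display_topics(nmf, tfidf_vectorizer.get_feature_names_out(), 10)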
'''
Merge/combine courses in the Open edX OLX format.
'''
import sys
import os
from distutils.dir_util import copy_tree
import json
# Example:
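The preview cuts off before the example. Purely as a hypothetical illustration (the directory layout and merge strategy below are assumptions, not the gist's actual logic), combining two exported OLX courses could copy each content directory of one course into the other:

# Hypothetical illustration -- paths and subdirectory list are assumptions;
# uses the os and copy_tree imports from the snippet above
source_course = 'course_b'
target_course = 'merged_course'

for subdir in ('chapter', 'sequential', 'vertical', 'html', 'problem'):
    src = os.path.join(source_course, subdir)
    dst = os.path.join(target_course, subdir)
    if os.path.isdir(src):
        # copy_tree merges into an existing directory (assumes no name collisions)
        copy_tree(src, dst)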
@aneesha
aneesha / NNDEIG.m
Created June 7, 2014 12:36
Initialization for symmetric NMF using eigendecomposition, based on the NNDSVD technique of Boutsidis & Gallopoulos (still experimental; the math may not be correct)
function [W] = NNDEIG(A,k,flag);
%
% This function adapts the NNDSVD algorithm described in [1] for the
% initialization of Nonnegative Matrix Factorization algorithms to the
% symmetric NMF case, and therefore uses an eigendecomposition
%
% [W] = nndeig(A,k,flag);
%
% INPUT
% ------------
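The MATLAB listing is cut off above. As a rough Python sketch of the same idea only (a heavily hedged reading of NNDSVD specialized to symmetric A ~ W @ W.T, not the gist's verified math), one can take the top-k eigenpairs and keep the dominant nonnegative part of each eigenvector:

# Rough Python sketch of an NNDSVD-style initialization for symmetric NMF
# (A ~ W @ W.T).  Assumptions, not verified math: A is symmetric and its
# top-k eigenvalues are nonnegative.
import numpy as np

def nndeig_init(A, k):
    eigvals, eigvecs = np.linalg.eigh(A)      # eigenvalues in ascending order
    top = np.argsort(eigvals)[::-1][:k]       # indices of the top-k eigenpairs
    W = np.zeros((A.shape[0], k))
    for j, idx in enumerate(top):
        v = eigvecs[:, idx]
        pos, neg = np.maximum(v, 0), np.maximum(-v, 0)
        # keep whichever sign gives the larger nonnegative part of the eigenvector
        part = pos if np.linalg.norm(pos) >= np.linalg.norm(neg) else neg
        W[:, j] = np.sqrt(max(eigvals[idx], 0.0)) * part
    return W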
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.datasets import fetch_20newsgroups
from sklearn.decomposition import NMF, LatentDirichletAllocation
import numpy as np
def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents):
    # Print the top words for each topic (each row of H, the topic-term matrix)
    for topic_idx, topic in enumerate(H):
        print("Topic %d:" % topic_idx)
        print(" ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
import numpy as np
def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents):
    # H: topic-term matrix, W: document-topic matrix
    for topic_idx, topic in enumerate(H):
        print("Topic %d:" % topic_idx)
        print(" ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))
        # Show the documents that load most heavily on this topic
        top_doc_indices = np.argsort(W[:, topic_idx])[::-1][0:no_top_documents]
        for doc_index in top_doc_indices:
            print(documents[doc_index])
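A usage sketch for this variant (corpus and parameter values are illustrative assumptions): with scikit-learn, H is the fitted model's components_ matrix and W is obtained by calling transform() on the document-term matrix.

# Illustrative usage -- H is the topic-term matrix, W the document-topic matrix
from sklearn.datasets import fetch_20newsgroups

documents = fetch_20newsgroups(remove=('headers', 'footers', 'quotes')).data
tfidf_vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
tfidf = tfidf_vectorizer.fit_transform(documents)

nmf = NMF(n_components=10, random_state=1).fit(tfidf)
nmf_H = nmf.components_           # topic-term weights
nmf_W = nmf.transform(tfidf)      # document-topic weights

# Use get_feature_names() on scikit-learn versions older than 1.0
display_topics(nmf_H, nmf_W, tfidf_vectorizer.get_feature_names_out(),
               documents, no_top_words=10, no_top_documents=3)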
import dask.dataframe as dd

# Lazily read every matching CSV in one go, parsing the timestamp column
df = dd.read_csv('logs/2018-*.*.csv', parse_dates=['timestamp'])
# Mean of `value` per hour of the day; compute() triggers the actual work
df.groupby(df.timestamp.dt.hour).value.mean().compute()
import h5py
# Open the HDF5 file read-only and point at a dataset inside it
f = h5py.File('myhdf5file.hdf5', 'r')
dset = f['/data/path']

import dask.array as da
# Wrap the on-disk dataset in a chunked dask array for out-of-core computation
x = da.from_array(dset, chunks=(5000, 5000))
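Operations on x stay lazy until compute() is called; for example (an illustrative reduction, not part of the gist):

# Illustrative: nothing is read from the HDF5 file until .compute()
column_means = x.mean(axis=0)
print(column_means.compute())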