Dr. Ryan Heuser quadrismegistus

## logwatch.py
# !pip install humanfriendly loguru

from humanfriendly import format_timespan
from loguru import logger
import time

class Logwatch:
    """Object carrying a ``started`` and an ``ended`` marker.

    Both markers begin as ``None``. They presumably hold timestamps once
    the watch is used -- confirm against the rest of the class.
    """

    def __init__(self, name='Task', level='DEBUG'):
        """Create a watch with both markers unset.

        ``name`` and ``level`` are accepted for the caller's benefit but are
        not stored by the code visible here.
        """
        # Assigned left to right: ``started`` first, then ``ended``.
        self.started = self.ended = None

## nb2md.py
#!/usr/bin/env python3
import sys,os,bs4

def nb2py(fn):
    """Convert the notebook at ``fn`` to markdown and parse the output.

    Runs ``jupyter nbconvert --to markdown`` on ``fn``, then looks for a file
    with the same path but a ``.md`` extension, reads it, and parses the text
    with ``bs4.BeautifulSoup`` using the ``lxml`` parser.

    Args:
        fn: Path to the notebook file.

    Returns:
        ``None``. The function returns early when ``fn`` does not exist or
        when no ``.md`` file is found after the conversion attempt; the code
        visible here does not return the parsed document either.
    """
    import subprocess

    if not os.path.exists(fn):
        return

    # Hand the path over as its own argv entry instead of splicing it into a
    # shell string: names containing spaces or shell metacharacters are then
    # passed through literally rather than being interpreted by a shell.
    try:
        subprocess.run(['jupyter', 'nbconvert', '--to', 'markdown', fn], check=False)
    except OSError:
        # ``jupyter`` could not be launched. Stay best-effort like the old
        # ``os.system`` call and let the existence check below decide.
        pass

    fn_md = os.path.splitext(fn)[0] + '.md'
    if not os.path.exists(fn_md):
        return

    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(fn_md, encoding='utf-8') as f:
        txt = f.read()
    dom = bs4.BeautifulSoup(txt, 'lxml')

## lltk-readme.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                quadrismegistus
                / lltk-readme.ipynb
            
            
              Last active
              June 17, 2022 08:13
            
              
                lltk-readme.ipynb
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## pmap.py
"""
Simple mofo'n parallelism with progress bar. Born of frustration with p_tqdm.
"""


def pmap_do(inp):

## brute_txt.py
# install
# pip install bs4 fulltext epub-conversion pymupdf requests xml_cleaner html2text kitchen -q

# imports
import os
from kitchen.text.converters import to_unicode
# constants
# Set of file-extension strings (no leading dot).
WORKING_EXTS = set('txt pdf epub html xml htm'.split())
# Maps an extension to a list of tag names; every entry gets its own ['p'] list.
# NOTE(review): presumably the tags whose text is extracted -- confirm against the callers.
CONTENT_TAGS = {ext: ['p'] for ext in ('xml', 'html', 'htm', 'epub')}

## opentext.ipynb

      
              1 file
            
          
              0 forks
            
          
              1 comment
            
          
              0 stars
            
          
                quadrismegistus
                / opentext.ipynb
            
            
              Created
              August 6, 2019 23:08
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## pyvis_for_networkx.py
def draw_graph3(networkx_graph,notebook=True,output_filename='graph.html',show_buttons=True,only_physics_buttons=False):
    """
    This function accepts a networkx graph object,
    converts it to a pyvis network object preserving its node and edge attributes,
    and both returns and saves a dynamic network visualization.

    Valid node attributes include:
        "size", "value", "title", "x", "y", "label", "color".

        (For more info: https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.network.Network.add_node)

## gensim_word2vec_measure_semantic_shift_by_local_neighborhood.py
def measure_semantic_shift_by_neighborhood(model1,model2,word,k=25,verbose=False):
    """
    Basic implementation of William Hamilton (@williamleif) et al's measure of semantic change
    proposed in their paper "Cultural Shift or Linguistic Drift?" (https://arxiv.org/abs/1606.02821),
    which they call the "local neighborhood measure." They find this measure better suited to understand
    the semantic change of nouns owing to "cultural shift," or changes in meaning "local" to that word,
    rather than global changes in language use ("linguistic drift"), which are better suited to a
    Procrustes-alignment method (also described in the same paper).

    Arguments are:

## gensim_word2vec_make_semantic_network.py
"""
Code to make a network out of the shortest N cosine-distances (or, equivalently, the strongest N associations)
between a set of words in a gensim word2vec model.

To use:
Set the filenames for the word2vec model.
Set `my_words` to be a list of your own choosing.
Set `num_top_dists` to be a number or a factor of the length of `my_words`.
Choose between the two methods below to produce distances, and comment-out the other one.
"""

## gensim_word2vec_procrustes_align.py
def smart_procrustes_align_gensim(base_embed, other_embed, words=None):
	"""Procrustes align two gensim word2vec models (to allow for comparison between same word across models).
	Code ported from HistWords <https://github.com/williamleif/histwords> by William Hamilton <wleif@stanford.edu>.
		(With help from William. Thank you!)

	First, intersect the vocabularies (see `intersection_align_gensim` documentation).
	Then do the alignment on the other_embed model.
	Replace the other_embed model's syn0 and syn0norm numpy matrices with the aligned version.
	Return other_embed.
	# !pip install humanfriendly loguru

	from humanfriendly import format_timespan
	from loguru import logger
	import time

	class Logwatch:
	def __init__(self, name='Task', level='DEBUG'):
	self.started = None
	self.ended = None
	#!/usr/bin/env python3
	import sys,os,bs4

	def nb2py(fn):
	if not os.path.exists(fn): return
	os.system(f'jupyter nbconvert --to markdown {fn}')
	fn_md=os.path.splitext(fn)[0]+'.md'
	if not os.path.exists(fn_md): return
	with open(fn_md) as f: txt=f.read()
	dom=bs4.BeautifulSoup(txt,'lxml')
	"""
	Simple mofo'n parallelism with progress bar. Born of frustration with p_tqdm.
	"""






	def pmap_do(inp):
	# install
	# pip install bs4 fulltext epub-conversion pymupdf requests xml_cleaner html2text kitchen -q

	# imports
	import os
	from kitchen.text.converters import to_unicode
	# constants
	WORKING_EXTS={'txt','pdf','epub','html','xml','htm'}
	CONTENT_TAGS={'xml':['p'],'html':['p'],'htm':['p'],'epub':['p']}
	def draw_graph3(networkx_graph,notebook=True,output_filename='graph.html',show_buttons=True,only_physics_buttons=False):
	"""
	This function accepts a networkx graph object,
	converts it to a pyvis network object preserving its node and edge attributes,
	and both returns and saves a dynamic network visualization.

	Valid node attributes include:
	"size", "value", "title", "x", "y", "label", "color".

	(For more info: https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.network.Network.add_node)
	def measure_semantic_shift_by_neighborhood(model1,model2,word,k=25,verbose=False):
	"""
	Basic implementation of William Hamilton (@williamleif) et al's measure of semantic change
	proposed in their paper "Cultural Shift or Linguistic Drift?" (https://arxiv.org/abs/1606.02821),
	which they call the "local neighborhood measure." They find this measure better suited to understand
	the semantic change of nouns owing to "cultural shift," or changes in meaning "local" to that word,
	rather than global changes in language ("linguistic drift") use that are better suited to a
	Procrustes-alignment method (also described in the same paper.)

	Arguments are:
	"""
	Code to make a network out of the shortest N cosine-distances (or, equivalently, the strongest N associations)
	between a set of words in a gensim word2vec model.

	To use:
	Set the filenames for the word2vec model.
	Set `my_words` to be a list of your own choosing.
	Set `num_top_dists` to be a number or a factor of the length of `my_words.`
	Choose between the two methods below to produce distances, and comment-out the other one.
	"""
	def smart_procrustes_align_gensim(base_embed, other_embed, words=None):
	"""Procrustes align two gensim word2vec models (to allow for comparison between same word across models).
	Code ported from HistWords <https://github.com/williamleif/histwords> by William Hamilton <wleif@stanford.edu>.
	(With help from William. Thank you!)

	First, intersect the vocabularies (see `intersection_align_gensim` documentation).
	Then do the alignment on the other_embed model.
	Replace the other_embed model's syn0 and syn0norm numpy matrices with the aligned version.
	Return other_embed.