João joaorafaelm

## Dockerfile
#!/usr/bin/env -S bash -c "docker run -p 8080:8080 -it --rm \$(docker build --progress plain -f \$0 . 2>&1 | tee /dev/stderr | grep -oP 'sha256:[0-9a-f]*')"

# syntax = docker/dockerfile:1.4.0

FROM node:20

WORKDIR /root

RUN npm install sqlite3

## phrase_detection.sql
WITH tokens AS (
  -- Just edit MY_TABLE, MY_TEXT_COL, and MY_PKEY_COL, and watch it go!
  SELECT MY_PKEY_COL AS pkey, (unnest(to_tsvector(MY_TEXT_COL))).* FROM MY_TABLE
), token_stream AS (
  SELECT pkey, unnest(positions) AS token_idx, lexeme
  FROM tokens ORDER BY pkey, token_idx
), token_counts AS (
  SELECT lexeme, sum(count) AS count
  FROM (
    SELECT lexeme, array_length(positions, 1) AS count FROM tokens

## understanding-word-vectors.ipynb

      
              1 file
            
          
              351 forks
            
          
              51 comments
            
          
              1443 stars
            
          
                aparrish
                / understanding-word-vectors.ipynb
            
            
              Last active
              July 9, 2024 15:59
            
              
                Understanding word vectors: A tutorial for "Reading and Writing Electronic Text," a class I teach at ITP. (Python 2.7) Code examples released under CC0 https://creativecommons.org/choose/zero/, other text released under CC BY 4.0 https://creativecommons.org/licenses/by/4.0/
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## openwordnetpt.py
# -*- coding: utf-8 -*-

'''
Functions to read the OpenWordnetPT from RDF files and provide
access to it.
'''

import rdflib
from six.moves import cPickle

## topicmodelling_scikitlearn.py
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.datasets import fetch_20newsgroups
from sklearn.decomposition import NMF, LatentDirichletAllocation

def display_topics(model, feature_names, no_top_words):
    """Print the highest-weighted terms of every topic in a fitted model.

    For each entry of ``model.components_`` a ``Topic <index>:`` header is
    printed, followed by one line with up to ``no_top_words`` terms joined
    by single spaces.

    Args:
        model: Object exposing ``components_``, an iterable of per-topic
            weight vectors that each provide ``argsort()``.
        feature_names: Sequence mapping a weight index to its term.
        no_top_words: Number of terms to print per topic.
    """
    for topic_idx, topic in enumerate(model.components_):
        # A single-argument print(...) behaves identically as a Python 2
        # print statement and as a Python 3 function call.
        print("Topic %d:" % topic_idx)
        # The reversed slice walks argsort()'s result from its last entry
        # backwards, i.e. largest weights first when argsort() is ascending
        # (as NumPy's is).
        top_indices = topic.argsort()[:-no_top_words - 1:-1]
        print(" ".join([feature_names[i] for i in top_indices]))

## tcpproxy.go
package main

import (
	"flag"
	"io"
	"log"
	"net"
)

func main() {

## System Design.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                zekiunal
                / System Design.md
            
            
              Created
              April 18, 2016 12:35
                — forked from vasanthk/System Design.md
            
              
                System Design Cheatsheet
              
          
    # System Design Cheatsheet

Picking the right architecture = Picking the right battles + Managing trade-offs

##Basic Steps

Clarify and agree on the scope of the system


Use cases (description of sequences of events that, taken together, lead to a system doing something useful)

Who is going to use it?
How are they going to use it?


## System Design.md

      
              1 file
            
          
              2635 forks
            
          
              59 comments
            
          
              9471 stars
            
          
                vasanthk
                / System Design.md
            
            
              Last active
              July 22, 2024 17:59
            
              
                System Design Cheatsheet
              
          
    System Design Cheatsheet


Picking the right architecture = Picking the right battles + Managing trade-offs

Basic Steps


Clarify and agree on the scope of the system


Use cases (description of sequences of events that, taken together, lead to a system doing something useful)

Who is going to use it?
How are they going to use it?


## bufferise.py
def bufferise(defbuf=20, defskip=0):
    def decorate(function):
        def wrapper(*args, **kwargs):
            bufsize = kwargs['bufsize'] if 'bufsize' in kwargs else defbuf
            skiplines = kwargs['skiplines'] if 'skiplines' in kwargs else defskip
            print 'Bufsize = {}'.format(bufsize)
            print 'Skip {} lines'.format(skiplines)
            if skiplines:
                for i, record in enumerate(function(*args, **kwargs), start=1):
                    if i > skiplines:

## recommenderSystem.py
from math import pow, sqrt

def cosine(ratings1, ratings2):
    """Return the cosine similarity of two rating mappings.

    Each mapping is treated as a sparse vector keyed by item: the dot
    product is taken over the keys present in both mappings, and each norm
    is taken over all of a mapping's own values.

    Args:
        ratings1: Mapping of item key to numeric rating.
        ratings2: Mapping of item key to numeric rating.

    Returns:
        The dot product divided by the product of the two Euclidean norms,
        or ``0.0`` when either mapping is empty or contains only zeros
        (the similarity is undefined there; previously this raised
        ``ZeroDivisionError``).
    """
    norm1 = sum(pow(rating, 2) for rating in ratings1.values())
    norm2 = sum(pow(rating, 2) for rating in ratings2.values())
    denominator = sqrt(norm1 * norm2)
    if not denominator:
        # A zero-length vector has no direction; report "no similarity"
        # instead of dividing by zero.
        return 0.0
    # Test membership against the dict itself: O(1) per key, whereas
    # ``.keys()`` is a list scan on Python 2.
    dot_product = sum(ratings1[key] * ratings2[key]
                      for key in ratings2 if key in ratings1)
    return dot_product / denominator
	#!/usr/bin/env -S bash -c "docker run -p 8080:8080 -it --rm \$(docker build --progress plain -f \$0 . 2>&1 \| tee /dev/stderr \| grep -oP 'sha256:[0-9a-f]*')"

	# syntax = docker/dockerfile:1.4.0

	FROM node:20

	WORKDIR /root

	RUN npm install sqlite3
	WITH tokens AS (
	-- Just edit MY_TABLE, MY_TEXT_COL, and MY_PKEY_COL, and watch it go!
	SELECT MY_PKEY_COL AS pkey, (unnest(to_tsvector(MY_TEXT_COL))).* FROM MY_TABLE
	), token_stream AS (
	SELECT pkey, unnest(positions) AS token_idx, lexeme
	FROM tokens ORDER BY pkey, token_idx
	), token_counts AS (
	SELECT lexeme, sum(count) AS count
	FROM (
	SELECT lexeme, array_length(positions, 1) AS count FROM tokens
	# -- coding: utf-8 --

	'''
	Functions to read the OpenWordnetPT from RDF files and provide
	access to it.
	'''

	import rdflib
	from six.moves import cPickle
	from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
	from sklearn.datasets import fetch_20newsgroups
	from sklearn.decomposition import NMF, LatentDirichletAllocation

	def display_topics(model, feature_names, no_top_words):
	for topic_idx, topic in enumerate(model.components_):
	print "Topic %d:" % (topic_idx)
	print " ".join([feature_names[i]
	for i in topic.argsort()[:-no_top_words - 1:-1]])
	def bufferise(defbuf=20, defskip=0):
	def decorate(function):
	def wrapper(args, *kwargs):
	bufsize = kwargs['bufsize'] if 'bufsize' in kwargs else defbuf
	skiplines = kwargs['skiplines'] if 'skiplines' in kwargs else defskip
	print 'Bufsize = {}'.format(bufsize)
	print 'Skip {} lines'.format(skiplines)
	if skiplines:
	for i, record in enumerate(function(args, *kwargs), start=1):
	if i > skiplines:
	from math import pow, sqrt

	def cosine(ratings1, ratings2):
	norm1 = sum([pow(rating,2) for rating in ratings1.values()])
	norm2 = sum([pow(rating,2) for rating in ratings2.values()])
	intersect_keys = filter(lambda x: x in ratings1.keys(), ratings2.keys())
	dot_product = sum([ratings1[key]*ratings2[key] for key in intersect_keys])
	cosine_distance = dot_product/(sqrt(norm1*norm2))
	return cosine_distance