Hannes Hapke hanneshapke

## redis_vectors.py
import bz2
import numpy as np
import pickle

from django.conf import settings
from django_redis import get_redis_connection
from gensim.models.keyedvectors import KeyedVectors

from .constants import GOOGLE_WORD2VEC_MODEL_NAME
from .redis import load_word2vec_model_into_redis, query_redis

## redis.py
# Standard-library modules.
import bz2
import pickle

# Third-party packages.
from django.conf import settings
# The package is `django_redis`; the previous `djang_redis` spelling raised
# ModuleNotFoundError at import time.
from django_redis import get_redis_connection
from tqdm import tqdm

# Package-local constant.
from .constants import GOOGLE_WORD2VEC_MODEL_NAME


## .bashrc
# Name of the project directory currently being worked on; read by `latest_dev` below.
export CURRENT_DEV=kreuzberg
# Change into the current project directory. The single quotes keep $CURRENT_DEV
# unexpanded in the alias definition, so it is resolved when the alias is used.
alias latest_dev='cd ~/development/$CURRENT_DEV'

# ssh tunnel
# Invokes ~/bin/ssh_host_color.sh with the arguments shown.
# NOTE(review): presumably the wrapper hands its arguments to ssh, so that
# -p selects the remote port and -L forwards local port 6006 to gpu:6006 — confirm.
alias ssd='~/bin/ssh_host_color.sh ubuntu@remote -p 823 -L 6006:gpu:6006'

# add additional paths to the PYTHONPATH
# The new directory is appended after the existing value instead of replacing it.
export PYTHONPATH=$PYTHONPATH:~/development/additional_package

# git shortcuts

## model_inference_tf_example.py
import base64
import googleapiclient.discovery

from example_pb2 import Example
from feature_pb2 import BytesList, Feature, Features


def _convert_to_pb(value):
    """ Serialize a given sentence to the ProtoBuf Structure required to model the tf.Example data structure.
    Feel free to add more features and different data types if your model requires different inputs. An overview of

## tfx-pipeline-for-bert-preprocessing.ipynb

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              8 stars
            
          
                hanneshapke
                / tfx-pipeline-for-bert-preprocessing.ipynb
            
            
              Last active
              August 21, 2021 06:18
            
              
                TFX Pipeline for Bert Preprocessing.ipynb
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## before_tokenization_with_berttokenizer.py
# Example input batch: three raw sentences, one string per list element.
# Each element needs its own comma; adjacent string literals without one are
# concatenated by Python into a single string.
[
    "Clara is playing the piano.",
    "Maria likes to play soccer.",
    "Hi Tom!",
]

## after_tokenization_with_berttokenizer.py
[
    [[b'clara'], [b'is'], [b'playing'], [b'the'], [b'piano'], [b'.']],
    [[b'maria'], [b'likes'], [b'to'], [b'play'], [b'soccer'], [b'.']],
    [[b'hi'], [b'tom'], [b'!']]
]

## use_of_berttokenizer.py
import tensorflow_text as text

# NOTE(review): `bert_layer` and `tf` are not defined in this snippet — presumably
# `bert_layer` is a loaded TF Hub BERT layer and `tf` comes from
# `import tensorflow as tf`; confirm against the surrounding code.
# Read the vocabulary asset path and the lower-casing flag from the layer's
# resolved object.
vocab_file_path = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()

# Build the tokenizer from those two values, with tf.int64 as the token output type.
bert_tokenizer = text.BertTokenizer(
    vocab_lookup_table=vocab_file_path,
    token_out_type=tf.int64,
    lower_case=do_lower_case
)

## example_dataset.csv
‘This is the best movie I have ever seen ...’       -> 1
‘Probably the worst movie produced in 2019 ...’     -> 0
‘Tom Hanks\’ performance turns this movie into ...’ -> ?

## partial_setup_of_berttokenizer.py
import tensorflow_hub as hub

# TF Hub handle of the BERT module to load.
BERT_TFHUB_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2"

# Wrap the hub module as a Keras layer; trainable=True is passed explicitly.
bert_layer = hub.KerasLayer(handle=BERT_TFHUB_URL, trainable=True)
# Path of the vocabulary asset exposed by the loaded module's resolved object.
vocab_file_path = bert_layer.resolved_object.vocab_file.asset_path.numpy()
	import bz2
	import numpy as np
	import pickle

	from django.conf import settings
	from django_redis import get_redis_connection
	from gensim.models.keyedvectors import KeyedVectors

	from .constants import GOOGLE_WORD2VEC_MODEL_NAME
	from .redis import load_word2vec_model_into_redis, query_redis
	export CURRENT_DEV=kreuzberg
	alias latest_dev='cd ~/development/$CURRENT_DEV'

	# ssh tunnel
	alias ssd='~/bin/ssh_host_color.sh ubuntu@remote -p 823 -L 6006:gpu:6006'

	# add additional paths to the PYTHONPATH
	export PYTHONPATH=$PYTHONPATH:~/development/additional_package

	# git shortcuts
	import base64
	import googleapiclient.discovery

	from example_pb2 import Example
	from feature_pb2 import BytesList, Feature, Features


	def _convert_to_pb(value):
	""" Serialize a given sentence to the ProtoBuf Structure required to model the tf.Example data structure.
	Feel free to add more features and different data types if your model requires different inputs. An overview of
	[
	"Clara is playing the piano.",
	"Maria likes to play soccer.",
	"Hi Tom!"
	]
	[
	[[b'clara'], [b'is'], [b'playing'], [b'the'], [b'piano'], [b'.']],
	[[b'maria'], [b'likes'], [b'to'], [b'play'], [b'soccer'], [b'.']],
	[[b'hi'], [b'tom'], [b'!']]
	]
	import tensorflow_text as text

	vocab_file_path = bert_layer.resolved_object.vocab_file.asset_path.numpy()
	do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()

	bert_tokenizer = text.BertTokenizer(
	vocab_lookup_table=vocab_file_path,
	token_out_type=tf.int64,
	lower_case=do_lower_case
	)
	‘This is the best movie I have ever seen ...’ -> 1
	‘Probably the worst movie produced in 2019 ...’ -> 0
	‘Tom Hanks\’ performance turns this movie into ...’ -> ?
	import tensorflow_hub as hub

	BERT_TFHUB_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2"

	bert_layer = hub.KerasLayer(handle=BERT_TFHUB_URL, trainable=True)
	vocab_file_path = bert_layer.resolved_object.vocab_file.asset_path.numpy()