Thiago G. Martins thigm85

## 5.py
# Print the `schema_to_text` attribute of the application package's schema.
print(app_package.schema.schema_to_text)

## 4.py
from vespa.package import ApplicationPackage, Field, RankProfile, Function

# Application package named "lightgbm"; its schema receives the fields below.
app_package = ApplicationPackage(name="lightgbm")

# Every (name, type) pair becomes a Field indexed as both summary and
# attribute. All three fields are still passed to a single add_fields call.
app_package.schema.add_fields(
    *(
        Field(name=field_name, type=field_type, indexing=["summary", "attribute"])
        for field_name, field_type in (
            ("id", "string"),
            ("numeric", "double"),
            ("categorical", "string"),
        )
    )
)

## 3.py
# Training settings: binary objective, binary log-loss metric, 3 leaves.
params = dict(objective="binary", metric="binary_logloss", num_leaves=3)

# Wrap the features and labels in a LightGBM dataset, then train for
# 5 boosting rounds.
training_set = lgb.Dataset(data=features, label=targets)
model = lgb.train(params, training_set, num_boost_round=5)

## 2.py
# Expand `features` with pandas.get_dummies; the "_a" / "_c" indicator columns
# used below come from the "attribute(categorical)" column.
numeric_features = pd.get_dummies(features)

# Label is 1.0 where the linear score
#   query(value) + attribute(numeric) - 0.5 * cat_a + 0.5 * cat_c
# is strictly greater than 1.0, and 0.0 otherwise.
targets = (
    numeric_features["query(value)"]
    .add(numeric_features["attribute(numeric)"])
    .sub(0.5 * numeric_features["attribute(categorical)_a"])
    .add(0.5 * numeric_features["attribute(categorical)_c"])
    .gt(1.0)
    .astype(float)
)
targets

## 1.py
# Synthetic training frame with 100 rows: two numeric columns filled by
# np.random.random and one categorical column sampled from "a", "b", "c".
# The random draws happen in the same order as the columns are listed.
features = pd.DataFrame({
    "query(value)": np.random.random(size=100),
    "attribute(numeric)": np.random.random(size=100),
})
features["attribute(categorical)"] = pd.Categorical(
    np.random.choice(["a", "b", "c"], size=100)
)
features.head()

## pyvespa_onnx_code.py
# Clone the pyvespa repo: git clone git@github.com:vespa-engine/pyvespa.git
# Install pyvespa from the master branch: pip install -e .[full]
# Save the model.onnx file in the working directory

from vespa.package import ApplicationPackage, Field, OnnxModel, QueryTypeField, RankProfile, Function, FieldSet, SecondPhaseRanking

#
# Create the application package. This assumes a model.onnx file exists in the working directory.
#
app_package = ApplicationPackage(name="crossencoder")

## yql_parentheses_full.json
{'trace': {'children': [{'message': "Using query profile 'default' of type 'root'"}, {'message': "Invoking chain 'vespa' [com.yahoo.search.querytransform.WeakAndReplacementSearcher@vespa -> com.yahoo.prelude.statistics.StatisticsSearcher@native -> ... -> federation@native]"}, {'children': [{'message': 'Query parsed to: Query currently a placeholder, NullItem encountered.'}, {'message': 'YQL+ query parsed: [select * from sources * where rank(default contains "BCC", default contains "JINSA", (default contains "The" OR default contains "New" OR default contains "START" OR default contains "treaty" OR default contains "harms" OR default contains "US" OR default contains "nuclear" OR default contains "capabilities" OR default contains "As" OR default contains "David" OR default contains "Ganz" OR default contains "the" OR default contains "president" OR default contains "of" OR default contains "the" OR default contains "Jewish" OR default contains "Institute" OR default contains "for" OR default contains "Nationa

## yql_parentheses.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                thigm85
                / yql_parentheses.md
            
            
              Created
              February 18, 2022 12:23
            
          
    Another query parser question: what is the role of parentheses?
Example: This long query:
"The New START treaty harms US nuclear capabilities As David Ganz, the president of the Jewish Institute for National Security Affairs (JINSA), argues: This treaty would restrain the development and deployment of new nuclear weapons, missile defense systems, and missile delivery systems. [1] The atrophying U.S. nuclear arsenal and weapons enterprise make reductions in the U.S. strategic nuclear arsenal even more dangerous. The new START treaty allows nuclear modernization but while the US capacity to modernize nuclear weapons is limited and either congress or the president is likely to prevent modernization on cost grounds. The Russians have a large, if unknown, advantage over the United States in terms of nonstrategic, particularly tactical, and nuclear weapons. The New START treaty however ignores these weapons entirely as it is focused on strategic arms. This therefore leaves the Russians with an advantage and p


## 20211022-28.py
# Take the first item produced by iterating `image_dataset` and read its `.shape`.
next(iter(image_dataset)).shape

## 20211022-27.py
def from_image_to_vector(x, process_fn):
    """Encode one input with the module-level ``model`` and return the result.

    ``process_fn`` is applied to ``x``, the output gets a leading dimension of
    size 1 via ``unsqueeze(0)``, and that tensor is handed to
    ``model.encode_image``. Everything runs inside ``torch.no_grad()``.

    Args:
        x: The raw input passed to ``process_fn``.
        process_fn: Callable whose return value supports ``.unsqueeze(0)``.

    Returns:
        Whatever ``model.encode_image`` returns for the processed input.
    """
    with torch.no_grad():
        batch = process_fn(x).unsqueeze(0)
        return model.encode_image(batch)

# Build the dataset from the directory named by the IMG_DIR environment
# variable (os.environ[...] raises KeyError when it is unset). The transform
# forwards its argument to from_image_to_vector with `preprocess` as process_fn.
# NOTE(review): presumably ImageDataset applies `transform` to each image - confirm.
image_dataset = ImageDataset(
    img_dir=os.environ["IMG_DIR"],
    transform=lambda x: from_image_to_vector(x, process_fn=preprocess)
)
	from vespa.package import ApplicationPackage, Field, RankProfile, Function

	# Application package named "lightgbm"; its schema receives the fields below.
	app_package = ApplicationPackage(name="lightgbm")

	# Every (name, type) pair becomes a Field indexed as both summary and
	# attribute. All three fields are still passed to a single add_fields call.
	app_package.schema.add_fields(
	    *(
	        Field(name=field_name, type=field_type, indexing=["summary", "attribute"])
	        for field_name, field_type in (
	            ("id", "string"),
	            ("numeric", "double"),
	            ("categorical", "string"),
	        )
	    )
	)
	# Training settings: binary objective, binary log-loss metric, 3 leaves.
	params = dict(objective="binary", metric="binary_logloss", num_leaves=3)

	# Wrap the features and labels in a LightGBM dataset, then train for
	# 5 boosting rounds.
	training_set = lgb.Dataset(data=features, label=targets)
	model = lgb.train(params, training_set, num_boost_round=5)
	# Expand `features` with pandas.get_dummies; the "_a" / "_c" indicator columns
	# used below come from the "attribute(categorical)" column.
	numeric_features = pd.get_dummies(features)

	# Label is 1.0 where the linear score
	#   query(value) + attribute(numeric) - 0.5 * cat_a + 0.5 * cat_c
	# is strictly greater than 1.0, and 0.0 otherwise.
	targets = (
	    numeric_features["query(value)"]
	    .add(numeric_features["attribute(numeric)"])
	    .sub(0.5 * numeric_features["attribute(categorical)_a"])
	    .add(0.5 * numeric_features["attribute(categorical)_c"])
	    .gt(1.0)
	    .astype(float)
	)
	targets
	# Synthetic training frame with 100 rows: two numeric columns filled by
	# np.random.random and one categorical column sampled from "a", "b", "c".
	# The random draws happen in the same order as the columns are listed.
	features = pd.DataFrame({
	    "query(value)": np.random.random(size=100),
	    "attribute(numeric)": np.random.random(size=100),
	})
	features["attribute(categorical)"] = pd.Categorical(
	    np.random.choice(["a", "b", "c"], size=100)
	)
	features.head()
	# Clone the pyvespa repo: git clone git@github.com:vespa-engine/pyvespa.git
	# Install pyvespa from the master branch: pip install -e .[full]
	# Save the model.onnx file in the working directory

	from vespa.package import ApplicationPackage, Field, OnnxModel, QueryTypeField, RankProfile, Function, FieldSet, SecondPhaseRanking

	#
	# Create the application package. This assumes a model.onnx file exists in the working directory.
	#
	app_package = ApplicationPackage(name="crossencoder")
	def from_image_to_vector(x, process_fn):
	    """Encode one input with the module-level ``model`` and return the result.

	    ``process_fn`` is applied to ``x``, the output gets a leading dimension
	    of size 1 via ``unsqueeze(0)``, and that tensor is handed to
	    ``model.encode_image``. The encoding runs inside ``torch.no_grad()``.

	    Args:
	        x: The raw input passed to ``process_fn``.
	        process_fn: Callable whose return value supports ``.unsqueeze(0)``.

	    Returns:
	        Whatever ``model.encode_image`` returns for the processed input.
	    """
	    # The body must be nested under the def (and the encode call under the
	    # with-statement); with every line at the def's column it does not parse.
	    with torch.no_grad():
	        image_features = model.encode_image(process_fn(x).unsqueeze(0))
	    return image_features

	# Build the dataset from the directory named by the IMG_DIR environment
	# variable (os.environ[...] raises KeyError when it is unset). The transform
	# forwards its argument to from_image_to_vector with `preprocess` as process_fn.
	# NOTE(review): presumably ImageDataset applies `transform` to each image - confirm.
	image_dataset = ImageDataset(
	img_dir=os.environ["IMG_DIR"],
	transform=lambda x: from_image_to_vector(x, process_fn=preprocess)
	)