Skip to content

Commit

Permalink
Merge pull request #382 from vespa-engine/tgm/body-query-only
Browse files Browse the repository at this point in the history
Move query module to learntorank
  • Loading branch information
thigm85 authored Oct 9, 2022
2 parents ec80c6a + 75c3e18 commit 7328de2
Show file tree
Hide file tree
Showing 14 changed files with 514 additions and 2,127 deletions.
960 changes: 135 additions & 825 deletions docs/sphinx/source/collect-training-data.ipynb

Large diffs are not rendered by default.

123 changes: 114 additions & 9 deletions docs/sphinx/source/exchange-data-with-app.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,117 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Waiting for configuration server.\n",
"Waiting for configuration server.\n",
"Waiting for configuration server.\n",
"Waiting for configuration server.\n",
"Waiting for application status.\n",
"Waiting for application status.\n",
"Waiting for configuration server, 0/300 seconds...\n",
"Waiting for configuration server, 5/300 seconds...\n",
"Waiting for configuration server, 10/300 seconds...\n",
"Waiting for application status, 0/300 seconds...\n",
"Waiting for application status, 5/300 seconds...\n",
"Waiting for application status, 10/300 seconds...\n",
"Waiting for application status, 15/300 seconds...\n",
"Waiting for application status, 20/300 seconds...\n",
"Waiting for application status, 25/300 seconds...\n",
"Waiting for application status, 30/300 seconds...\n",
"Finished deployment.\n"
]
}
],
"source": [
"# this is a hidden cell. It will not show on the documentation HTML.\n",
"import os\n",
"from vespa.package import (\n",
" HNSW,\n",
" Document,\n",
" Field,\n",
" Schema,\n",
" FieldSet,\n",
"# SecondPhaseRanking,\n",
" RankProfile,\n",
" ApplicationPackage,\n",
" QueryProfile,\n",
" QueryProfileType,\n",
" QueryTypeField\n",
")\n",
"\n",
"from vespa.deployment import VespaDocker\n",
"from vespa.gallery import QuestionAnswering\n",
"\n",
"class QuestionAnswering(ApplicationPackage):\n",
" def __init__(self, name: str = \"qa\"):\n",
" context_document = Document(\n",
" fields=[\n",
" Field(\n",
" name=\"questions\",\n",
" type=\"array<int>\",\n",
" indexing=[\"summary\", \"attribute\"],\n",
" ),\n",
" Field(name=\"dataset\", type=\"string\", indexing=[\"summary\", \"attribute\"]),\n",
" Field(name=\"context_id\", type=\"int\", indexing=[\"summary\", \"attribute\"]),\n",
" Field(\n",
" name=\"text\",\n",
" type=\"string\",\n",
" indexing=[\"summary\", \"index\"],\n",
" index=\"enable-bm25\",\n",
" ),\n",
" ]\n",
" )\n",
" context_schema = Schema(\n",
" name=\"context\",\n",
" document=context_document,\n",
" fieldsets=[FieldSet(name=\"default\", fields=[\"text\"])],\n",
" rank_profiles=[\n",
" RankProfile(name=\"bm25\", inherits=\"default\", first_phase=\"bm25(text)\"),\n",
" RankProfile(\n",
" name=\"nativeRank\",\n",
" inherits=\"default\",\n",
" first_phase=\"nativeRank(text)\",\n",
" ),\n",
" ],\n",
" )\n",
" sentence_document = Document(\n",
" inherits=\"context\",\n",
" fields=[\n",
" Field(\n",
" name=\"sentence_embedding\",\n",
" type=\"tensor<float>(x[512])\",\n",
" indexing=[\"attribute\", \"index\"],\n",
" ann=HNSW(\n",
" distance_metric=\"euclidean\",\n",
" max_links_per_node=16,\n",
" neighbors_to_explore_at_insert=500,\n",
" ),\n",
" )\n",
" ],\n",
" )\n",
" sentence_schema = Schema(\n",
" name=\"sentence\",\n",
" document=sentence_document,\n",
" fieldsets=[FieldSet(name=\"default\", fields=[\"text\"])],\n",
" rank_profiles=[\n",
" RankProfile(\n",
" name=\"semantic-similarity\",\n",
" inherits=\"default\",\n",
" first_phase=\"closeness(sentence_embedding)\",\n",
" ),\n",
" RankProfile(name=\"bm25\", inherits=\"default\", first_phase=\"bm25(text)\"),\n",
" RankProfile(\n",
" name=\"bm25-semantic-similarity\",\n",
" inherits=\"default\",\n",
" first_phase=\"bm25(text) + closeness(sentence_embedding)\",\n",
" ),\n",
" ],\n",
" )\n",
" super().__init__(\n",
" name=name,\n",
" schema=[context_schema, sentence_schema],\n",
" query_profile=QueryProfile(),\n",
" query_profile_type=QueryProfileType(\n",
" fields=[\n",
" QueryTypeField(\n",
" name=\"ranking.features.query(query_embedding)\",\n",
" type=\"tensor<float>(x[512])\",\n",
" )\n",
" ]\n",
" ),\n",
" )\n",
"\n",
"app_package = QuestionAnswering()\n",
"vespa_docker = VespaDocker()\n",
Expand Down Expand Up @@ -147,7 +243,16 @@
"execution_count": 4,
"id": "meaning-jamaica",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successful documents fed: 100/100.\n",
"Batch progress: 1/1.\n"
]
}
],
"source": [
"response = app.feed_batch(schema=\"sentence\", batch=batch_feed)"
]
Expand Down Expand Up @@ -648,7 +753,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.13"
}
},
"nbformat": 4,
Expand Down
38 changes: 24 additions & 14 deletions docs/sphinx/source/query-model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -112,21 +112,21 @@
" <th>0</th>\n",
" <td>0</td>\n",
" <td>id:covid-19:doc::142863</td>\n",
" <td>11.334371</td>\n",
" <td>11.824458</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>id:covid-19:doc::187156</td>\n",
" <td>11.318515</td>\n",
" <td>11.818079</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>id:covid-19:doc::31328</td>\n",
" <td>11.288960</td>\n",
" <td>11.288179</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
Expand All @@ -135,9 +135,9 @@
],
"text/plain": [
" qid doc_id score rank\n",
"0 0 id:covid-19:doc::142863 11.334371 0\n",
"1 0 id:covid-19:doc::187156 11.318515 1\n",
"2 0 id:covid-19:doc::31328 11.288960 2"
"0 0 id:covid-19:doc::142863 11.824458 0\n",
"1 0 id:covid-19:doc::187156 11.818079 1\n",
"2 0 id:covid-19:doc::31328 11.288179 2"
]
},
"execution_count": 4,
Expand All @@ -146,7 +146,13 @@
}
],
"source": [
"standard_result = app.query(query=\"this is a test\", query_model=standard_query_model)\n",
"from learntorank.query import send_query\n",
"\n",
"standard_result = send_query(\n",
" app=app, \n",
" query=\"this is a test\", \n",
" query_model=standard_query_model\n",
")\n",
"standard_result.get_hits().head(3)"
]
},
Expand Down Expand Up @@ -187,21 +193,21 @@
" <th>0</th>\n",
" <td>0</td>\n",
" <td>id:covid-19:doc::142863</td>\n",
" <td>11.334371</td>\n",
" <td>11.824458</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>id:covid-19:doc::187156</td>\n",
" <td>11.318515</td>\n",
" <td>11.818079</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>id:covid-19:doc::31328</td>\n",
" <td>11.288960</td>\n",
" <td>11.288179</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
Expand All @@ -210,9 +216,9 @@
],
"text/plain": [
" qid doc_id score rank\n",
"0 0 id:covid-19:doc::142863 11.334371 0\n",
"1 0 id:covid-19:doc::187156 11.318515 1\n",
"2 0 id:covid-19:doc::31328 11.288960 2"
"0 0 id:covid-19:doc::142863 11.824458 0\n",
"1 0 id:covid-19:doc::187156 11.818079 1\n",
"2 0 id:covid-19:doc::31328 11.288179 2"
]
},
"execution_count": 5,
Expand All @@ -221,7 +227,11 @@
}
],
"source": [
"flexible_result = app.query(query=\"this is a test\", query_model=flexible_query_model)\n",
"flexible_result = send_query(\n",
" app=app, \n",
" query=\"this is a test\", \n",
" query_model=flexible_query_model\n",
")\n",
"flexible_result.get_hits().head(3)"
]
}
Expand Down
31 changes: 19 additions & 12 deletions docs/sphinx/source/query.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
{
"data": {
"text/plain": [
"9865"
"9758"
]
},
"execution_count": 4,
Expand Down Expand Up @@ -120,7 +120,7 @@
{
"data": {
"text/plain": [
"['8n6eybze', '2lwzhqer', '8n6eybze', '8art2tyj', 'xej338lo']"
"['8n6eybze', '2lwzhqer', '8art2tyj', 'oud5ioks', 'ifanumo8']"
]
},
"execution_count": 6,
Expand Down Expand Up @@ -152,9 +152,10 @@
"metadata": {},
"outputs": [],
"source": [
"from learntorank.query import QueryModel, OR, Ranking\n",
"from learntorank.query import QueryModel, OR, Ranking, send_query\n",
"\n",
"results = app.query(\n",
"results = send_query(\n",
" app=app,\n",
" query=\"Is remdesivir an effective treatment for COVID-19?\", \n",
" query_model = QueryModel(\n",
" match_phase=OR(), \n",
Expand Down Expand Up @@ -224,8 +225,11 @@
"metadata": {},
"outputs": [],
"source": [
"results = app.query(query=\"Is remdesivir an effective treatment for COVID-19?\", \n",
" query_model=query_model)"
"results = send_query(\n",
" app=app,\n",
" query=\"Is remdesivir an effective treatment for COVID-19?\", \n",
" query_model=query_model\n",
")"
]
},
{
Expand All @@ -236,7 +240,7 @@
{
"data": {
"text/plain": [
"1520"
"1513"
]
},
"execution_count": 11,
Expand Down Expand Up @@ -270,7 +274,7 @@
{
"data": {
"text/plain": [
"[144384, 269386, 144385]"
"[144384, 269386, 280365]"
]
},
"execution_count": 12,
Expand All @@ -296,9 +300,12 @@
"metadata": {},
"outputs": [],
"source": [
"results_with_recall = app.query(query=\"Is remdesivir an effective treatment for COVID-19?\", \n",
" query_model=query_model,\n",
" recall = (\"id\", top_ids[1:3]))"
"results_with_recall = send_query(\n",
" app=app,\n",
" query=\"Is remdesivir an effective treatment for COVID-19?\", \n",
" query_model=query_model,\n",
" recall = (\"id\", top_ids[1:3])\n",
")"
]
},
{
Expand All @@ -316,7 +323,7 @@
{
"data": {
"text/plain": [
"[269386, 144385]"
"[269386, 280365]"
]
},
"execution_count": 14,
Expand Down
Loading

0 comments on commit 7328de2

Please sign in to comment.