QueryBuilder Guide

Complete guide to using QueryBuilder for flexible DSIS data queries.

Overview

QueryBuilder provides a fluent API for constructing OData queries with type safety and automatic result casting when used with dsis_model_sdk.

Basic Configuration

Use environment variables for configuration (recommended). Set these in your CI/infra or a local .env file and load them with python-dotenv.

import os
from dotenv import load_dotenv

load_dotenv()

config = DSISConfig(
    environment=Environment[os.getenv("ENVIRONMENT", "DEV")],
    tenant_id=os.getenv("TENANT_ID"),
    client_id=os.getenv("CLIENT_ID"),
    client_secret=os.getenv("CLIENT_SECRET"),
    access_app_id=os.getenv("ACCESS_APP_ID"),
    dsis_username=os.getenv("DSIS_USERNAME"),
    dsis_password=os.getenv("DSIS_PASSWORD"),
    subscription_key_dsauth=os.getenv("SUBSCRIPTION_KEY_DSAUTH"),
    subscription_key_dsdata=os.getenv("SUBSCRIPTION_KEY_DSDATA"),
)

QueryBuilder Basics

QueryBuilder takes three required constructor parameters (model_name, district_id, and project). The rest of the query is then built by chaining methods on the resulting object.

Simple Query with String Schema

# Build query - QueryBuilder IS the query object (no .build() needed)
query = (
    QueryBuilder(
        model_name="OW5000",
        district_id="OpenWorks_OW_SV4TSTA_SingleSource-OW_SV4TSTA",
        project="SNORRE",
    )
    .schema("Fault")
    .select("fault_id,fault_type,fault_name")
    .filter("fault_type eq 'NORMAL'")
)

# Execute query
for fault in client.execute_query(query):
    print(fault)

Type-Safe Query with Model Class

from dsis_model_sdk.models.common import Basin

# Build query with model class for automatic type safety
query = (
    QueryBuilder(
        model_name="OW5000",
        district_id="your-district-id",
        project="your-project",
    )
    .schema(Basin)
    .select("basin_name,basin_id,native_uid")
)

# Auto-cast results to Basin instances
for basin in client.execute_query(query, cast=True):
    print(f"Basin: {basin.basin_name} (ID: {basin.basin_id})")

QueryBuilder Methods

schema()

Set the data schema (table) to query.

# String schema name
query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema("Well")

# Model class (enables type-safe casting)
from dsis_model_sdk.models.native import Well
query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema(Well)

select()

Choose specific fields to retrieve.

# Single field
query.select("well_name")

# Multiple fields (comma-separated)
query.select("well_name,well_uwi,spud_date")

# Chain multiple selects (they concatenate)
query.select("well_name").select("well_uwi")

filter()

Apply OData filter expressions.

# Simple equality
query.filter("well_type eq 'Producer'")

# Comparison operators
query.filter("depth gt 1000")
query.filter("depth lt 5000")

# Logical operators
query.filter("well_type eq 'Producer' and depth gt 1000")

# String functions
query.filter("contains(well_name, 'A-')")

expand()

Include related entities.

# Expand single relationship
query.expand("wellbores")

# Expand multiple relationships
query.expand("wellbores,interpretations")

format()

Set the response format parameter.

# Default: json format (included by default)
query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema("Well").select("well_name")
# Result: Well?$format=json&$select=well_name

# Explicitly set to json
query.format("json")

# Omit format parameter entirely
query.format("")  # or .format(None)
# Result: Well?$select=well_name

entity()

Target a specific entity's binary data field for bulk data retrieval. This configures the query so that build_endpoint() produces a path ending in /{Schema}('{native_uid}')/{data_field}.

# Standard bulk data (default data_field="data")
bulk_query = query.entity("46075")
# build_endpoint() → ".../HorizonData3D('46075')/data"

# SurfaceGrid with $value endpoint
bulk_query = query.entity("46075", data_field="$value")
# build_endpoint() → ".../SurfaceGrid('46075')/$value"

build_endpoint()

Build the full API endpoint path from the query's configuration.

query = QueryBuilder(
    model_name="OW5000",
    district_id="123",
    project="SNORRE",
).schema("Well")

# Without entity: returns collection path
query.build_endpoint()
# "OW5000/5000107/123/SNORRE/Well"

# With entity: returns entity data path
query.entity("46075")
query.build_endpoint()
# "OW5000/5000107/123/SNORRE/Well('46075')/data"

reset()

Clear query parameters for reuse (clears schema, select, expand, filter, format, and entity).

query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
)

# First query
query.schema("Well").select("well_name")
for well in client.execute_query(query):
    print(well)

# Reset and build new query
query.reset().schema("Fault").select("fault_type")
for fault in client.execute_query(query):
    print(fault)

Automatic Pagination

The DSIS API returns a maximum of 1000 items per response. When there are more results, the response includes an odata.nextLink field pointing to the next page.

By default, execute_query() automatically follows all odata.nextLink references and yields items as they are fetched (memory efficient). You can control pagination with the max_pages parameter:

# Default: Fetch all pages (max_pages=-1)
query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema("Well")

# Option 1: Process items as they arrive (streaming, memory efficient)
for well in client.execute_query(query):
    process(well)  # Process each item immediately

# Option 2: Collect all items into a list
all_wells = list(client.execute_query(query))
print(f"Total wells: {len(all_wells)}")

# Option 3: Fetch only first page (max_pages=1)
first_page_items = list(client.execute_query(query, max_pages=1))
print(f"First page: {len(first_page_items)} wells (max 1000)")

# Option 4: Fetch first two pages (max_pages=2)
two_pages_items = list(client.execute_query(query, max_pages=2))
print(f"First two pages: {len(two_pages_items)} wells")

max_pages Parameter:

max_pages=-1 (default): Fetch and yield from all pages
max_pages=1: Yield items from first page only (max 1000 items)
max_pages=2: Yield items from first two pages
max_pages=N: Yield items from first N pages (or fewer if fewer pages available)

When to use different max_pages values:

-1 (unlimited): You want all data automatically across all pages
1: You only need a sample, or want to implement custom pagination
N>1: You want to cap how much data is fetched (at most N pages, i.e. up to N × 1000 items)

Request Timeout

You can set an optional timeout parameter on execute_query(), get(), get_bulk_data(), and get_bulk_data_stream() to control how long each HTTP request waits before raising an error. By default, no timeout is applied.

# Single timeout value (seconds) — applies to both connect and read
for item in client.execute_query(query, timeout=300):
    process(item)

# Tuple timeout — (connect_timeout, read_timeout) in seconds
for item in client.execute_query(query, timeout=(5, 300)):
    process(item)

# Also works with get(), get_bulk_data(), and get_bulk_data_stream()
data = client.get("OW5000", "5000107", "123", "SNORRE", schema="Well", timeout=60)
binary = client.get_bulk_data(bulk_query, timeout=600)

timeout Parameter:

timeout=None (default): No timeout — wait indefinitely
timeout=300: Both connect and read timeout set to 300 seconds
timeout=(5, 300): Connect timeout of 5 seconds, read timeout of 300 seconds

The timeout applies to each individual HTTP request, including pagination requests. If a query fetches multiple pages, each page request uses the same timeout.

!!! tip
    For large paginated queries, use a generous read timeout (e.g., timeout=(5, 300)) to allow time for the server to process each page while still failing fast on connection issues.

Execution Patterns

⚠️ Critical: Schema Requirement for `cast=True`

If you want to use cast=True to automatically convert results to model instances, you MUST pass a model class (not a string) to .schema():

# ✅ Correct: Pass model class for casting
from dsis_model_sdk.models.common import Basin
query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema(Basin)
results = client.execute_query(query, cast=True)  # Works!

# ❌ Wrong: String schema name won't work with cast=True
query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema("Basin")
results = client.execute_query(query, cast=True)  # Has no effect!

Pattern 1: Basic Execution (Streaming)

query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema("Basin")

# Process items as they arrive (memory efficient)
for item in client.execute_query(query):
    print(item.get("basin_name"))

# Or collect all items into a list (uses more memory)
all_items = list(client.execute_query(query))
print(f"Total items: {len(all_items)}")

Pattern 1b: Single Page Execution

query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema("Basin")

# Fetch only first page (max 1000 items)
first_page_items = list(client.execute_query(query, max_pages=1))
print(f"Retrieved {len(first_page_items)} items from first page")

Pattern 2: Auto-Casting with Model Class

from dsis_model_sdk.models.common import Basin

query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema(Basin).select("basin_name,basin_id")

# Option 1: Stream and cast each item as it arrives (memory efficient)
for basin in client.execute_query(query, cast=True):
    print(f"Basin: {basin.basin_name} (ID: {basin.basin_id})")

# Option 2: Collect all cast items into a list
basins = list(client.execute_query(query, cast=True))

# Option 3: Fetch only first page and cast
basins = list(client.execute_query(query, cast=True, max_pages=1))

⚠️ IMPORTANT: Using cast=True

To use cast=True, you MUST build your query using a model class imported from dsis_model_sdk, not a string schema name:

# ✅ CORRECT: Using model class from dsis_model_sdk
from dsis_model_sdk.models.common import Basin  # or native
query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema(Basin)
basins = list(client.execute_query(query, cast=True))

# ❌ INCORRECT: Using string schema name with cast=True
query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema("Basin")
basins = list(client.execute_query(query, cast=True))  # cast=True has no effect here!

The model class passed to .schema() can be imported from either the common or the native models module:

from dsis_model_sdk.models.common import Basin
from dsis_model_sdk.models.native import Basin

If you use a string schema name, cast=True will have no effect. Omit cast=True or use a model class instead.

Pattern 3: Error Handling

try:
    query = QueryBuilder(
        model_name="OW5000",
        district_id=dist,
        project=prj,
    ).schema("Well")

    # Process items as they arrive
    item_count = 0
    for item in client.execute_query(query):
        item_count += 1
        # Process each item

    print(f"Retrieved {item_count} wells")
except Exception as e:
    print(f"Query failed: {e}")

Complete Examples

Example 1: Filtered Query with Streaming

# Create `config` as shown in the "Basic Configuration" section above and
# construct your DSIS client from it. The snippet below assumes `client`
# is already created and available.

# Build query with filters
query = (
    QueryBuilder(
        model_name="OW5000",
        district_id="OpenWorks_OW_SV4TSTA_SingleSource-OW_SV4TSTA",
        project="SNORRE",
    )
    .schema("Well")
    .select("well_name,well_uwi,spud_date")
    .filter("well_type eq 'Producer'")
)

# Process wells as they arrive
for well in client.execute_query(query):
    print(f"Well: {well['well_name']}")

# Or collect all into a list
wells = list(client.execute_query(query))
print(f"Retrieved {len(wells)} producer wells")

Example 2: Type-Safe Query with Error Handling

from dsis_model_sdk.models.common import Basin

query = (
    QueryBuilder(
        model_name="OW5000",
        district_id=dist,
        project=prj,
    )
    .schema(Basin)
    .select("basin_name,basin_id,native_uid")  # Include required fields
)

try:
    # Stream and auto-cast each basin as it arrives
    for basin in client.execute_query(query, cast=True):
        print(f"Basin: {basin.basin_name}")
        print(f"  ID: {basin.basin_id}")
        print(f"  UID: {basin.native_uid}")

except ImportError:
    print("dsis_model_sdk not installed - install for type-safe casting")
except Exception as e:
    print(f"Query failed: {e}")

Example 3: Reusable Query Builder

# Create base query builder
base_query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
)

# Query 1: Get all faults
fault_query = base_query.schema("Fault").select("fault_id,fault_type")
faults = list(client.execute_query(fault_query))

# Query 2: Get all wells (reset and rebuild)
well_query = base_query.reset().schema("Well").select("well_name,well_uwi")
wells = list(client.execute_query(well_query))

Example 4: Single Page Execution

# Get first page only (max 1000 items)
query = QueryBuilder(
    model_name="OW5000",
    district_id=dist,
    project=prj,
).schema("Well")
first_page_wells = list(client.execute_query(query, max_pages=1))

print(f"First page: {len(first_page_wells)} wells")

# For limited pagination (e.g., 2-3 pages), use max_pages parameter
two_pages_wells = list(client.execute_query(query, max_pages=2))
print(f"First two pages: {len(two_pages_wells)} wells")

Tips and Best Practices

Always specify required fields: When using model classes, ensure you select all required fields for the model
Use environment variables: Never hardcode credentials in your code
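Handle errors gracefully: Wrap the loop that consumes execute_query() in a try-except block. Results are streamed page by page, so request errors can surface while you are iterating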
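Reuse QueryBuilder: Use .reset() to clear and rebuild queries instead of creating new instances. The builder is modified in place, so finish consuming one query's results before resetting it for the next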
Enable auto-casting: Use cast=True with model classes for type-safe results
Test connection first: Call client.test_connection() when setting up to see if credentials are correct
Set timeouts for production: Use timeout to prevent requests from hanging indefinitely (e.g., timeout=300 for 5 minutes)