Skip to content
4 changes: 4 additions & 0 deletions .jules/sentinel.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@
**Vulnerability:** The `_legacy_export_to_video_bytes` function in `src/nodetool/media/video/video_utils.py` used a hardcoded path (`/tmp/temp_video.mp4`) for temporary file storage during video encoding. This allowed for symlink attacks, race conditions, and cross-tenant data leakage if multiple processes or users executed the function concurrently.
**Learning:** Hardcoded temporary paths are a common anti-pattern that can easily be overlooked in fallback or legacy code paths. They violate the principle of isolation and can lead to severe vulnerabilities in multi-user environments.
**Prevention:** Always use secure temporary file creation mechanisms like `tempfile.NamedTemporaryFile` with appropriate cleanup logic (e.g., `delete=False` followed by a `try...finally` block for manual removal) to guarantee uniqueness and prevent race conditions.
## 2024-05-24 - SQL Injection in PostgresAdapter Dynamic Queries
**Vulnerability:** SQL injection vulnerabilities existed in `PostgresAdapter` methods (`create_index`, `drop_index`, `get`, `delete`, and `save`) because string formatting (f-strings) was used directly to construct SQL queries with unvalidated identifiers like table names, index names, and column names.
**Learning:** Even when table and column schemas are generally considered internal, dynamic schema generation and migrations in a generic ORM-like adapter can introduce user-controlled input into query construction. Not using `psycopg.sql.Identifier` leaves these methods exposed.
**Prevention:** Always use the `psycopg.sql` module (`SQL`, `Identifier`, `Placeholder`) for dynamic SQL generation. Apply strict regex-based validation (e.g., `_validate_column_name` for `^[a-zA-Z0-9_]+$`) to any identifiers before constructing the query to ensure defense-in-depth against SQL injection.
77 changes: 59 additions & 18 deletions src/nodetool/models/postgres_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,23 @@ def convert_to_postgres_format(value: Any, py_type: type | None) -> int | float
raise TypeError(f"Unsupported type for PostgreSQL: {py_type}")


def _validate_column_name(column_name: str) -> str:
"""Validate column name to prevent SQL injection.

Args:
column_name: The column name to validate.

Returns:
The validated column name.

Raises:
ValueError: If the column name is invalid.
"""
if not re.match(r"^[a-zA-Z0-9_]+$", column_name):
raise ValueError(f"Invalid identifier: {column_name}")
return column_name


def convert_from_postgres_format(value: Any, py_type: type | None) -> Any:
"""
Convert a value from PostgreSQL to a Python type based on the provided Python type.
Expand Down Expand Up @@ -354,17 +371,27 @@ async def save(self, item: dict[str, Any]) -> None:
item: A dictionary representing the model instance to save.
"""
valid_keys = [key for key in item if key in self.fields]
columns = ", ".join(valid_keys)
placeholders = ", ".join([f"%({key})s" for key in valid_keys])
values = {key: convert_to_postgres_format(item[key], self.fields[key].annotation) for key in valid_keys}
query = (
f"INSERT INTO {self.table_name} ({columns}) VALUES ({placeholders}) ON CONFLICT ({self.get_primary_key()}) DO UPDATE SET "
+ ", ".join([f"{key} = EXCLUDED.{key}" for key in valid_keys])
columns_sql = SQL(", ").join(map(Identifier, map(_validate_column_name, valid_keys)))
placeholders_sql = SQL(", ").join([SQL(f"%({key})s") for key in valid_keys])

update_set_sql = SQL(", ").join(
[SQL("{} = EXCLUDED.{}").format(Identifier(_validate_column_name(key)), Identifier(_validate_column_name(key))) for key in valid_keys]
)

query = SQL("INSERT INTO {} ({}) VALUES ({}) ON CONFLICT ({}) DO UPDATE SET {}").format(
Identifier(_validate_column_name(self.table_name)),
columns_sql,
placeholders_sql,
Identifier(_validate_column_name(self.get_primary_key())),
update_set_sql,
)

values = {key: convert_to_postgres_format(item[key], self.fields[key].annotation) for key in valid_keys}

pool = await self._get_pool()
async with pool.connection() as conn:
async with conn.cursor() as cursor:
await cursor.execute(query, values) # type: ignore[arg-type]
await cursor.execute(query, values)
await conn.commit()

async def get(self, key: Any) -> dict[str, Any] | None:
Expand All @@ -378,11 +405,16 @@ async def get(self, key: Any) -> dict[str, Any] | None:
Attributes are converted back to their Python types.
"""
primary_key = self.get_primary_key()
cols = ", ".join(self.fields)
query = f"SELECT {cols} FROM {self.table_name} WHERE {primary_key} = %s"
cols_sql = SQL(", ").join(map(Identifier, map(_validate_column_name, self.fields.keys())))
query = SQL("SELECT {} FROM {} WHERE {} = {}").format(
cols_sql,
Identifier(_validate_column_name(self.table_name)),
Identifier(_validate_column_name(primary_key)),
Placeholder(),
)
pool = await self._get_pool()
async with pool.connection() as conn, conn.cursor(row_factory=dict_row) as cursor:
await cursor.execute(query, (key,)) # type: ignore[arg-type]
await cursor.execute(query, (key,))
item = await cursor.fetchone()
if item is None:
return None
Expand All @@ -395,11 +427,15 @@ async def delete(self, primary_key: Any) -> None:
primary_key: The primary key value of the item to delete.
"""
pk_column = self.get_primary_key()
query = f"DELETE FROM {self.table_name} WHERE {pk_column} = %s"
query = SQL("DELETE FROM {} WHERE {} = {}").format(
Identifier(_validate_column_name(self.table_name)),
Identifier(_validate_column_name(pk_column)),
Placeholder(),
)
pool = await self._get_pool()
async with pool.connection() as conn:
async with conn.cursor() as cursor:
await cursor.execute(query, (primary_key,)) # type: ignore[arg-type]
await cursor.execute(query, (primary_key,))
await conn.commit()

def _build_condition(self, condition: Condition | ConditionGroup) -> tuple[Composed, list[Any]]:
Expand Down Expand Up @@ -529,28 +565,33 @@ async def execute_sql(self, sql: str, params: Optional[dict[str, Any]] = None) -
return []

async def create_index(self, index_name: str, columns: list[str], unique: bool = False) -> None:
    """Create an index on this adapter's table if it does not already exist.

    All identifiers (index name, table name, column names) are regex-validated
    and wrapped in ``psycopg.sql.Identifier`` so no raw user input reaches the
    query text.

    Args:
        index_name: Name of the index to create.
        columns: Column names the index covers, in order.
        unique: When True, create a UNIQUE index.

    Raises:
        ValueError: If any identifier fails validation.
        psycopg.Error: If index creation fails; the error is printed and re-raised.
    """
    unique_str = SQL("UNIQUE ") if unique else SQL("")
    columns_sql = SQL(", ").join(map(Identifier, map(_validate_column_name, columns)))
    query = SQL("CREATE {}INDEX IF NOT EXISTS {} ON {} ({})").format(
        unique_str,
        Identifier(_validate_column_name(index_name)),
        Identifier(_validate_column_name(self.table_name)),
        columns_sql,
    )

    try:
        pool = await self._get_pool()
        async with pool.connection() as conn:
            async with conn.cursor() as cursor:
                await cursor.execute(query)
            await conn.commit()
    except psycopg.Error as e:
        print(f"PostgreSQL error during index creation: {e}")
        # Bare `raise` preserves the original traceback (unlike `raise e`).
        raise

async def drop_index(self, index_name: str) -> None:
sql = f"DROP INDEX IF EXISTS {index_name}"
query = SQL("DROP INDEX IF EXISTS {}").format(Identifier(_validate_column_name(index_name)))

try:
pool = await self._get_pool()
async with pool.connection() as conn:
async with conn.cursor() as cursor:
await cursor.execute(sql) # type: ignore[arg-type]
await cursor.execute(query)
await conn.commit()
except psycopg.Error as e:
print(f"PostgreSQL error during index deletion: {e}")
Expand Down
6 changes: 6 additions & 0 deletions test_api_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import sys


def main() -> int:
    """Smoke-test that the ``nodetool.api`` package is importable.

    Returns:
        0 if the import succeeds, 1 otherwise — so CI and shell callers can
        detect a broken installation from the exit status (the original
        script always exited 0, masking failures).
    """
    try:
        import nodetool.api  # noqa: F401  # import itself is the test
    except Exception as e:
        # Broad catch is deliberate: any failure mode (ImportError, syntax
        # error in the package, etc.) should be reported, not crash the probe.
        print(f"Error: {e}")
        return 1
    print("Found nodetool.api")
    return 0


# Guarded so importing this module has no side effects.
if __name__ == "__main__":
    sys.exit(main())
Loading