Source code for ai4drpm.services.resources.document_utils

"""
Unified document utilities for creating and transforming documents.

This module provides centralized utilities for document creation and transformation
to reduce code duplication across classification, analysis, and pipeline services.
"""

from typing import List, Dict, Any


def create_document_dicts_from_provisions(provisions: List[Dict], celex: str) -> List[Dict[str, Any]]:
    """
    Convert raw provision records into plain document dicts.

    The result contains only built-in containers plus whatever values the
    provisions carried, so it can be handed to a serializer (the module uses
    it for payloads sent over Celery).

    Args:
        provisions: Provision dictionaries. The identifier is read from
            ``legal_provision_eId`` (falling back to ``eId``), the body from
            ``text`` (falling back to ``content``), and the heading from
            ``article_heading``.
        celex: CELEX identifier copied into every document's ``meta``.

    Returns:
        One dict per provision that has a non-empty identifier, in input
        order. Each dict has a ``content`` key and a ``meta`` key; ``meta``
        holds ``celex``, ``eId`` and ``article_heading``.

    Raises:
        ValueError: If ``provisions`` is empty, or if no provision has a
            usable identifier.
    """
    if not provisions:
        raise ValueError("No provisions provided")

    # Pair each provision with its resolved identifier; a falsy primary key
    # (missing, None or empty string) falls through to the legacy "eId" key.
    identified = (
        (entry, entry.get("legal_provision_eId") or entry.get("eId", ""))
        for entry in provisions
    )

    # Provisions without any identifier are dropped silently here; the check
    # below turns "everything was dropped" into an error.
    result = [
        {
            "content": entry.get("text", "") or entry.get("content", ""),
            "meta": {
                "celex": celex,
                "eId": identifier,
                "article_heading": entry.get("article_heading", ""),
            },
        }
        for entry, identifier in identified
        if identifier
    ]

    if result:
        return result
    raise ValueError("No valid provisions found (all missing eId)")