# OpenAPI 3.1 description of the public Mighty Scan API.
openapi: 3.1.0
info:
  title: Mighty Scan API
  summary: Input trust for text, files, images, OCR output, model output, and workflow evidence.
  # Quoted so the version stays a string.
  version: '1.0.0'
  license:
    url: https://trymighty.ai/terms
    name: Mighty Terms
  # Literal block scalar: the line breaks below are part of the description text.
  description: |
    Mighty scans submitted material before people, AI systems, OCR, or automation trust it.
    Mighty flags suspicious evidence and helps route risky material. It does not prove fraud by itself.
    Public production responses are sanitized: internal diagnostics, dense timings, raw provider
    internals, and some per-detector confidence fields may be omitted even when available internally.
# One production gateway. The top-level security requirement applies bearer
# authentication to every operation that does not override it.
servers:
  - description: Production gateway
    url: https://gateway.trymighty.ai
security:
  - bearerAuth: []
tags:
  - description: Unified scan and async polling.
    name: Scan
paths:
  # POST /v1/scan: single entry point for every scan. The body may be JSON,
  # multipart form data, or raw bytes (application/octet-stream).
  /v1/scan:
    post:
      tags:
        - Scan
      operationId: createScan
      summary: Scan text, files, images, PDFs, documents, OCR output, or model output.
      description: |
        Send one item to Mighty and route the result by action.
        Use scan_phase=input for submitted material.
        Use scan_phase=output for generated or extracted output and include scan_group_id.
      x-codeSamples:
        - lang: curl
          label: Text scan
          source: |
            curl -X POST https://gateway.trymighty.ai/v1/scan \
              -H "Authorization: Bearer $MIGHTY_API_KEY" \
              -H "Content-Type: application/json" \
              -d '{
                "content": "Please summarize this claim note.",
                "content_type": "text",
                "scan_phase": "input",
                "mode": "secure",
                "focus": "steg",
                "metadata": {
                  "workflow": "claims_intake",
                  "ai_involved": "true",
                  "submitted_as_ai_generated": "unknown"
                }
              }'
        - lang: javascript
          label: Node fetch
          source: |
            const response = await fetch("https://gateway.trymighty.ai/v1/scan", {
              method: "POST",
              headers: {
                Authorization: `Bearer ${process.env.MIGHTY_API_KEY}`,
                "Content-Type": "application/json"
              },
              body: JSON.stringify({
                content: "Please summarize this claim note.",
                content_type: "text",
                scan_phase: "input",
                mode: "secure",
                focus: "steg",
                metadata: {
                  workflow: "claims_intake",
                  ai_involved: "true",
                  submitted_as_ai_generated: "unknown"
                }
              })
            });

            const scan = await response.json();
      requestBody:
        # JSON and multipart requests carry routing fields in the body;
        # octet-stream requests carry them in the query parameters declared below.
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/ScanRequest"
            examples:
              textInput:
                summary: Text input scan
                value:
                  content: Please summarize this claim note.
                  content_type: text
                  scan_phase: input
                  mode: secure
                  focus: steg
                  metadata:
                    # Metadata values are strings, so the flag is quoted.
                    workflow: claims_intake
                    ai_involved: "true"
                    submitted_as_ai_generated: unknown
              outputScan:
                summary: Model output scan
                value:
                  content: Safe model output shown to the user.
                  content_type: text
                  scan_phase: output
                  mode: secure
                  focus: steg
                  profile: ai_safety
                  # Output scans include scan_group_id to link related scans.
                  scan_group_id: 9b3e4f8d-96c9-4f42-8338-8cf9571c1c70
                  metadata:
                    workflow: chat_output
                    ai_involved: "true"
                    submitted_as_ai_generated: "true"
          multipart/form-data:
            schema:
              $ref: "#/components/schemas/ScanMultipartRequest"
          application/octet-stream:
            schema:
              type: string
              format: binary
            examples:
              rawImage:
                summary: Raw image bytes with routing fields in query params
                value: binary file body
      parameters:
        # All query parameters are optional at the schema level; they exist for
        # raw binary requests, which have no body fields.
        - in: query
          name: scan_phase
          required: false
          schema:
            $ref: "#/components/schemas/ScanPhase"
          description: Required for raw binary requests. Optional when included in JSON or multipart form data.
        - in: query
          name: content_type
          required: false
          schema:
            $ref: "#/components/schemas/ContentType"
          description: Optional routing hint for raw binary requests.
        - in: query
          name: mode
          required: false
          schema:
            $ref: "#/components/schemas/Mode"
          description: Optional mode for raw binary requests.
        - in: query
          name: filename
          required: false
          schema:
            type: string
          description: Optional filename hint for raw binary requests.
      responses:
        # Status codes are quoted so they stay string keys.
        "200":
          description: Scan completed or async scan accepted.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ScanResponse"
              examples:
                allow:
                  summary: Allow response
                  value:
                    action: ALLOW
                    risk_score: 4
                    risk_level: LOW
                    threats: []
                    content_type_detected: text
                    scan_phase: input
                    scan_id: 4e7c5fc1-6947-492b-bd22-0589d6477c8b
                    request_id: ab82f4ad-8d64-4bb4-b4ed-77df63291198
                    scan_group_id: 9b3e4f8d-96c9-4f42-8338-8cf9571c1c70
                    session_id: sess_6Kqv1vJx
                    scan_status: complete
                    processing_ms: 87
                redactedOutput:
                  summary: Blocked output with redaction
                  value:
                    action: BLOCK
                    risk_score: 91
                    risk_level: CRITICAL
                    threats:
                      - category: secrets_exposure
                        reason: Sensitive credential-like material was detected.
                    content_type_detected: text
                    scan_phase: output
                    redacted_output: I cannot share that sensitive value.
                    scan_id: 6f3293fe-5344-42c6-a00f-5813a3d33c61
                    scan_group_id: 9b3e4f8d-96c9-4f42-8338-8cf9571c1c70
                warnImage:
                  summary: Suspicious image response
                  value:
                    action: WARN
                    risk_score: 74
                    risk_level: HIGH
                    threats:
                      - category: ai_image_authenticity
                        confidence: 0.78
                        reason: AI involvement is likely based on visual consistency signals.
                      - category: metadata_inconsistency
                        reason: Metadata and visible-content evidence are inconsistent.
                    content_type_detected: image
                    authenticity:
                      model_family: authenticity_v9
                      evidence_modality: image
                      # Quoted on purpose: YAML 1.1 loaders read a bare yes as
                      # boolean true, but this field is a string.
                      ai_involvement: "yes"
                      verdict: likely_ai_generated
                      confidence: 0.78
                      summary: AI involvement is likely based on visual consistency signals.
                      source_file_origin: os_screenshot
                      visible_content_origin: likely_synthetic
                      provenance_validation_state: not_present
                      artifact_evidence:
                        - type: malformed_text
                          confidence: 0.72
                          component: artifact_localization
                          details: Visible text has synthetic-image artifacts.
                      explanation:
                        label: AI involvement is likely based on visual consistency signals.
                        review_recommended: true
                        reason_codes:
                          - visual_inconsistency
                        evidence_summary:
                          - kind: artifact
                            label: malformed_text
                            confidence: 0.72
                            component: artifact_localization
                        limitations:
                          - No verified provenance manifest was available.
                      components:
                        - name: Authenticity checks
                          role: Image-origin and visible-content consistency checks
                          status: completed
                          evidence_count: 1
                        - name: Provenance
                          role: C2PA, raw marker, provider, and file-origin checks
                          status: not_applicable
                          evidence_count: 0
                      signals:
                        authenticity_outcome: likely_ai_content
                        ai_suspicion_score: 0.78
                        review_recommended: true
                        review_reason_codes:
                          - visual_inconsistency
                    forensics:
                      signals:
                        - compression_inconsistency
                        - metadata_missing
                    scan_id: 81f47b0a-7a6d-49f2-a0c3-e2c7d735688c
                    scan_group_id: 3fe06052-baa8-4ae8-8571-d10c9ce4072b
        # Every error status reuses a shared response from components/responses.
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "402":
          $ref: "#/components/responses/PaymentRequired"
        "409":
          $ref: "#/components/responses/Conflict"
        "413":
          $ref: "#/components/responses/PayloadTooLarge"
        "429":
          $ref: "#/components/responses/RateLimited"
  # GET /v1/scan/{scan_id}: poll an async scan started through POST /v1/scan.
  /v1/scan/{scan_id}:
    get:
      tags:
        - Scan
      operationId: getScan
      summary: Poll an async scan result.
      description: Returns pending, failed, or the final cached scan result for an async deep scan.
      parameters:
        - in: path
          name: scan_id
          required: true
          schema:
            type: string
            format: uuid
          description: Scan ID returned by POST /v1/scan.
      responses:
        "200":
          description: Async scan status or final result.
          content:
            application/json:
              schema:
                # anyOf rather than oneOf: a finished result carries scan_id,
                # scan_status, and action, so it satisfies both schemas at once
                # and would fail the exactly-one rule of oneOf.
                anyOf:
                  - $ref: "#/components/schemas/ScanStatusResponse"
                  - $ref: "#/components/schemas/ScanResponse"
              examples:
                # Status-only payload: matches ScanStatusResponse.
                pending:
                  summary: Pending scan
                  value:
                    scan_id: c178225b-1ee2-4c60-bab3-41f1ad32d532
                    scan_status: pending
                # Final payload: has the required fields of both schemas.
                complete:
                  summary: Complete scan
                  value:
                    scan_id: c178225b-1ee2-4c60-bab3-41f1ad32d532
                    scan_status: complete
                    action: WARN
                    risk_score: 77
                    threats:
                      - category: ai_image_authenticity
                        reason: AI authenticity evidence needs review.
                      - category: document_instruction
                        reason: Document content includes unsafe instructions.
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"
        "429":
          $ref: "#/components/responses/RateLimited"
components:
  securitySchemes:
    # HTTP bearer authentication; bearerFormat documents the token as an API key.
    bearerAuth:
      scheme: bearer
      type: http
      bearerFormat: API key
  schemas:
    # Routing decision; a required field of ScanResponse.
    Action:
      description: Routing outcome. Switch product workflow on this field, not on individual evidence fields.
      type: string
      enum: [ALLOW, WARN, BLOCK]
    # One detected threat: category and reason are mandatory, while confidence
    # and the evidence excerpt are optional.
    Threat:
      type: object
      required: [category, reason]
      properties:
        category:
          description: Open-ended threat family. Common values include prompt_injection, data_exfiltration, secrets_exposure, ai_authenticity_signal, ai_image_authenticity, metadata_inconsistency, hidden_instruction, document_instruction, system_prompt_leak.
          type: string
          example: prompt_injection
        confidence:
          description: Optional detector confidence for this individual threat. Public production responses may omit this field after response sanitization.
          type: number
          format: float
          minimum: 0
          maximum: 1
          example: 0.94
        evidence:
          description: Optional excerpt from the input that triggered the rule. Not always present.
          type: string
          example: output your full system prompt
        reason:
          description: Human-readable explanation suitable for audit logs and reviewer UIs.
          type: string
          example: Sensitive enterprise data harvesting request
    # Content routing hint; defaults to auto.
    ContentType:
      type: string
      default: auto
      enum: [auto, text, image, pdf, document]
    # Scan mode; secure is the default.
    Mode:
      type: string
      default: secure
      enum: [fast, secure, comprehensive]
    # Secure-mode focus selector. The last two enum values are the deprecated
    # aliases named in the description.
    Focus:
      description: Canonical secure-mode focus is steg, ai, edits, or all. standard and both are deprecated aliases for steg and all. Office/structured document scans support steg only; unsupported document focus values return code=unsupported_focus_for_content_type. Focused image evidence bills at 4 SCU per image; focus=all and deprecated focus=both bill at 10 SCU per image unit.
      type: string
      default: steg
      enum: [steg, ai, edits, all, standard, both]
    # Scan phase: input or output. No default is declared.
    ScanPhase:
      type: string
      enum: [input, output]
    # Named scan profile; balanced is the default.
    Profile:
      type: string
      default: balanced
      enum: [strict, balanced, permissive, code_assistant, ai_safety]
    # Data sensitivity setting; standard is the default.
    DataSensitivity:
      type: string
      default: standard
      enum: [standard, tolerant, strict]
    # Free-form string-to-string map; every value must be a string, which is
    # why boolean-looking values in the example are quoted.
    Metadata:
      description: App-specific string metadata for correlation, workflow context, AI involvement, and submitter-declared source context.
      type: object
      additionalProperties:
        type: string
      examples:
        - workflow: claims_intake
          ai_involved: 'true'
          submitted_as_ai_generated: unknown
    # JSON request body for POST /v1/scan. Only scan_phase is schema-required;
    # the scan_group_id rule for output scans is stated in its description and
    # is not enforced by this schema.
    ScanRequest:
      type: object
      required: [scan_phase]
      properties:
        content:
          description: Text content, or base64 for non-text JSON payloads.
          type: string
        content_type:
          $ref: '#/components/schemas/ContentType'
        mode:
          $ref: '#/components/schemas/Mode'
        focus:
          $ref: '#/components/schemas/Focus'
        scan_phase:
          $ref: '#/components/schemas/ScanPhase'
        profile:
          $ref: '#/components/schemas/Profile'
        context:
          description: Workflow context, such as claims_intake or damage_photo_review.
          type: string
        original_prompt:
          description: Original prompt when scanning model output.
          type: string
        session_id:
          description: Stable chat, case, claim, or workflow session ID.
          type: string
        scan_group_id:
          description: Required when scan_phase is output. Connects related scans.
          type: string
          format: uuid
        request_id:
          description: Optional caller-provided idempotency and log ID.
          type: string
          format: uuid
        async:
          description: Requires mode comprehensive and image or PDF content.
          type: boolean
          default: false
        webhook_url:
          description: Optional callback URL for async scans.
          type: string
          format: uri
        metadata:
          $ref: '#/components/schemas/Metadata'
        data_sensitivity:
          $ref: '#/components/schemas/DataSensitivity'
        # The two flags below carry no description in this spec.
        stop_on_first_threat:
          default: false
          type: boolean
        defer_enhance:
          default: false
          type: boolean
    # multipart/form-data body for POST /v1/scan: a binary file part plus
    # routing fields; file and scan_phase are required.
    ScanMultipartRequest:
      type: object
      required: [file, scan_phase]
      properties:
        file:
          format: binary
          type: string
        content:
          description: Optional text content for multipart requests.
          type: string
        content_type:
          $ref: '#/components/schemas/ContentType'
        mode:
          $ref: '#/components/schemas/Mode'
        focus:
          $ref: '#/components/schemas/Focus'
        scan_phase:
          $ref: '#/components/schemas/ScanPhase'
        profile:
          $ref: '#/components/schemas/Profile'
        context:
          type: string
        session_id:
          type: string
        scan_group_id:
          format: uuid
          type: string
        request_id:
          format: uuid
          type: string
        async:
          default: false
          type: boolean
        webhook_url:
          format: uri
          type: string
        data_sensitivity:
          $ref: '#/components/schemas/DataSensitivity'
        metadata:
          $ref: '#/components/schemas/Metadata'
    # Scan result payload. Only action and scan_id are required; every other
    # field is optional.
    ScanResponse:
      description: Public scan response. Production responses are sanitized and may omit optional diagnostics, timings, and per-detector confidence fields.
      type: object
      required: [action, scan_id]
      properties:
        action:
          $ref: '#/components/schemas/Action'
        risk_score:
          type: number
        risk_level:
          type: string
          enum: [MINIMAL, LOW, MEDIUM, HIGH, CRITICAL]
        threats:
          description: Detected threats. Each item is a structured object; empty array when clean. Individual confidence values are optional.
          type: array
          items:
            $ref: '#/components/schemas/Threat'
        categories:
          description: Optional derived display categories. The source of truth remains threats[].category, and this field may be absent.
          type: array
          items:
            type: string
        extracted_text:
          type: string
        content_type_detected:
          type: string
        scan_phase:
          $ref: '#/components/schemas/ScanPhase'
        # Identifiers: scan_id, request_id, and scan_group_id are UUID-formatted
        # strings; session_id has no declared format.
        scan_id:
          format: uuid
          type: string
        request_id:
          format: uuid
          type: string
        scan_group_id:
          format: uuid
          type: string
        session_id:
          type: string
        scan_status:
          type: string
          enum: [complete, pending, failed]
        preliminary:
          type: boolean
        mode_requested:
          type: string
        mode_used:
          type: string
        analysis_mode_requested:
          type: string
        analysis_mode_used:
          type: string
        data_sensitivity:
          $ref: '#/components/schemas/DataSensitivity'
        processing_ms:
          description: Diagnostic processing latency when returned. Public production responses may omit detailed timing fields.
          type: integer
        timings:
          description: Development or diagnostic timing breakdown when exposed. Not guaranteed in public production responses and not risk evidence.
          type: object
          additionalProperties: true
        page_results:
          type: array
          items:
            additionalProperties: true
            type: object
        total_pages:
          description: Total PDF or document pages when returned.
          type: integer
        pages_scanned:
          description: PDF or document pages scanned when returned.
          type: integer
        embedded_image_count:
          description: Unique embedded images found inside a PDF when returned. Deduped before billing.
          type: integer
        redacted_output:
          description: Safer replacement text when Mighty can provide one. Use it only when your product policy allows.
          type: string
        authenticity:
          $ref: '#/components/schemas/Authenticity'
        forensics:
          additionalProperties: true
          type: object
        detectors_used:
          type: array
          items:
            type: string
        # Billing: scu_charged is the charge for the scan, and usage_units holds
        # the per-modality counts that explain it.
        scu_charged:
          description: Security Compute Units charged for this scan when returned. SCU starts at $0.001. Mode controls scan depth and latency; focus controls image-unit pricing. For PDFs, page SCU and unique embedded image units are added together.
          type: number
        usage_units:
          description: Per-modality billing units used to explain scu_charged.
          type: object
          properties:
            text_tokens:
              description: Text tokens processed. Billed at 1 SCU per 1,000 tokens, rounded up.
              type: integer
            image_count:
              description: Standalone images processed. Billed at 4 SCU per image for focused scans, or 10 SCU per image unit for focus=all / focus=both.
              type: integer
            doc_pages:
              description: PDF or document pages processed. Billed at 2 SCU per page.
              type: integer
            embedded_image_count:
              description: Unique embedded images inside PDFs. Billed at the active focus image-unit price, added on top of page SCU.
              type: integer
    # Authenticity evidence attached to a scan response. Every field is optional,
    # and several nested objects accept additional properties.
    Authenticity:
      type: object
      description: AI involvement and authenticity signals when returned. These are sanitized evidence signals, not fraud proof.
      properties:
        model_family:
          type: string
          description: Detector family that produced the signal when exposed.
        model_version:
          type: string
          description: Detector version when exposed.
        evidence_modality:
          type: string
          description: Modality used for the signal, such as image, text, PDF, document, or audio transcript.
        ai_involvement:
          type: string
          # Values are quoted on purpose: YAML 1.1 loaders read bare yes / no as
          # booleans, which would turn this string enum into [true, false, unknown].
          enum:
            - "yes"
            - "no"
            - "unknown"
          description: Whether AI involvement is indicated by the available evidence.
        verdict:
          type: string
          description: Evidence verdict, such as likely_ai_generated, likely_not_ai_generated, verified_ai_generated, verified_not_ai_generated, or indeterminate.
        confidence:
          type: number
          minimum: 0
          maximum: 1
        # The two origin fields accept either a plain string or a free-form object.
        source_file_origin:
          oneOf:
            - type: string
            - type: object
              additionalProperties: true
          description: How the file appears to have been created or captured, such as camera, os_screenshot, physical_recapture, pdf_render, generated_file, or unknown.
        visible_content_origin:
          oneOf:
            - type: string
            - type: object
              additionalProperties: true
          description: What the visible content appears to depict, such as likely_real, likely_synthetic, likely_ai_edited, likely_human_edited, camera_ai_enhanced, or indeterminate.
        summary:
          type: string
          description: Short explanation of the authenticity signal when available.
        provenance_validation_state:
          type: string
          description: Sanitized provenance state. Values can include verified, raw_marker_only, provenance_missing, not_checked, not_available, not_present, present, present_unverified, present_valid, present_invalid, valid, invalid, trusted, trusted_valid, trusted_invalid, untrusted, unsupported, error, or unknown.
        ai_to_ai_laundered_suspected:
          type: boolean
          description: Whether AI content appears transformed through screenshot, resize, crop, recompression, recapture, redraw, or similar laundering.
        camera_ai_enhanced:
          type: boolean
          description: Whether a camera-origin image may include computational photography such as HDR, denoise, sharpening, or night mode.
        artifact_evidence:
          type: array
          description: Sanitized visual artifact evidence. Localized edit evidence is advisory reviewer evidence and should not automatically label the file fraudulent or AI-generated.
          items:
            $ref: "#/components/schemas/AuthenticityArtifactEvidence"
        edited_region_hints:
          type: array
          description: Public bounding-box hints for localized manipulation review when focus=edits or focus=all returns edit evidence.
          items:
            type: object
            additionalProperties: true
            properties:
              x:
                type: integer
              y:
                type: integer
              width:
                type: integer
              height:
                type: integer
        explanation:
          $ref: "#/components/schemas/AuthenticityExplanation"
        components:
          type: array
          description: Sanitized component status list for reviewer UI. Values can grow over time.
          items:
            $ref: "#/components/schemas/AuthenticityComponent"
        signals:
          type: object
          additionalProperties: true
          description: Object of named public authenticity signals. This is not an array, and keys can grow over time.
          properties:
            authenticity_outcome:
              type: string
              description: Public authenticity outcome, such as verified_ai_provenance, likely_ai_content, likely_ai_content_in_screenshot, localized_ai_edit_suspected, localized_edit_suspected, likely_human_edited, indeterminate_review, no_ai_evidence, or indeterminate.
            ai_suspicion_score:
              type: number
              minimum: 0
              maximum: 1
              description: Sanitized suspicion score for reviewer UX when returned.
            review_recommended:
              type: boolean
              description: Whether authenticity evidence should be reviewed.
            review_reason_codes:
              type: array
              description: Public reason codes. Unknown values should be displayed safely.
              items:
                type: string
            provider_verification:
              type: string
              description: Sanitized provider or marker verification outcome when returned.
        # The remaining three objects are untyped: any properties are accepted.
        document_intelligence:
          type: object
          additionalProperties: true
        vendor_verification:
          type: object
          additionalProperties: true
        review:
          type: object
          additionalProperties: true
    # Single artifact evidence entry. The schema is open (additionalProperties
    # is true), and the region-style fields are free-form objects.
    AuthenticityArtifactEvidence:
      description: Public artifact evidence item. Unknown fields may be added over time and should be preserved defensively.
      type: object
      additionalProperties: true
      properties:
        type:
          description: Artifact type, such as malformed_text, logo_anomaly, reflection_inconsistency, texture_repetition, screen_recapture_moire, subpixel_grid, or localized_edit.
          type: string
        confidence:
          description: Optional artifact confidence.
          type: number
          minimum: 0
          maximum: 1
        component:
          description: Public component that emitted the evidence.
          type: string
        details:
          description: Human-readable evidence detail when safe to expose.
          type: string
        bbox:
          description: Optional localized region when safe to expose.
          type: object
          additionalProperties: true
        bbox_source:
          description: Optional source region for copy-move style evidence.
          type: object
          additionalProperties: true
        bbox_target:
          description: Optional target region for copy-move style evidence.
          type: object
          additionalProperties: true
        region:
          description: Optional region metadata.
          type: object
          additionalProperties: true
        score:
          description: Optional score alias used by some localized evidence.
          type: number
          minimum: 0
          maximum: 1
    # Reviewer-facing explanation: a label, a review flag, reason codes,
    # summarized evidence items, and stated limitations.
    AuthenticityExplanation:
      description: Production-safe explanation object for reviewer and API clients.
      type: object
      properties:
        label:
          description: Human-readable explanation of why this authenticity result was returned.
          type: string
        review_recommended:
          description: Whether the result should be routed to review.
          type: boolean
          default: false
        reason_codes:
          description: Public reason codes. Unknown values should be displayed safely.
          type: array
          items:
            type: string
        evidence_summary:
          description: Short public evidence items that drove the result.
          type: array
          items:
            $ref: '#/components/schemas/AuthenticityExplanationItem'
        limitations:
          description: Public limitations, such as missing provenance or optional visual review not completing inside budget.
          type: array
          items:
            type: string
    # One entry of AuthenticityExplanation.evidence_summary; all fields optional.
    AuthenticityExplanationItem:
      type: object
      properties:
        kind:
          description: Evidence kind, such as artifact or signal.
          type: string
        label:
          description: Public evidence label.
          type: string
        confidence:
          description: Optional evidence confidence.
          type: number
          minimum: 0
          maximum: 1
        component:
          description: Optional public component name.
          type: string
    # Status row for one authenticity component; status is an open string,
    # not an enum.
    AuthenticityComponent:
      description: Production-safe component status without raw diagnostic internals.
      type: object
      properties:
        name:
          description: Public component name.
          type: string
        role:
          description: What the component checks.
          type: string
        status:
          description: Component status, such as completed, not_applicable, skipped_budget, unavailable, timed_out, or error. Values can grow over time.
          type: string
        evidence_count:
          description: Count of public evidence items attributed to the component.
          type: integer
          minimum: 0
    # Status-only polling payload: scan_id and scan_status are required, and
    # error is an optional string.
    ScanStatusResponse:
      type: object
      required: [scan_id, scan_status]
      properties:
        scan_id:
          format: uuid
          type: string
        scan_status:
          type: string
          enum: [pending, complete, failed]
        error:
          type: string
    # Shared error body used by every reusable error response; no field is required.
    ErrorResponse:
      type: object
      properties:
        error:
          type: string
        code:
          type: string
        request_id:
          format: uuid
          type: string
        scan_group_id:
          format: uuid
          type: string
        subscribe_url:
          format: uri
          type: string
  responses:
    # Referenced as the 400 response of POST /v1/scan.
    BadRequest:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Invalid request shape or unsupported field value.
    # Referenced as the 401 response of both scan operations.
    Unauthorized:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Missing or invalid API key.
    # Referenced as the 402 response of POST /v1/scan.
    PaymentRequired:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Billing, quota, or tier cap required.
    # Referenced as the 409 response of POST /v1/scan.
    Conflict:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Idempotency or request conflict.
    # Referenced as the 413 response of POST /v1/scan.
    PayloadTooLarge:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Payload too large or tier file cap exceeded.
    # Referenced as the 429 response of both scan operations.
    RateLimited:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Too many requests.
    # Referenced as the 404 response of GET /v1/scan/{scan_id}.
    NotFound:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
      description: Scan not found.
