openapi: 3.1.0
info:
  title: Mighty Scan API
  version: "1.0.0"
  summary: Input trust for text, files, images, OCR output, model output, and workflow evidence.
  license:
    name: Mighty Terms
    url: https://trymighty.ai/terms
  description: |
    Mighty scans submitted material before people, AI systems, OCR, or automation trust it.
    Mighty flags suspicious evidence and helps route risky material. It does not prove fraud by itself.
servers:
  - url: https://gateway.trymighty.ai
    description: Production gateway
security:
  - bearerAuth: []
tags:
  - name: Scan
    description: Unified scan and async polling.
paths:
  /v1/scan:
    post:
      tags:
        - Scan
      operationId: createScan
      summary: Scan text, files, images, PDFs, documents, OCR output, or model output.
      description: |
        Send one item to Mighty and route the result by action.
        Use scan_phase=input for submitted material.
        Use scan_phase=output for generated or extracted output and include scan_group_id.
      x-codeSamples:
        - lang: curl
          label: Text scan
          source: |
            curl -X POST https://gateway.trymighty.ai/v1/scan \
              -H "Authorization: Bearer $MIGHTY_API_KEY" \
              -H "Content-Type: application/json" \
              -d '{
                "content": "Please summarize this claim note.",
                "content_type": "text",
                "scan_phase": "input",
                "mode": "secure",
                "focus": "both",
                "metadata": {
                  "workflow": "claims_intake",
                  "ai_involved": "true",
                  "submitted_as_ai_generated": "unknown"
                }
              }'
        - lang: javascript
          label: Node fetch
          source: |
            const response = await fetch("https://gateway.trymighty.ai/v1/scan", {
              method: "POST",
              headers: {
                Authorization: `Bearer ${process.env.MIGHTY_API_KEY}`,
                "Content-Type": "application/json"
              },
              body: JSON.stringify({
                content: "Please summarize this claim note.",
                content_type: "text",
                scan_phase: "input",
                mode: "secure",
                focus: "both",
                metadata: {
                  workflow: "claims_intake",
                  ai_involved: "true",
                  submitted_as_ai_generated: "unknown"
                }
              })
            });

            const scan = await response.json();
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/ScanRequest"
            examples:
              textInput:
                summary: Text input scan
                value:
                  content: Please summarize this claim note.
                  content_type: text
                  scan_phase: input
                  mode: secure
                  focus: both
                  metadata:
                    workflow: claims_intake
                    ai_involved: "true"
                    submitted_as_ai_generated: unknown
              outputScan:
                summary: Model output scan
                value:
                  content: Safe model output shown to the user.
                  content_type: text
                  scan_phase: output
                  mode: secure
                  focus: both
                  profile: ai_safety
                  scan_group_id: 9b3e4f8d-96c9-4f42-8338-8cf9571c1c70
                  metadata:
                    workflow: chat_output
                    ai_involved: "true"
                    submitted_as_ai_generated: "true"
          multipart/form-data:
            schema:
              $ref: "#/components/schemas/ScanMultipartRequest"
          application/octet-stream:
            schema:
              type: string
              format: binary
            examples:
              rawImage:
                summary: Raw image bytes with routing fields in query params
                value: binary file body
      parameters:
        - in: query
          name: scan_phase
          required: false
          schema:
            $ref: "#/components/schemas/ScanPhase"
          description: Required for raw binary requests. Optional when included in JSON or multipart form data.
        - in: query
          name: content_type
          required: false
          schema:
            $ref: "#/components/schemas/ContentType"
          description: Optional routing hint for raw binary requests.
        - in: query
          name: mode
          required: false
          schema:
            $ref: "#/components/schemas/Mode"
          description: Optional mode for raw binary requests.
        - in: query
          name: filename
          required: false
          schema:
            type: string
          description: Optional filename hint for raw binary requests.
      responses:
        "200":
          description: Scan completed or async scan accepted.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ScanResponse"
              examples:
                allow:
                  summary: Allow response
                  value:
                    action: ALLOW
                    risk_score: 4
                    risk_level: LOW
                    threats: []
                    content_type_detected: text
                    scan_phase: input
                    scan_id: 4e7c5fc1-6947-492b-bd22-0589d6477c8b
                    request_id: ab82f4ad-8d64-4bb4-b4ed-77df63291198
                    scan_group_id: 9b3e4f8d-96c9-4f42-8338-8cf9571c1c70
                    session_id: sess_6Kqv1vJx
                    scan_status: complete
                    processing_ms: 87
                redactedOutput:
                  summary: Blocked output with redaction
                  value:
                    action: BLOCK
                    risk_score: 91
                    risk_level: CRITICAL
                    threats:
                      - secret_exposure
                    content_type_detected: text
                    scan_phase: output
                    redacted_output: I cannot share that sensitive value.
                    scan_id: 6f3293fe-5344-42c6-a00f-5813a3d33c61
                    scan_group_id: 9b3e4f8d-96c9-4f42-8338-8cf9571c1c70
                warnImage:
                  summary: Suspicious image response
                  value:
                    action: WARN
                    risk_score: 74
                    risk_level: HIGH
                    threats:
                      - ai_authenticity_signal
                      - metadata_inconsistency
                    content_type_detected: image
                    authenticity:
                      model_family: authenticity_v9
                      evidence_modality: image
                      ai_involvement: yes
                      verdict: likely_ai_generated
                      confidence: 0.78
                      summary: AI involvement is likely based on visual consistency signals.
                    forensics:
                      signals:
                        - compression_inconsistency
                        - metadata_missing
                    scan_id: 81f47b0a-7a6d-49f2-a0c3-e2c7d735688c
                    scan_group_id: 3fe06052-baa8-4ae8-8571-d10c9ce4072b
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "402":
          $ref: "#/components/responses/PaymentRequired"
        "409":
          $ref: "#/components/responses/Conflict"
        "413":
          $ref: "#/components/responses/PayloadTooLarge"
        "429":
          $ref: "#/components/responses/RateLimited"
  /v1/scan/{scan_id}:
    get:
      tags:
        - Scan
      operationId: getScan
      summary: Poll an async scan result.
      description: Returns pending, failed, or the final cached scan result for an async deep scan.
      parameters:
        - in: path
          name: scan_id
          required: true
          schema:
            type: string
            format: uuid
          description: Scan ID returned by POST /v1/scan.
      responses:
        "200":
          description: Async scan status or final result.
          content:
            application/json:
              schema:
                oneOf:
                  - $ref: "#/components/schemas/ScanStatusResponse"
                  - $ref: "#/components/schemas/ScanResponse"
              examples:
                pending:
                  summary: Pending scan
                  value:
                    scan_id: c178225b-1ee2-4c60-bab3-41f1ad32d532
                    scan_status: pending
                complete:
                  summary: Complete scan
                  value:
                    scan_id: c178225b-1ee2-4c60-bab3-41f1ad32d532
                    scan_status: complete
                    action: WARN
                    risk_score: 77
                    threats:
                      - ai_authenticity_signal
                      - document_instruction
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"
        "429":
          $ref: "#/components/responses/RateLimited"
components:
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      bearerFormat: API key
  schemas:
    Action:
      type: string
      enum:
        - ALLOW
        - WARN
        - BLOCK
    Threat:
      type: object
      required:
        - category
        - confidence
        - reason
      properties:
        category:
          type: string
          description: Threat family. Common values include prompt_injection, data_exfiltration, secrets_exposure, ai_authenticity_signal, metadata_inconsistency, hidden_instruction, document_instruction, system_prompt_leak.
          example: prompt_injection
        confidence:
          type: number
          format: float
          minimum: 0
          maximum: 1
          description: Detector confidence for this individual threat.
          example: 0.94
        evidence:
          type: string
          description: Optional excerpt from the input that triggered the rule. Not always present.
          example: output your full system prompt
        reason:
          type: string
          description: Human-readable explanation suitable for audit logs and reviewer UIs.
          example: Sensitive enterprise data harvesting request
    ContentType:
      type: string
      enum:
        - auto
        - text
        - image
        - pdf
        - document
      default: auto
    Mode:
      type: string
      enum:
        - fast
        - secure
        - comprehensive
      default: secure
    Focus:
      type: string
      enum:
        - standard
        - ai
        - both
      default: standard
    ScanPhase:
      type: string
      enum:
        - input
        - output
    Profile:
      type: string
      enum:
        - strict
        - balanced
        - permissive
        - code_assistant
        - ai_safety
      default: balanced
    DataSensitivity:
      type: string
      enum:
        - standard
        - tolerant
        - strict
      default: standard
    Metadata:
      type: object
      additionalProperties:
        type: string
      description: App-specific string metadata for correlation, workflow context, AI involvement, and submitter-declared source context.
      examples:
        - workflow: claims_intake
          ai_involved: "true"
          submitted_as_ai_generated: unknown
    ScanRequest:
      type: object
      required:
        - scan_phase
      properties:
        content:
          type: string
          description: Text content, or base64 for non-text JSON payloads.
        content_type:
          $ref: "#/components/schemas/ContentType"
        mode:
          $ref: "#/components/schemas/Mode"
        focus:
          $ref: "#/components/schemas/Focus"
        scan_phase:
          $ref: "#/components/schemas/ScanPhase"
        profile:
          $ref: "#/components/schemas/Profile"
        context:
          type: string
          description: Workflow context, such as claims_intake or damage_photo_review.
        original_prompt:
          type: string
          description: Original prompt when scanning model output.
        session_id:
          type: string
          description: Stable chat, case, claim, or workflow session ID.
        scan_group_id:
          type: string
          format: uuid
          description: Required when scan_phase is output. Connects related scans.
        request_id:
          type: string
          format: uuid
          description: Optional caller-provided idempotency and log ID.
        async:
          type: boolean
          default: false
          description: Requires mode comprehensive and image or PDF content.
        webhook_url:
          type: string
          format: uri
          description: Optional callback URL for async scans.
        metadata:
          $ref: "#/components/schemas/Metadata"
        data_sensitivity:
          $ref: "#/components/schemas/DataSensitivity"
        stop_on_first_threat:
          type: boolean
          default: false
        defer_enhance:
          type: boolean
          default: false
    ScanMultipartRequest:
      type: object
      required:
        - file
        - scan_phase
      properties:
        file:
          type: string
          format: binary
        content:
          type: string
          description: Optional text content for multipart requests.
        content_type:
          $ref: "#/components/schemas/ContentType"
        mode:
          $ref: "#/components/schemas/Mode"
        focus:
          $ref: "#/components/schemas/Focus"
        scan_phase:
          $ref: "#/components/schemas/ScanPhase"
        profile:
          $ref: "#/components/schemas/Profile"
        context:
          type: string
        session_id:
          type: string
        scan_group_id:
          type: string
          format: uuid
        request_id:
          type: string
          format: uuid
        async:
          type: boolean
          default: false
        webhook_url:
          type: string
          format: uri
        data_sensitivity:
          $ref: "#/components/schemas/DataSensitivity"
        metadata:
          $ref: "#/components/schemas/Metadata"
    ScanResponse:
      type: object
      required:
        - action
        - scan_id
      properties:
        action:
          $ref: "#/components/schemas/Action"
        risk_score:
          type: number
        risk_level:
          type: string
          enum:
            - MINIMAL
            - LOW
            - MEDIUM
            - HIGH
            - CRITICAL
        threats:
          type: array
          description: Detected threats. Each item is a structured object — empty array when clean.
          items:
            $ref: "#/components/schemas/Threat"
        extracted_text:
          type: string
        content_type_detected:
          type: string
        scan_phase:
          $ref: "#/components/schemas/ScanPhase"
        scan_id:
          type: string
          format: uuid
        request_id:
          type: string
          format: uuid
        scan_group_id:
          type: string
          format: uuid
        session_id:
          type: string
        scan_status:
          type: string
          enum:
            - complete
            - pending
            - failed
        preliminary:
          type: boolean
        mode_requested:
          type: string
        mode_used:
          type: string
        analysis_mode_requested:
          type: string
        analysis_mode_used:
          type: string
        data_sensitivity:
          $ref: "#/components/schemas/DataSensitivity"
        processing_ms:
          type: integer
        page_results:
          type: array
          items:
            type: object
            additionalProperties: true
        total_pages:
          type: integer
          description: Total PDF or document pages when returned.
        pages_scanned:
          type: integer
          description: PDF or document pages scanned when returned.
        embedded_image_count:
          type: integer
          description: Unique embedded images found inside a PDF when returned. Deduped before billing.
        redacted_output:
          type: string
          description: Safer replacement text when Mighty can provide one. Use it only when your product policy allows.
        authenticity:
          $ref: "#/components/schemas/Authenticity"
        forensics:
          type: object
          additionalProperties: true
        detectors_used:
          type: array
          items:
            type: string
        scu_charged:
          type: number
          description: Security Compute Units charged for this scan when returned. For PDFs, page SCU and unique embedded image SCU are added together.
        usage_units:
          type: object
          description: Per-modality billing units used to explain scu_charged.
          properties:
            text_tokens:
              type: integer
              description: Text tokens processed. Billed at 1 SCU per 1,000 tokens, rounded up.
            image_count:
              type: integer
              description: Standalone images processed. Billed at 4 SCU per image.
            doc_pages:
              type: integer
              description: PDF or document pages processed. Billed at 2 SCU per page.
            embedded_image_count:
              type: integer
              description: Unique embedded images inside PDFs. Billed at 4 SCU per unique embedded image, added on top of page SCU.
    Authenticity:
      type: object
      description: AI involvement and authenticity signals when returned. These are evidence signals, not fraud proof.
      properties:
        model_family:
          type: string
          description: Detector family that produced the signal when exposed.
        model_version:
          type: string
          description: Detector version when exposed.
        evidence_modality:
          type: string
          description: Modality used for the signal, such as image, text, PDF, document, or audio transcript.
        ai_involvement:
          type: string
          enum:
            - yes
            - no
            - unknown
          description: Whether AI involvement is indicated by the available evidence.
        verdict:
          type: string
          description: Evidence verdict, such as likely_ai_generated, likely_not_ai_generated, verified_ai_generated, verified_not_ai_generated, or indeterminate.
        confidence:
          type: number
          minimum: 0
          maximum: 1
        summary:
          type: string
          description: Short explanation of the authenticity signal when available.
        signals:
          type: array
          items:
            type: string
        document_intelligence:
          type: object
          additionalProperties: true
        vendor_verification:
          type: object
          additionalProperties: true
        review:
          type: object
          additionalProperties: true
    ScanStatusResponse:
      type: object
      required:
        - scan_id
        - scan_status
      properties:
        scan_id:
          type: string
          format: uuid
        scan_status:
          type: string
          enum:
            - pending
            - complete
            - failed
        error:
          type: string
    ErrorResponse:
      type: object
      properties:
        error:
          type: string
        code:
          type: string
        request_id:
          type: string
          format: uuid
        scan_group_id:
          type: string
          format: uuid
        subscribe_url:
          type: string
          format: uri
  responses:
    BadRequest:
      description: Invalid request shape or unsupported field value.
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
    Unauthorized:
      description: Missing or invalid API key.
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
    PaymentRequired:
      description: Billing, quota, or tier cap required.
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
    Conflict:
      description: Idempotency or request conflict.
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
    PayloadTooLarge:
      description: Payload too large or tier file cap exceeded.
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
    RateLimited:
      description: Too many requests.
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
    NotFound:
      description: Scan not found.
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"