M3.2-36b / README.md
Tarek07's picture
Update README.md
5e886a8 verified
metadata
license: apache-2.0
base_model:
  - anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only

A depth upscale of anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only, built by duplicating layers with the following passthrough merge configuration:

# Passthrough layer-stacking recipe for a single source model.
#
# The stack is built from ten groups. Each group is a slice of the source
# model followed by a second copy of the tail of that slice, e.g. [0, 4]
# followed by [2, 4]. In every repeated slice, tensors matching o_proj and
# down_proj are scaled by 0; the last scale entry (no filter) has value 1.
# NOTE(review): presumably the filterless entry applies to all remaining
# tensors, and layer_range is half-open [start, end), which would give
# 10 x (4 + 2) = 60 output layers -- confirm against the mergekit docs.
merge_method: passthrough
dtype: bfloat16
slices:
  # Group 1: [0, 4], then [2, 4] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [0, 4]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [2, 4]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1
  # Group 2: [4, 8], then [6, 8] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [4, 8]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [6, 8]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1
  # Group 3: [8, 12], then [10, 12] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [8, 12]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [10, 12]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1
  # Group 4: [12, 16], then [14, 16] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [12, 16]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [14, 16]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1
  # Group 5: [16, 20], then [18, 20] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [16, 20]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [18, 20]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1
  # Group 6: [20, 24], then [22, 24] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [20, 24]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [22, 24]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1
  # Group 7: [24, 28], then [26, 28] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [24, 28]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [26, 28]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1
  # Group 8: [28, 32], then [30, 32] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [28, 32]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [30, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1
  # Group 9: [32, 36], then [34, 36] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [32, 36]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [34, 36]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1
  # Group 10: [36, 40], then [38, 40] repeated with o_proj/down_proj zeroed.
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [36, 40]
  - sources:
      - model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
        layer_range: [38, 40]
        parameters:
          scale:
            - filter: o_proj
              value: 0
            - filter: down_proj
              value: 0
            - value: 1