INDEX
    Explanations
    New Auto-Interp
    Negative Logits
    ))[
    -0.07
    arch
    -0.07
     dolphin
    -0.07
     vegetable
    -0.07
     hovering
    -0.06
     crane
    -0.06
    cow
    -0.06
    ori
    -0.06
     cycling
    -0.06
     ground
    -0.06
    POSITIVE LOGITS
    Miss
    0.08
    dismiss
    0.08
    ميل
    0.08
     dismissed
    0.07
     dismiss
    0.07
    ту
    0.07
     MISS
    0.07
     dismissal
    0.07
    mis
    0.07
    Disposed
    0.07
    Act Density 0.004%

    No Known Activations