INDEX
    Explanations
    New Auto-Interp
    Negative Logits
    (enemy
    -0.08
     analogy
    -0.08
    (price
    -0.08
    (pay
    -0.07
    mentor
    -0.07
     catchy
    -0.07
     advocate
    -0.07
     fitness
    -0.07
     payoff
    -0.07
    fitness
    -0.07
    POSITIVE LOGITS
     ((((
    0.10
    ((((
    0.09
    .generated
    0.09
     tslib
    0.09
     (((
    0.09
    
    0.09
    şa
    0.09
    jspb
    0.09
    Ljava
    0.08
    VRTX
    0.08
    Act Density 0.013%

    No Known Activations