INDEX
    Explanations

    quotation marks and quoted speech

    New Auto-Interp
    Negative Logits
    =[];
    
    -0.78
     {}),
    -0.73
    ]');
    -0.71
    pozdrawiam
    -0.70
    _
    
    -0.69
     [];
    
    -0.68
    !
    
    -0.68
    %;
    
    -0.67
    |
    
    -0.67
    \\
    
    -0.67
    POSITIVE LOGITS
    1.55
     “
    1.52
     "
    1.49
     ‘
    1.20
     '
    1.19
    "
    1.18
    ,“
    1.14
    ("
    1.10
    1.08
    1.05
    Act Density 0.638%

    No Known Activations