INDEX
    Explanations
    New Auto-Interp
    Negative Logits
    <bos>
    -3.76
    -1.06
    <?
    -0.93
    /***
    
    -0.92
    /**
    -0.86
    
    
    -0.81
    /*
    -0.80
     modernize
    -0.76
    <?
    
    -0.75
     harmonize
    -0.73
    POSITIVE LOGITS
     unlaw
    1.07
     riva
    0.94
     Keny
    0.94
     pleins
    0.94
     quoique
    0.91
     seksi
    0.91
     véhic
    0.90
     unwarran
    0.90
     Muhamma
    0.90
     marea
    0.89
    Act Density 0.175%

    No Known Activations