INDEX
    Explanations
    New Auto-Interp
    Negative Logits
    <bos>
    -1.59
    -0.97
    <?
    -0.96
    
    
    -0.85
    /**
    -0.83
    /*
    -0.78
     continue
    -0.70
    /*!
    
    -0.69
    <?
    
    -0.67
    /*++
    -0.66
    POSITIVE LOGITS
     maneu
    1.63
     maroc
    1.59
     affor
    1.59
     embodi
    1.56
     stockholm
    1.52
     roberto
    1.49
     ricardo
    1.46
     accla
    1.46
     lidl
    1.45
     jorge
    1.45
    Act Density 0.280%

    No Known Activations