INDEX
    Explanations
    New Auto-Interp
    Negative Logits
    <bos>
    -2.28
    -1.25
    
    
    -1.19
    <?
    -1.05
     cabrio
    -0.97
    /**
    -0.94
    /***
    
    -0.85
    ///**
    -0.84
    <?
    
    -0.82
    },[])
    -0.78
    POSITIVE LOGITS
     Heere
    0.63
     Ueb
    0.62
     pubg
    0.60
     Theile
    0.59
     désol
    0.59
     egreg
    0.58
     Departement
    0.58
     maneu
    0.58
     impra
    0.57
     lgbt
    0.56
    Act Density 0.296%

    No Known Activations