{
"id":"aca7213e666907b0588bd48081ab12656d1f25dc8332089fe8860eaeb4efb344",
"pubkey":"52b4a076bcbbbdc3a1aefa3735816cf74993b1b8db202b01c883c58be7fad8bd",
"created_at":1780792084,
"kind":1,
"tags": [
[
"e",
"83f70f833e703f5d58e3e6d8cb5d0ef7f5a87229c642b383d7cc4ded94aa4b0e",
"",
"root"
],
[
"e",
"7e00d8e143a5f441d91dc399f79d88f21bc09995fc6b2b0c77ea9e16456c211a",
"wss://relay.damus.io",
"reply",
"3b96ce185eaa04ede20ca927ebca2d4f1e66884e1f1405fe681c548831f96877"
],
[
"p",
"3b96ce185eaa04ede20ca927ebca2d4f1e66884e1f1405fe681c548831f96877"
],
[
"client",
"Damus"
]
],
"content":"That is the result of RL, not the intention.\n\nIf you encourage less tokens, less tool call rounds, and reward passing tests or similar, then that is what happens\n\nThe easiest way gets the reward first and the model rapidly converges to that",
"sig":"6152d2702501f072bbd2ed4ae38723e239b0a96e12918074e3309ae9003723889543b439a59f80256d71f73544c0b48f1a5d45022f865f5f2115b6653c261e00"
}