-
Notifications
You must be signed in to change notification settings - Fork 30
Is PHP cursed to be much slower? #6
Copy link
Copy link
Closed
Description
Hey, thanks for porting this over! I wanted to move to PHP to remove an extra dependency (a Docker service exposing Python TikToken over an API). I ran a small benchmark, and the PHP version appears to be considerably slower.
Source for Docker service: https://github.com/flexchar/tiktoken-counter
I use Laravel. I wrote a simple command that tokenizes a 100-sentence-long text 1,000 times.
Median output is around:
Docker time: 4.5049350261688 seconds
PHP time: 20.138854026794 seconds
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Http;
class BenchmarkTikToken extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'app:benchmark-tik-token';
/**
* The console command description.
*
* @var string
*/
protected $description = 'Benchmark PHP version of TikToken vs. Python using Docker image';
// Tokenizer instance shared by the benchmark: assigned in handle() from the
// encoder provider and read by countTokensPhp().
public \Yethee\Tiktoken\Encoder $encoder;
/**
 * Execute the console command.
 *
 * Downloads a sample text, then times repeated token counting first through
 * the HTTP token-counting service and then through the in-process PHP
 * encoder, and prints both durations in seconds.
 */
public function handle(): void
{
    $this->warn('Make sure to `composer require yethee/tiktoken`.');

    $timesToIterate = 1000;

    // The query string must contain a literal "&paras=100". The URL previously
    // read "¶s=100" ("&para" decoded as an HTML entity), so no `paras`
    // parameter was sent at all.
    $text = Http::get(
        'https://baconipsum.com/api/?type=meat-and-filler&paras=100&format=text',
    )
        ->throw()
        ->body();

    // Warm up both code paths so one-time setup is not part of the timing.
    $provider = app(\Yethee\Tiktoken\EncoderProvider::class);
    $this->encoder = $provider->getForModel('gpt-4');
    $this->countTokens('hello world');
    $this->countTokensPhp('hello world');

    // Benchmark the functions. A counted loop is used instead of range() so
    // that no 1000-element array is allocated inside the timed region.
    $countTokensTime = $this->benchmark(function () use ($text, $timesToIterate) {
        for ($iteration = 0; $iteration < $timesToIterate; $iteration++) {
            $this->countTokens($text);
        }
    });

    $countTokensPhpTime = $this->benchmark(function () use ($text, $timesToIterate) {
        for ($iteration = 0; $iteration < $timesToIterate; $iteration++) {
            $this->countTokensPhp($text);
        }
    });

    // Print the results
    $this->line("Docker time: {$countTokensTime} seconds");
    $this->line("PHP time: {$countTokensPhpTime} seconds");
}
/**
 * Run the given callable once and measure how long it takes.
 *
 * Uses hrtime(), a monotonic high-resolution timer, rather than microtime(),
 * so the measurement is not affected by system clock adjustments.
 *
 * @param callable $function Code to time; invoked exactly once with no arguments.
 *
 * @return float Elapsed time in seconds.
 */
private function benchmark(callable $function): float
{
    $startNs = hrtime(true);
    $function();
    $elapsedNs = hrtime(true) - $startNs;

    // hrtime(true) reports nanoseconds; convert to seconds for the caller.
    return $elapsedNs / 1e9;
}
/**
 * Count the tokens in a text using the in-process PHP encoder.
 *
 * @param string $text Text to encode.
 *
 * @return int Number of elements in the encoder's output for $text.
 */
public function countTokensPhp(string $text): int
{
    return count($this->encoder->encode($text));
}
/**
 * Count the tokens in a text by calling the HTTP token-counting service.
 *
 * Posts the text to the service, throws if the request failed, and returns
 * the reported `tokens` value scaled by 1.05 and rounded up.
 *
 * @param string $text Text to send to the service.
 *
 * @return int Reported token count multiplied by 1.05, rounded up.
 */
public function countTokens(string $text): int
{
    $payload = ['text' => $text];

    $response = Http::post('tiktoken:8000/count', $payload);
    $response->throw();

    $reported = $response->json('tokens');

    return (int) ceil($reported * 1.05);
}
}
Running PHP 8.2.10 on Docker on M2.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels