[Examples][Store] Implement indexing pipeline

This commit is contained in:
Oskar Stark
2025-09-08 12:56:08 +02:00
committed by Christopher Hertel
parent 2a09fb0521
commit 29349c1104
30 changed files with 1177 additions and 130 deletions

View File

@@ -36,7 +36,7 @@ composer install
echo "OPENAI_API_KEY='sk-...'" > .env.local
# Initialize vector store
symfony console app:blog:embed -vv
symfony console ai:store:index blog -vv
# Test vector store
symfony console app:blog:query

View File

@@ -74,7 +74,7 @@ The [Chroma DB](https://www.trychroma.com/) is a vector store that is used to st
To initialize the Chroma DB, you need to run the following command:
```shell
symfony console app:blog:embed -vv
symfony console ai:store:index blog -vv
```
Now you should be able to run the test command and get some results:

View File

@@ -59,7 +59,11 @@ ai:
class: 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings'
name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Embeddings::TEXT_ADA_002
indexer:
default:
blog:
loader: 'App\Blog\FeedLoader'
source: 'https://feeds.feedburner.com/symfony/blog'
transformers:
- 'Symfony\AI\Store\Document\Transformer\TextTrimTransformer'
vectorizer: 'ai.vectorizer.openai_embeddings'
store: 'ai.store.chroma_db.symfonycon'
@@ -75,3 +79,5 @@ services:
Symfony\AI\Agent\Toolbox\Tool\Wikipedia: ~
Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch:
$vectorizer: '@ai.vectorizer.openai_embeddings'
Symfony\AI\Store\Document\Transformer\TextTrimTransformer: ~

View File

@@ -1,37 +0,0 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace App\Blog\Command;
use App\Blog\Embedder;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(name: 'app:blog:embed', description: 'Create embeddings for Symfony blog and push to ChromaDB.')]
final class EmbedCommand
{
    public function __construct(
        private readonly Embedder $embedder,
    ) {
    }

    /**
     * Announces the run, delegates the whole embedding job to the Embedder
     * and reports success once it returns.
     */
    public function __invoke(SymfonyStyle $io): int
    {
        $io->title('Loading RSS of Symfony blog as embeddings into ChromaDB');

        $this->embedder->embedBlog();

        $io->success('Symfony Blog Successfully Embedded!');

        return Command::SUCCESS;
    }
}

View File

@@ -1,35 +0,0 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace App\Blog;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\IndexerInterface;
final readonly class Embedder
{
    public function __construct(
        private FeedLoader $loader,
        private IndexerInterface $indexer,
    ) {
    }

    /**
     * Converts every post returned by the feed loader into a TextDocument
     * (post data is carried along as metadata) and indexes them in one call.
     */
    public function embedBlog(): void
    {
        $textDocuments = [];

        foreach ($this->loader->load() as $blogPost) {
            $id = $blogPost->id;
            $content = $blogPost->toString();
            $metadata = new Metadata($blogPost->toArray());

            $textDocuments[] = new TextDocument($id, $content, $metadata);
        }

        $this->indexer->index($textDocuments);
    }
}

View File

@@ -11,11 +11,15 @@
namespace App\Blog;
use Symfony\AI\Store\Document\LoaderInterface;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Exception\InvalidArgumentException;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Uid\Uuid;
use Symfony\Contracts\HttpClient\HttpClientInterface;
class FeedLoader
final class FeedLoader implements LoaderInterface
{
public function __construct(
private HttpClientInterface $httpClient,
@@ -23,11 +27,17 @@ class FeedLoader
}
/**
* @return Post[]
* @param ?string $source RSS feed URL
* @param array<string, mixed> $options
*
* @return iterable<TextDocument>
*/
public function load(): array
public function load(?string $source, array $options = []): iterable
{
$result = $this->httpClient->request('GET', 'https://feeds.feedburner.com/symfony/blog');
if (null === $source) {
throw new InvalidArgumentException('FeedLoader requires a RSS feed URL as source, null given.');
}
$result = $this->httpClient->request('GET', $source);
$posts = [];
$crawler = new Crawler($result->getContent());
@@ -44,6 +54,8 @@ class FeedLoader
);
});
return $posts;
foreach ($posts as $post) {
yield new TextDocument($post->id, $post->toString(), new Metadata($post->toArray()));
}
}
}

View File

@@ -0,0 +1,47 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
use Symfony\AI\Store\Bridge\Local\InMemoryStore;
use Symfony\AI\Store\Document\Loader\TextFileLoader;
use Symfony\AI\Store\Document\Transformer\TextReplaceTransformer;
use Symfony\AI\Store\Document\Transformer\TextSplitTransformer;
use Symfony\AI\Store\Document\Vectorizer;
use Symfony\AI\Store\Indexer;
require_once dirname(__DIR__).'/bootstrap.php';

// Platform, store and vectorizer shared by the indexing and the query step below.
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$store = new InMemoryStore();
$vectorizer = new Vectorizer($platform, new Embeddings('text-embedding-3-small'));

// All fixture files live in the same directory.
$moviesDir = dirname(__DIR__, 2).'/fixtures/movies';

$indexer = new Indexer(
    loader: new TextFileLoader(),
    vectorizer: $vectorizer,
    store: $store,
    source: [
        $moviesDir.'/gladiator.md',
        $moviesDir.'/inception.md',
        $moviesDir.'/jurassic-park.md',
    ],
    transformers: [
        // Rename the plot heading first, then split with chunkSize 500 / overlap 100.
        new TextReplaceTransformer(search: '## Plot', replace: '## Synopsis'),
        new TextSplitTransformer(chunkSize: 500, overlap: 100),
    ],
);
$indexer->index();

// Query the freshly filled store and print a numbered list of the matching document ids.
$queryVector = $vectorizer->vectorize('Roman gladiator revenge');

foreach ($store->query($queryVector) as $position => $match) {
    printf("%d. %s\n", $position + 1, substr($match->id, 0, 40).'...');
}

View File

@@ -0,0 +1,58 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
use Symfony\AI\Store\Bridge\Local\InMemoryStore;
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\Transformer\TextSplitTransformer;
use Symfony\AI\Store\Document\Vectorizer;
use Symfony\AI\Store\Indexer;
use Symfony\Component\Uid\Uuid;
require_once dirname(__DIR__).'/bootstrap.php';

// Platform, store and vectorizer shared by the indexing and the query step below.
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$store = new InMemoryStore();
$vectorizer = new Vectorizer($platform, new Embeddings('text-embedding-3-small'));

// Two hand-written documents that the in-memory loader hands to the indexer.
$aiRevolution = new TextDocument(
    Uuid::v4(),
    'Artificial Intelligence is transforming the way we work and live. Machine learning algorithms can now process vast amounts of data and make predictions with remarkable accuracy.',
    new Metadata(['title' => 'AI Revolution'])
);
$climateAction = new TextDocument(
    Uuid::v4(),
    'Climate change is one of the most pressing challenges of our time. Renewable energy sources like solar and wind power are becoming increasingly important for a sustainable future.',
    new Metadata(['title' => 'Climate Action'])
);
$documents = [$aiRevolution, $climateAction];

$indexer = new Indexer(
    loader: new InMemoryLoader($documents),
    vectorizer: $vectorizer,
    store: $store,
    // No source: the documents are already held by the loader.
    source: null,
    transformers: [
        new TextSplitTransformer(chunkSize: 100, overlap: 20),
    ],
);
$indexer->index();

// Query the freshly filled store and print a numbered list of the matching document ids.
$queryVector = $vectorizer->vectorize('machine learning artificial intelligence');

foreach ($store->query($queryVector) as $position => $match) {
    printf("%d. %s\n", $position + 1, substr($match->id, 0, 40).'...');
}

View File

@@ -0,0 +1,32 @@
# Gladiator (2000)
**IMDB:** https://www.imdb.com/title/tt0172495/
**Director:** Ridley Scott
## Cast
- **Russell Crowe** as Maximus Decimus Meridius
- **Joaquin Phoenix** as Emperor Commodus
- **Connie Nielsen** as Lucilla
- **Oliver Reed** as Proximo
- **Derek Jacobi** as Senator Gracchus
- **Djimon Hounsou** as Juba
- **Richard Harris** as Marcus Aurelius
- **Ralf Möller** as Hagen
- **Tommy Flanagan** as Cicero
- **David Schofield** as Falco
## Plot
A former Roman General sets out to exact vengeance against the corrupt emperor who murdered his family and sent him into slavery.
**Maximus Decimus Meridius** is a powerful Roman general beloved by the people and the aging Emperor **Marcus Aurelius**. As Marcus Aurelius lies dying, he makes known his wish that Maximus should succeed him and return Rome to the former glory of the Republic rather than the corrupt Empire it has become.
However, Marcus Aurelius's son **Commodus** learns of his father's plan and murders him before he can publicly name Maximus as his successor. Commodus then orders the execution of Maximus and his family. Maximus escapes the execution but arrives at his farm too late to save his wife and son.
Wounded and devastated, Maximus is captured by slave traders and forced to become a gladiator. Under the training of **Proximo**, a former gladiator, Maximus becomes a skilled fighter and eventually makes his way to the **Colosseum** in Rome, where he gains fame and the crowd's favor.
Using his newfound popularity with the people, Maximus seeks to avenge the murder of his family and fulfill his promise to Marcus Aurelius to restore Rome to a republic. The film culminates in a final confrontation between Maximus and Commodus in the arena.
The film explores themes of *honor*, *revenge*, *political corruption*, and the struggle between personal desires and duty to the greater good.

View File

@@ -0,0 +1,28 @@
# Inception (2010)
**IMDB:** https://www.imdb.com/title/tt1375666/
**Director:** Christopher Nolan
## Cast
- **Leonardo DiCaprio** as Dom Cobb
- **Marion Cotillard** as Mal Cobb
- **Tom Hardy** as Eames
- **Elliot Page** as Ariadne
- **Ken Watanabe** as Saito
- **Dileep Rao** as Yusuf
- **Cillian Murphy** as Robert Fischer Jr.
- **Tom Berenger** as Peter Browning
- **Michael Caine** as Professor Stephen Miles
- **Lukas Haas** as Nash
## Plot
A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone's subconscious.
**Dom Cobb** is a skilled thief who specializes in *extraction* - stealing secrets from people's subconscious minds while they dream. This rare ability has made him a coveted player in the treacherous world of corporate espionage, but it has also made him an international fugitive and cost him everything he has ever loved.
Now Cobb is being offered a chance at redemption. One last job could give him his life back but only if he can accomplish the impossible - **inception**. Instead of the perfect heist, Cobb and his team of specialists have to pull off the reverse: their task is not to steal an idea but to plant one. If they succeed, it could be the perfect crime.
The film explores themes of *reality*, *dreams*, *memory*, and the nature of consciousness through multiple layers of dream states, creating a complex narrative structure that challenges both characters and audience to question what is real.

View File

@@ -0,0 +1,30 @@
# Jurassic Park (1993)
**IMDB:** https://www.imdb.com/title/tt0107290/
**Director:** Steven Spielberg
## Cast
- **Sam Neill** as Dr. Alan Grant
- **Laura Dern** as Dr. Ellie Sattler
- **Jeff Goldblum** as Dr. Ian Malcolm
- **Richard Attenborough** as John Hammond
- **Bob Peck** as Robert Muldoon
- **Martin Ferrero** as Donald Gennaro
- **BD Wong** as Dr. Henry Wu
- **Joseph Mazzello** as Tim Murphy
- **Ariana Richards** as Lex Murphy
- **Wayne Knight** as Dennis Nedry
## Plot
During a preview tour, a theme park suffers a major power breakdown that allows its cloned dinosaur exhibits to run amok.
Billionaire **John Hammond** has created a theme park on a remote island where he has successfully cloned dinosaurs from ancient DNA found in prehistoric mosquitoes preserved in amber. Before opening to the public, Hammond invites a select group of people to tour the park, including paleontologist **Dr. Alan Grant**, paleobotanist **Dr. Ellie Sattler**, and mathematician **Dr. Ian Malcolm**.
The tour begins smoothly, but things quickly go wrong when the park's computer systems are sabotaged by the disgruntled programmer **Dennis Nedry**, who is attempting to steal dinosaur embryos. The security systems fail, and the dinosaurs break free from their enclosures.
As the island descends into chaos, the visitors must survive encounters with various dangerous dinosaurs, including the intelligent and deadly **Velociraptors** and the massive **Tyrannosaurus Rex**. Dr. Grant finds himself responsible for Hammond's grandchildren, Tim and Lex, as they attempt to reach safety.
The film explores themes of *scientific ethics*, the *hubris of trying to control nature*, and the *unintended consequences of genetic engineering*. It questions whether humans have the right to resurrect extinct species and whether scientific advancement should be pursued without considering the potential risks and moral implications.

View File

@@ -397,6 +397,19 @@ return static function (DefinitionConfigurator $configurator): void {
->useAttributeAsKey('name')
->arrayPrototype()
->children()
->scalarNode('loader')
->info('Service name of loader')
->isRequired()
->end()
->variableNode('source')
->info('Source identifier (file path, URL, etc.) or array of sources')
->defaultNull()
->end()
->arrayNode('transformers')
->info('Array of transformer service names')
->scalarPrototype()->end()
->defaultValue([])
->end()
->scalarNode('vectorizer')
->info('Service name of vectorizer')
->defaultValue(VectorizerInterface::class)

View File

@@ -38,6 +38,7 @@ use Symfony\AI\Platform\Contract;
use Symfony\AI\Platform\Contract\JsonSchema\DescriptionParser;
use Symfony\AI\Platform\Contract\JsonSchema\Factory as SchemaFactory;
use Symfony\AI\Store\Command\DropStoreCommand;
use Symfony\AI\Store\Command\IndexCommand;
use Symfony\AI\Store\Command\SetupStoreCommand;
return static function (ContainerConfigurator $container): void {
@@ -157,5 +158,10 @@ return static function (ContainerConfigurator $container): void {
tagged_locator('ai.store', indexAttribute: 'name'),
])
->tag('console.command')
->set('ai.command.index', IndexCommand::class)
->args([
tagged_locator('ai.indexer', indexAttribute: 'name'),
])
->tag('console.command')
;
};

View File

@@ -1068,11 +1068,20 @@ final class AiBundle extends AbstractBundle
*/
private function processIndexerConfig(int|string $name, array $config, ContainerBuilder $container): void
{
    // Transformers are configured as service ids; wrap each one in a service reference.
    $transformerReferences = array_values(array_map(
        static fn ($serviceId) => new Reference($serviceId),
        $config['transformers'] ?? [],
    ));

    // Argument order: loader, vectorizer, store, source, transformers, logger.
    $indexerDefinition = (new Definition(Indexer::class, [
        new Reference($config['loader']),
        new Reference($config['vectorizer']),
        new Reference($config['store']),
        $config['source'],
        $transformerReferences,
        new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE),
    ]))->addTag('ai.indexer', ['name' => $name]);

    $container->setDefinition('ai.indexer.'.$name, $indexerDefinition);
}

View File

@@ -19,6 +19,8 @@ use PHPUnit\Framework\Attributes\UsesClass;
use PHPUnit\Framework\TestCase;
use Symfony\AI\AiBundle\AiBundle;
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
use Symfony\AI\Store\Document\Transformer\TextTrimTransformer;
use Symfony\AI\Store\Document\Vectorizer;
use Symfony\Component\Config\Definition\Exception\InvalidConfigurationException;
use Symfony\Component\DependencyInjection\ContainerBuilder;
@@ -678,6 +680,7 @@ class AiBundleTest extends TestCase
],
'indexer' => [
'my_indexer' => [
'loader' => InMemoryLoader::class,
'vectorizer' => 'ai.vectorizer.my_vectorizer',
'store' => 'ai.store.memory.my_store',
],
@@ -691,15 +694,251 @@ class AiBundleTest extends TestCase
$indexerDefinition = $container->getDefinition('ai.indexer.my_indexer');
$arguments = $indexerDefinition->getArguments();
// First argument should be a reference to the vectorizer
$this->assertInstanceOf(Reference::class, $arguments[0]);
$this->assertSame('ai.vectorizer.my_vectorizer', (string) $arguments[0]);
$this->assertSame(InMemoryLoader::class, (string) $arguments[0]);
$this->assertInstanceOf(Reference::class, $arguments[1]);
$this->assertSame('ai.vectorizer.my_vectorizer', (string) $arguments[1]);
// Should not create model-specific vectorizer when using configured one
$this->assertFalse($container->hasDefinition('ai.indexer.my_indexer.vectorizer'));
$this->assertFalse($container->hasDefinition('ai.indexer.my_indexer.model'));
}
public function testIndexerWithStringSource()
{
    $indexerConfig = [
        'loader' => InMemoryLoader::class,
        'source' => 'https://example.com/feed.xml',
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => ['my_indexer' => $indexerConfig],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer'));

    // A single string source must reach the definition unchanged at argument index 3.
    $source = $container->getDefinition('ai.indexer.my_indexer')->getArguments()[3];
    $this->assertSame('https://example.com/feed.xml', $source);
}
public function testIndexerWithArraySource()
{
    $expectedSources = [
        '/path/to/file1.txt',
        '/path/to/file2.txt',
        'https://example.com/feed.xml',
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => [
                'my_indexer' => [
                    'loader' => InMemoryLoader::class,
                    'source' => $expectedSources,
                    'vectorizer' => 'my_vectorizer_service',
                    'store' => 'ai.store.memory.my_store',
                ],
            ],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer'));

    // A list of sources must reach the definition unchanged at argument index 3.
    $source = $container->getDefinition('ai.indexer.my_indexer')->getArguments()[3];
    $this->assertIsArray($source);
    $this->assertCount(3, $source);
    $this->assertSame($expectedSources, $source);
}
public function testIndexerWithNullSource()
{
    // "source" is deliberately left out of the indexer configuration.
    $indexerConfig = [
        'loader' => InMemoryLoader::class,
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => ['my_indexer' => $indexerConfig],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer'));

    // Without a configured source, argument index 3 falls back to null.
    $source = $container->getDefinition('ai.indexer.my_indexer')->getArguments()[3];
    $this->assertNull($source);
}
public function testIndexerWithConfiguredTransformers()
{
    $indexerConfig = [
        'loader' => InMemoryLoader::class,
        'transformers' => [
            TextTrimTransformer::class,
            'App\CustomTransformer',
        ],
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => ['my_indexer' => $indexerConfig],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer'));

    // Each configured transformer id must become a Reference, in configuration order.
    $transformers = $container->getDefinition('ai.indexer.my_indexer')->getArguments()[4];
    $this->assertIsArray($transformers);
    $this->assertCount(2, $transformers);

    [$first, $second] = $transformers;
    $this->assertInstanceOf(Reference::class, $first);
    $this->assertSame(TextTrimTransformer::class, (string) $first);
    $this->assertInstanceOf(Reference::class, $second);
    $this->assertSame('App\CustomTransformer', (string) $second);
}
public function testIndexerWithEmptyTransformers()
{
    // "transformers" is configured explicitly, but as an empty list.
    $indexerConfig = [
        'loader' => InMemoryLoader::class,
        'transformers' => [],
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => ['my_indexer' => $indexerConfig],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer'));

    $transformers = $container->getDefinition('ai.indexer.my_indexer')->getArguments()[4];
    $this->assertSame([], $transformers);
}
public function testIndexerWithoutTransformers()
{
    // "transformers" is deliberately left out of the indexer configuration.
    $indexerConfig = [
        'loader' => InMemoryLoader::class,
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => ['my_indexer' => $indexerConfig],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer'));

    // Without configured transformers, argument index 4 falls back to an empty array.
    $transformers = $container->getDefinition('ai.indexer.my_indexer')->getArguments()[4];
    $this->assertSame([], $transformers);
}
public function testIndexerWithSourceAndTransformers()
{
    $expectedSources = [
        '/path/to/file1.txt',
        '/path/to/file2.txt',
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => [
                'my_indexer' => [
                    'loader' => InMemoryLoader::class,
                    'source' => $expectedSources,
                    'transformers' => [
                        TextTrimTransformer::class,
                    ],
                    'vectorizer' => 'my_vectorizer_service',
                    'store' => 'ai.store.memory.my_store',
                ],
            ],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer'));

    // Checks the first five definition arguments: loader, vectorizer, store, source, transformers.
    $arguments = $container->getDefinition('ai.indexer.my_indexer')->getArguments();

    $this->assertInstanceOf(Reference::class, $arguments[0]);
    $this->assertSame(InMemoryLoader::class, (string) $arguments[0]);

    $this->assertInstanceOf(Reference::class, $arguments[1]);
    $this->assertSame('my_vectorizer_service', (string) $arguments[1]);

    $this->assertInstanceOf(Reference::class, $arguments[2]);
    $this->assertSame('ai.store.memory.my_store', (string) $arguments[2]);

    $source = $arguments[3];
    $this->assertIsArray($source);
    $this->assertCount(2, $source);
    $this->assertSame($expectedSources, $source);

    $transformers = $arguments[4];
    $this->assertIsArray($transformers);
    $this->assertCount(1, $transformers);
    $this->assertInstanceOf(Reference::class, $transformers[0]);
    $this->assertSame(TextTrimTransformer::class, (string) $transformers[0]);
}
private function buildContainer(array $configuration): ContainerBuilder
{
$container = new ContainerBuilder();
@@ -959,6 +1198,7 @@ class AiBundleTest extends TestCase
],
'indexer' => [
'my_text_indexer' => [
'loader' => InMemoryLoader::class,
'vectorizer' => 'ai.vectorizer.test_vectorizer',
'store' => 'my_azure_search_store_service_id',
],

View File

@@ -0,0 +1,109 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\AI\Store\Command;
use Symfony\AI\Store\Exception\RuntimeException;
use Symfony\AI\Store\IndexerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Completion\CompletionInput;
use Symfony\Component\Console\Completion\CompletionSuggestions;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\DependencyInjection\ServiceLocator;
/**
 * Runs one of the registered indexers by name, optionally overriding its configured source(s).
 *
 * @author Oskar Stark <oskarstark@googlemail.com>
 */
#[AsCommand(
    name: 'ai:store:index',
    description: 'Index documents into a store',
)]
final class IndexCommand extends Command
{
    /**
     * @param ServiceLocator<IndexerInterface> $indexers locator of the available indexers, looked up by the "indexer" argument
     */
    public function __construct(
        private readonly ServiceLocator $indexers,
    ) {
        parent::__construct();
    }

    /**
     * Suggests the names of all provided indexers when the "indexer" argument is being completed.
     */
    public function complete(CompletionInput $input, CompletionSuggestions $suggestions): void
    {
        if ($input->mustSuggestArgumentValuesFor('indexer')) {
            $suggestions->suggestValues(array_keys($this->indexers->getProvidedServices()));
        }
    }

    protected function configure(): void
    {
        $this
            ->addArgument('indexer', InputArgument::REQUIRED, 'Name of the indexer to run')
            ->addOption('source', 's', InputOption::VALUE_OPTIONAL | InputOption::VALUE_IS_ARRAY, 'Source(s) to index (overrides configured source)')
            ->setHelp(<<<'EOF'
                The <info>%command.name%</info> command indexes documents into a store using the specified indexer.
                Basic usage:
                <info>php %command.full_name% blog</info>
                Override the configured source with a single source:
                <info>php %command.full_name% blog --source=/path/to/file.txt</info>
                Override with multiple sources:
                <info>php %command.full_name% blog --source=/path/to/file1.txt --source=/path/to/file2.txt</info>
                EOF
            )
        ;
    }

    /**
     * Resolves the requested indexer, applies an optional source override and runs it.
     *
     * @throws RuntimeException when the indexer name is unknown or when indexing fails
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $indexer = $input->getArgument('indexer');

        // The option is VALUE_OPTIONAL, so a bare "--source" contributes a null entry. Drop those
        // entries so a null is never forwarded to the indexer next to real sources.
        $sources = array_values(array_filter(
            $input->getOption('source'),
            static fn ($candidate): bool => null !== $candidate,
        ));

        // Collapse the list: nothing given => no override, one value => string, several => array.
        $source = match (true) {
            [] === $sources => null,
            1 === \count($sources) => $sources[0],
            default => $sources,
        };

        if (!$this->indexers->has($indexer)) {
            throw new RuntimeException(\sprintf('The "%s" indexer does not exist.', $indexer));
        }

        $indexerService = $this->indexers->get($indexer);

        // withSource() returns a new indexer instance, so the shared service keeps its configured source.
        if (null !== $source) {
            $indexerService = $indexerService->withSource($source);
        }

        $io->title(\sprintf('Indexing documents using "%s" indexer', $indexer));

        try {
            $indexerService->index([]);
            $io->success(\sprintf('Documents indexed successfully using "%s" indexer.', $indexer));
        } catch (\Exception $e) {
            // The original message is appended outside sprintf() so "%" characters in it are not interpreted.
            throw new RuntimeException(\sprintf('An error occurred while indexing with "%s": ', $indexer).$e->getMessage(), previous: $e);
        }

        return Command::SUCCESS;
    }
}

View File

@@ -30,7 +30,7 @@ final readonly class InMemoryLoader implements LoaderInterface
) {
}
public function load(string $source, array $options = []): iterable
public function load(?string $source, array $options = []): iterable
{
yield from $this->documents;
}

View File

@@ -14,6 +14,7 @@ namespace Symfony\AI\Store\Document\Loader;
use Symfony\AI\Store\Document\LoaderInterface;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Exception\InvalidArgumentException;
use Symfony\AI\Store\Exception\RuntimeException;
use Symfony\Component\Uid\Uuid;
@@ -22,8 +23,12 @@ use Symfony\Component\Uid\Uuid;
*/
final readonly class TextFileLoader implements LoaderInterface
{
public function load(string $source, array $options = []): iterable
public function load(?string $source, array $options = []): iterable
{
if (null === $source) {
throw new InvalidArgumentException('TextFileLoader requires a file path as source, null given.');
}
if (!is_file($source)) {
throw new RuntimeException(\sprintf('File "%s" does not exist.', $source));
}

View File

@@ -17,10 +17,10 @@ namespace Symfony\AI\Store\Document;
interface LoaderInterface
{
/**
* @param string $source Identifier for the loader to load the documents from, e.g. file path, folder, or URL.
* @param string|null $source Identifier for the loader to load the documents from, e.g. file path, folder, or URL. Can be null for InMemoryLoader.
* @param array<string, mixed> $options loader specific set of options to control the loading process
*
* @return iterable<TextDocument> iterable of TextDocuments loaded from the source
*/
public function load(string $source, array $options = []): iterable;
public function load(?string $source, array $options = []): iterable;
}

View File

@@ -0,0 +1,55 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\AI\Store\Document\Transformer;
use Symfony\AI\Store\Document\TransformerInterface;
use Symfony\AI\Store\Exception\InvalidArgumentException;
/**
 * Transformer that substitutes one string for another in the content of every document.
 *
 * Search and replacement come from the constructor and can be overridden per call
 * through the options array. Both defaults are the empty string and therefore equal,
 * so constructing the transformer without arguments fails validation.
 *
 * @author Oskar Stark <oskarstark@googlemail.com>
 */
final readonly class TextReplaceTransformer implements TransformerInterface
{
    public const OPTION_SEARCH = 'search';
    public const OPTION_REPLACE = 'replace';

    public function __construct(
        private string $search = '',
        private string $replace = '',
    ) {
        self::assertDistinct($search, $replace);
    }

    /**
     * Yields a copy of each document with every occurrence of the search string replaced.
     *
     * This method is a generator, so the options are validated when iteration starts,
     * not at the moment transform() is called.
     *
     * @param array{search?: string, replace?: string} $options
     */
    public function transform(iterable $documents, array $options = []): iterable
    {
        $needle = $options[self::OPTION_SEARCH] ?? $this->search;
        $replacement = $options[self::OPTION_REPLACE] ?? $this->replace;

        self::assertDistinct($needle, $replacement);

        foreach ($documents as $document) {
            $rewritten = str_replace($needle, $replacement, $document->content);

            yield $document->withContent($rewritten);
        }
    }

    /**
     * Rejects a configuration in which search and replacement are the same string.
     */
    private static function assertDistinct(string $search, string $replace): void
    {
        if ($search !== $replace) {
            return;
        }

        throw new InvalidArgumentException('Search and replace strings must be different.');
    }
}

View File

@@ -34,7 +34,7 @@ final readonly class TextSplitTransformer implements TransformerInterface
private int $overlap = 200,
) {
if ($this->overlap < 0 || $this->overlap >= $this->chunkSize) {
throw new InvalidArgumentException(\sprintf('Overlap must be non-negative and less than chunk size. Got chunk size: %d, overlap: %d', $this->chunkSize, $this->overlap));
throw new InvalidArgumentException(\sprintf('Overlap must be non-negative and less than chunk size. Got chunk size: %d, overlap: %d.', $this->chunkSize, $this->overlap));
}
}

View File

@@ -0,0 +1,29 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\AI\Store\Document\Transformer;
use Symfony\AI\Store\Document\TransformerInterface;
/**
 * Transformer that strips leading and trailing whitespace from the content of every document.
 *
 * @author Oskar Stark <oskarstark@googlemail.com>
 */
final readonly class TextTrimTransformer implements TransformerInterface
{
    /**
     * Lazily yields a copy of each document whose content has been passed through trim().
     * The options array is accepted for interface compatibility and is not read.
     */
    public function transform(iterable $documents, array $options = []): iterable
    {
        foreach ($documents as $document) {
            $trimmed = trim($document->content);

            yield $document->withContent($trimmed);
        }
    }
}

View File

@@ -0,0 +1,19 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\AI\Store\Exception;
/**
 * Logic exception of the Store namespace: extends PHP's \LogicException and
 * implements this namespace's ExceptionInterface. It adds no behavior of its own.
 *
 * @author Oskar Stark <oskarstark@googlemail.com>
 */
class LogicException extends \LogicException implements ExceptionInterface
{
}

View File

@@ -13,31 +13,69 @@ namespace Symfony\AI\Store;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use Symfony\AI\Store\Document\LoaderInterface;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\TransformerInterface;
use Symfony\AI\Store\Document\VectorizerInterface;
/**
* @author Christopher Hertel <mail@christopher-hertel.de>
* @author Oskar Stark <oskarstark@googlemail.com>
*/
final readonly class Indexer implements IndexerInterface
class Indexer implements IndexerInterface
{
public function __construct(
private VectorizerInterface $vectorizer,
private StoreInterface $store,
private LoggerInterface $logger = new NullLogger(),
) {
}
/**
* @var array<string|null>
*/
private array $sources = [];
/**
* @param TextDocument|iterable<TextDocument> $documents
* @param int $chunkSize number of documents to vectorize and store in one batch
* @param string|array<string>|null $source
* @param TransformerInterface[] $transformers
*/
public function index(TextDocument|iterable $documents, int $chunkSize = 50): void
public function __construct(
    private LoaderInterface $loader,
    private VectorizerInterface $vectorizer,
    private StoreInterface $store,
    string|array|null $source = null,
    private array $transformers = [],
    private LoggerInterface $logger = new NullLogger(),
) {
    // Normalise the accepted shapes into one array of sources:
    // null => no sources, array => used as given, string => single-element array.
    $this->sources = match (true) {
        null === $source => [],
        \is_array($source) => $source,
        default => [$source],
    };
}
public function withSource(string|array $source): self
{
if ($documents instanceof TextDocument) {
$documents = [$documents];
return new self($this->loader, $this->vectorizer, $this->store, $source, $this->transformers, $this->logger);
}
public function index(array $options = []): void
{
$this->logger->debug('Starting document processing', ['sources' => $this->sources, 'options' => $options]);
$documents = [];
if ([] === $this->sources) {
// No specific source provided, load with null
$documents = $this->loadSource(null);
} else {
foreach ($this->sources as $singleSource) {
$documents = array_merge($documents, $this->loadSource($singleSource));
}
}
if ([] === $documents) {
$this->logger->debug('No documents to process', ['sources' => $this->sources]);
return;
}
// Transform documents through all transformers
foreach ($this->transformers as $transformer) {
$documents = $transformer->transform($documents);
}
// Vectorize and store documents in chunks
$chunkSize = $options['chunk_size'] ?? 50;
$counter = 0;
$chunk = [];
foreach ($documents as $document) {
@@ -50,10 +88,24 @@ final readonly class Indexer implements IndexerInterface
}
}
if (\count($chunk) > 0) {
// Handle remaining documents
if ([] !== $chunk) {
$this->store->add(...$this->vectorizer->vectorizeTextDocuments($chunk));
}
$this->logger->debug(0 === $counter ? 'No documents to index' : \sprintf('Indexed %d documents', $counter));
$this->logger->debug('Document processing completed', ['total_documents' => $counter]);
}
/**
 * Asks the loader for the given source and collects everything it yields
 * into a sequentially indexed array (keys produced by the loader are dropped).
 *
 * @param string|null $source source identifier handed to the loader unchanged
 *
 * @return TextDocument[]
 */
private function loadSource(?string $source): array
{
    $loaded = $this->loader->load($source);

    // preserve_keys: false re-indexes from 0, matching an append-only collection loop.
    return iterator_to_array($loaded, false);
}
}

View File

@@ -11,18 +11,24 @@
namespace Symfony\AI\Store;
use Symfony\AI\Store\Document\TextDocument;
/**
* Converts a collection of TextDocuments into VectorDocuments and pushes them to a store implementation.
* Handles the complete document processing pipeline: load → transform → vectorize → store.
*
* @author Oskar Stark <oskarstark@googlemail.com>
*/
interface IndexerInterface
{
/**
* @param TextDocument|iterable<TextDocument> $documents
* @param int $chunkSize number of documents to vectorize and store in one batch
* Process sources through the complete document pipeline: load → transform → vectorize → store.
*
* @param array{chunk_size?: int} $options Processing options
*/
public function index(TextDocument|iterable $documents, int $chunkSize = 50): void;
public function index(array $options = []): void;
/**
* Create a new instance with a different source.
*
* @param string|array<string> $source Source identifier (file path, URL, etc.) or array of sources
*/
public function withSource(string|array $source): self;
}

View File

@@ -27,9 +27,9 @@ final class InMemoryLoaderTest extends TestCase
public function testLoadWithEmptyDocuments()
{
$loader = new InMemoryLoader();
$documents = iterator_to_array($loader->load('ignored-source'));
$documents = iterator_to_array($loader->load(null));
$this->assertEmpty($documents);
$this->assertSame([], $documents);
}
public function testLoadWithSingleDocument()
@@ -37,7 +37,7 @@ final class InMemoryLoaderTest extends TestCase
$document = new TextDocument(Uuid::v4(), 'This is test content');
$loader = new InMemoryLoader([$document]);
$documents = iterator_to_array($loader->load('ignored-source'));
$documents = iterator_to_array($loader->load(null));
$this->assertCount(1, $documents);
$this->assertSame($document, $documents[0]);
@@ -50,7 +50,7 @@ final class InMemoryLoaderTest extends TestCase
$document2 = new TextDocument(Uuid::v4(), 'Second document', new Metadata(['type' => 'test']));
$loader = new InMemoryLoader([$document1, $document2]);
$documents = iterator_to_array($loader->load('ignored-source'));
$documents = iterator_to_array($loader->load(null));
$this->assertCount(2, $documents);
$this->assertSame($document1, $documents[0]);
@@ -62,18 +62,14 @@ final class InMemoryLoaderTest extends TestCase
public function testLoadIgnoresSourceParameter()
{
$document = new TextDocument(Uuid::v4(), 'test content');
$document = new TextDocument(Uuid::v4(), 'Test content');
$loader = new InMemoryLoader([$document]);
$documents1 = iterator_to_array($loader->load('source1'));
$documents2 = iterator_to_array($loader->load('source2'));
$documents3 = iterator_to_array($loader->load('any-source'));
// Source parameter should be ignored - same result regardless of value
$documentsWithNull = iterator_to_array($loader->load(null));
$documentsWithString = iterator_to_array($loader->load('ignored-source'));
$this->assertCount(1, $documents1);
$this->assertCount(1, $documents2);
$this->assertCount(1, $documents3);
$this->assertSame($document, $documents1[0]);
$this->assertSame($document, $documents2[0]);
$this->assertSame($document, $documents3[0]);
$this->assertSame($documentsWithNull, $documentsWithString);
$this->assertSame($document, $documentsWithNull[0]);
}
}

View File

@@ -15,11 +15,22 @@ use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\TestCase;
use Symfony\AI\Store\Document\Loader\TextFileLoader;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Exception\InvalidArgumentException;
use Symfony\AI\Store\Exception\RuntimeException;
#[CoversClass(TextFileLoader::class)]
final class TextFileLoaderTest extends TestCase
{
/**
 * Consuming load(null) must fail with an InvalidArgumentException carrying
 * the "requires a file path" message.
 */
public function testLoadWithNullSource()
{
    $textFileLoader = new TextFileLoader();

    $this->expectExceptionMessage('TextFileLoader requires a file path as source, null given.');
    $this->expectException(InvalidArgumentException::class);

    // The result is consumed so that a lazily evaluated load() is actually executed.
    $pending = $textFileLoader->load(null);
    iterator_to_array($pending);
}
public function testLoadWithInvalidSource()
{
$loader = new TextFileLoader();

View File

@@ -0,0 +1,196 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\AI\Store\Tests\Document\Transformer;
use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\TestCase;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\Transformer\TextReplaceTransformer;
use Symfony\AI\Store\Exception\InvalidArgumentException;
use Symfony\Component\Uid\Uuid;
/**
* @author Oskar Stark <oskarstark@googlemail.com>
*/
#[CoversClass(TextReplaceTransformer::class)]
final class TextReplaceTransformerTest extends TestCase
{
    /**
     * Without options, the search/replace pair given to the constructor is applied.
     */
    public function testReplaceWithConstructorParameters()
    {
        $replacer = new TextReplaceTransformer('foo', 'bar');
        $input = new TextDocument(Uuid::v4(), 'foo is foo');

        $transformed = iterator_to_array($replacer->transform([$input]));

        $this->assertCount(1, $transformed);
        $this->assertSame('bar is bar', $transformed[0]->content);
    }

    /**
     * A search/replace pair passed through the options array is applied.
     */
    public function testReplaceWithOptions()
    {
        $replacer = new TextReplaceTransformer('initial', 'value');
        $input = new TextDocument(Uuid::v4(), 'hello world');
        $options = [
            TextReplaceTransformer::OPTION_SEARCH => 'hello',
            TextReplaceTransformer::OPTION_REPLACE => 'goodbye',
        ];

        $transformed = iterator_to_array($replacer->transform([$input], $options));

        $this->assertCount(1, $transformed);
        $this->assertSame('goodbye world', $transformed[0]->content);
    }

    /**
     * When both option keys are present, the constructor pair is not applied.
     */
    public function testOptionsOverrideConstructorParameters()
    {
        $replacer = new TextReplaceTransformer('foo', 'bar');
        $input = new TextDocument(Uuid::v4(), 'foo hello');
        $options = [
            TextReplaceTransformer::OPTION_SEARCH => 'hello',
            TextReplaceTransformer::OPTION_REPLACE => 'world',
        ];

        $transformed = iterator_to_array($replacer->transform([$input], $options));

        $this->assertCount(1, $transformed);
        $this->assertSame('foo world', $transformed[0]->content);
    }

    /**
     * Every occurrence of the search string is replaced, not just the first.
     */
    public function testReplaceMultipleOccurrences()
    {
        $replacer = new TextReplaceTransformer('a', 'b');
        $input = new TextDocument(Uuid::v4(), 'a a a');

        $transformed = iterator_to_array($replacer->transform([$input]));

        $this->assertCount(1, $transformed);
        $this->assertSame('b b b', $transformed[0]->content);
    }

    /**
     * An empty replacement removes the search string and leaves the rest untouched.
     */
    public function testReplaceWithEmptyString()
    {
        $replacer = new TextReplaceTransformer('remove', '');
        $input = new TextDocument(Uuid::v4(), 'remove this word');

        $transformed = iterator_to_array($replacer->transform([$input]));

        $this->assertCount(1, $transformed);
        $this->assertSame(' this word', $transformed[0]->content);
    }

    /**
     * The transformed document carries the very same Metadata instance.
     */
    public function testReplacePreservesMetadata()
    {
        $originalMetadata = new Metadata(['key' => 'value']);
        $replacer = new TextReplaceTransformer('old', 'new');
        $input = new TextDocument(Uuid::v4(), 'old text', $originalMetadata);

        $transformed = iterator_to_array($replacer->transform([$input]));

        $this->assertCount(1, $transformed);
        $this->assertSame('new text', $transformed[0]->content);
        $this->assertSame($originalMetadata, $transformed[0]->metadata);
    }

    /**
     * The transformed document carries the very same id instance.
     */
    public function testReplacePreservesDocumentId()
    {
        $originalId = Uuid::v4();
        $replacer = new TextReplaceTransformer('old', 'new');
        $input = new TextDocument($originalId, 'old text');

        $transformed = iterator_to_array($replacer->transform([$input]));

        $this->assertCount(1, $transformed);
        $this->assertSame($originalId, $transformed[0]->id);
    }

    /**
     * Each document of a batch is processed independently and order is kept.
     */
    public function testReplaceProcessesMultipleDocuments()
    {
        $replacer = new TextReplaceTransformer('x', 'y');
        $batch = [
            new TextDocument(Uuid::v4(), 'x marks the spot'),
            new TextDocument(Uuid::v4(), 'find x here'),
            new TextDocument(Uuid::v4(), 'no match'),
        ];

        $transformed = iterator_to_array($replacer->transform($batch));

        $this->assertCount(3, $transformed);
        $this->assertSame('y marks the spot', $transformed[0]->content);
        $this->assertSame('find y here', $transformed[1]->content);
        $this->assertSame('no match', $transformed[2]->content);
    }

    /**
     * Matching is case-sensitive: only the exact-case occurrence is replaced.
     */
    public function testReplaceCaseSensitive()
    {
        $replacer = new TextReplaceTransformer('Hello', 'Goodbye');
        $input = new TextDocument(Uuid::v4(), 'Hello hello HELLO');

        $transformed = iterator_to_array($replacer->transform([$input]));

        $this->assertCount(1, $transformed);
        $this->assertSame('Goodbye hello HELLO', $transformed[0]->content);
    }

    /**
     * Content without the search string comes out unchanged.
     */
    public function testReplaceHandlesNoMatch()
    {
        $replacer = new TextReplaceTransformer('notfound', 'replacement');
        $input = new TextDocument(Uuid::v4(), 'original text');

        $transformed = iterator_to_array($replacer->transform([$input]));

        $this->assertCount(1, $transformed);
        $this->assertSame('original text', $transformed[0]->content);
    }

    /**
     * Identical search and replace strings are rejected at construction time.
     */
    public function testConstructorThrowsExceptionWhenSearchEqualsReplace()
    {
        $this->expectExceptionMessage('Search and replace strings must be different.');
        $this->expectException(InvalidArgumentException::class);

        new TextReplaceTransformer('same', 'same');
    }

    /**
     * Identical search and replace strings supplied via options are rejected during transform.
     */
    public function testTransformThrowsExceptionWhenSearchEqualsReplaceInOptions()
    {
        // Built before the expectation is registered so a constructor failure cannot satisfy it.
        $replacer = new TextReplaceTransformer('initial', 'value');
        $input = new TextDocument(Uuid::v4(), 'text');
        $options = [
            TextReplaceTransformer::OPTION_SEARCH => 'same',
            TextReplaceTransformer::OPTION_REPLACE => 'same',
        ];

        $this->expectExceptionMessage('Search and replace strings must be different.');
        $this->expectException(InvalidArgumentException::class);

        iterator_to_array($replacer->transform([$input], $options));
    }

    /**
     * Two empty strings count as identical and are rejected as well.
     */
    public function testEmptySearchAndReplaceThrowsException()
    {
        $this->expectExceptionMessage('Search and replace strings must be different.');
        $this->expectException(InvalidArgumentException::class);

        new TextReplaceTransformer('', '');
    }

    /**
     * Supplying only the search option keeps the constructor's replace value.
     */
    public function testPartialOptionsUseConstructorDefaults()
    {
        $replacer = new TextReplaceTransformer('default', 'replacement');
        $input = new TextDocument(Uuid::v4(), 'default text');
        $searchOnly = [
            TextReplaceTransformer::OPTION_SEARCH => 'text',
        ];

        $transformed = iterator_to_array($replacer->transform([$input], $searchOnly));

        $this->assertCount(1, $transformed);
        $this->assertSame('default replacement', $transformed[0]->content);
    }
}

View File

@@ -0,0 +1,98 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\AI\Store\Tests\Document\Transformer;
use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\TestWith;
use PHPUnit\Framework\TestCase;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\Transformer\TextTrimTransformer;
use Symfony\AI\Store\Exception\InvalidArgumentException;
use Symfony\Component\Uid\Uuid;
/**
* @author Oskar Stark <oskarstark@googlemail.com>
*/
#[CoversClass(TextTrimTransformer::class)]
final class TextTrimTransformerTest extends TestCase
{
    /**
     * Leading and trailing whitespace is stripped; inner whitespace is left alone.
     */
    #[TestWith([' text with spaces ', 'text with spaces'])]
    #[TestWith(["\n\ntext with newlines\n\n", 'text with newlines'])]
    #[TestWith(["\t\ttext with tabs\t\t", 'text with tabs'])]
    #[TestWith([' text with middle spaces ', 'text with middle spaces'])]
    #[TestWith(['already trimmed', 'already trimmed'])]
    #[TestWith([' mixed whitespace ', 'mixed whitespace'])]
    #[TestWith(["\r\ncarriage return and newline\r\n", 'carriage return and newline'])]
    public function testTrim(string $input, string $expected)
    {
        $trimmer = new TextTrimTransformer();
        $untrimmed = new TextDocument(Uuid::v4(), $input);

        $trimmed = iterator_to_array($trimmer->transform([$untrimmed]));

        $this->assertCount(1, $trimmed);
        $this->assertSame($expected, $trimmed[0]->content);
    }

    /**
     * A whitespace-only document cannot be built in the first place, so the
     * "trim to empty string" case is asserted at TextDocument construction.
     */
    public function testTrimHandlesOnlyWhitespace()
    {
        $this->expectExceptionMessage('The content shall not be an empty string.');
        $this->expectException(InvalidArgumentException::class);

        new TextDocument(Uuid::v4(), ' ');
    }

    /**
     * Every document of a batch is trimmed and the input order is kept.
     */
    public function testTrimProcessesMultipleDocuments()
    {
        $trimmer = new TextTrimTransformer();
        $batch = [
            new TextDocument(Uuid::v4(), ' first '),
            new TextDocument(Uuid::v4(), ' second '),
            new TextDocument(Uuid::v4(), ' third '),
        ];

        $trimmed = iterator_to_array($trimmer->transform($batch));

        $this->assertCount(3, $trimmed);
        $this->assertSame('first', $trimmed[0]->content);
        $this->assertSame('second', $trimmed[1]->content);
        $this->assertSame('third', $trimmed[2]->content);
    }

    /**
     * The trimmed document carries the very same Metadata instance.
     */
    public function testTrimPreservesMetadata()
    {
        $trimmer = new TextTrimTransformer();
        $originalMetadata = new Metadata(['key' => 'value']);
        $untrimmed = new TextDocument(Uuid::v4(), ' text ', $originalMetadata);

        $trimmed = iterator_to_array($trimmer->transform([$untrimmed]));

        $this->assertCount(1, $trimmed);
        $this->assertSame('text', $trimmed[0]->content);
        $this->assertSame($originalMetadata, $trimmed[0]->metadata);
    }

    /**
     * The trimmed document carries the very same id instance.
     */
    public function testTrimPreservesDocumentId()
    {
        $trimmer = new TextTrimTransformer();
        $originalId = Uuid::v4();
        $untrimmed = new TextDocument($originalId, ' text ');

        $trimmed = iterator_to_array($trimmer->transform([$untrimmed]));

        $this->assertCount(1, $trimmed);
        $this->assertSame($originalId, $trimmed[0]->id);
    }
}

View File

@@ -15,7 +15,6 @@ use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\Medium;
use PHPUnit\Framework\Attributes\UsesClass;
use PHPUnit\Framework\TestCase;
use Psr\Log\LoggerInterface;
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
use Symfony\AI\Platform\Message\ToolCallMessage;
use Symfony\AI\Platform\Platform;
@@ -23,6 +22,7 @@ use Symfony\AI\Platform\Result\ResultPromise;
use Symfony\AI\Platform\Result\ToolCall;
use Symfony\AI\Platform\Result\VectorResult;
use Symfony\AI\Platform\Vector\Vector;
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\VectorDocument;
@@ -34,6 +34,7 @@ use Symfony\Component\Uid\Uuid;
#[CoversClass(Indexer::class)]
#[Medium]
#[UsesClass(InMemoryLoader::class)]
#[UsesClass(TextDocument::class)]
#[UsesClass(Vector::class)]
#[UsesClass(VectorDocument::class)]
@@ -49,10 +50,11 @@ final class IndexerTest extends TestCase
{
$document = new TextDocument($id = Uuid::v4(), 'Test content');
$vector = new Vector([0.1, 0.2, 0.3]);
$loader = new InMemoryLoader([$document]);
$vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector)), new Embeddings());
$indexer = new Indexer($vectorizer, $store = new TestStore());
$indexer->index($document);
$indexer = new Indexer($loader, $vectorizer, $store = new TestStore());
$indexer->index();
$this->assertCount(1, $store->documents);
$this->assertInstanceOf(VectorDocument::class, $store->documents[0]);
@@ -62,12 +64,11 @@ final class IndexerTest extends TestCase
public function testIndexEmptyDocumentList()
{
$logger = self::createMock(LoggerInterface::class);
$logger->expects($this->once())->method('debug')->with('No documents to index');
$loader = new InMemoryLoader([]);
$vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(), new Embeddings());
$indexer = new Indexer($vectorizer, $store = new TestStore(), $logger);
$indexer->index([]);
$indexer = new Indexer($loader, $vectorizer, $store = new TestStore());
$indexer->index();
$this->assertSame([], $store->documents);
}
@@ -77,10 +78,11 @@ final class IndexerTest extends TestCase
$metadata = new Metadata(['key' => 'value']);
$document = new TextDocument($id = Uuid::v4(), 'Test content', $metadata);
$vector = new Vector([0.1, 0.2, 0.3]);
$loader = new InMemoryLoader([$document]);
$vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector)), new Embeddings());
$indexer = new Indexer($vectorizer, $store = new TestStore());
$indexer->index($document);
$indexer = new Indexer($loader, $vectorizer, $store = new TestStore());
$indexer->index();
$this->assertSame(1, $store->addCalls);
$this->assertCount(1, $store->documents);
@@ -89,4 +91,64 @@ final class IndexerTest extends TestCase
$this->assertSame($vector, $store->documents[0]->vector);
$this->assertSame(['key' => 'value'], $store->documents[0]->metadata->getArrayCopy());
}
/**
 * withSource() must return a distinct indexer that is itself able to index.
 *
 * The returned instance is built from the same loader, vectorizer and store
 * as the original, so indexing through it is observed on the shared store.
 */
public function testWithSource()
{
    $document1 = new TextDocument(Uuid::v4(), 'Document 1');
    $vector = new Vector([0.1, 0.2, 0.3]);

    // InMemoryLoader doesn't use source parameter, so we'll test withSource method's immutability
    $loader = new InMemoryLoader([$document1]);
    $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector)), new Embeddings());

    // Create indexer with initial source
    $indexer = new Indexer($loader, $vectorizer, $store = new TestStore(), 'source1');

    // Create new indexer with different source
    $indexerWithNewSource = $indexer->withSource('source2');

    // Verify it returns a new instance (immutability)
    $this->assertNotSame($indexer, $indexerWithNewSource);

    // The original indexer can index successfully
    $indexer->index();
    $this->assertCount(1, $store->documents);
    $this->assertSame(1, $store->addCalls);

    // The instance returned by withSource() must be usable too; it writes to the same store,
    // so a second add() call on that store proves it actually indexed.
    $indexerWithNewSource->index();
    $this->assertSame(2, $store->addCalls);

    // An indexer constructed directly with the new source behaves the same on a fresh store
    $store2 = new TestStore();
    $indexer2 = new Indexer($loader, $vectorizer, $store2, 'source2');
    $indexer2->index();
    $this->assertCount(1, $store2->documents);
}
/**
 * withSource() must accept an array of sources and return a distinct indexer
 * that is itself able to index.
 *
 * The returned instance is built from the same loader, vectorizer and store
 * as the original, so indexing through it is observed on the shared store.
 */
public function testWithSourceArray()
{
    $document1 = new TextDocument(Uuid::v4(), 'Document 1');
    $document2 = new TextDocument(Uuid::v4(), 'Document 2');
    $vector = new Vector([0.1, 0.2, 0.3]);

    // InMemoryLoader returns all documents regardless of source
    $loader = new InMemoryLoader([$document1, $document2]);
    $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector)), new Embeddings());

    // Create indexer with single source
    $indexer = new Indexer($loader, $vectorizer, $store1 = new TestStore(), 'source1');

    // Create new indexer with array of sources
    $indexerWithMultipleSources = $indexer->withSource(['source2', 'source3']);

    // Verify it returns a new instance (immutability)
    $this->assertNotSame($indexer, $indexerWithMultipleSources);

    // Since InMemoryLoader ignores source, the single-source indexer indexes all documents
    $indexer->index();
    $this->assertCount(2, $store1->documents);
    $this->assertSame(1, $store1->addCalls);

    // The instance returned by withSource() must be usable too; it writes to the same store,
    // so a second add() call on that store proves it actually indexed.
    $indexerWithMultipleSources->index();
    $this->assertSame(2, $store1->addCalls);

    $store2 = new TestStore();
    $indexer2 = new Indexer($loader, $vectorizer, $store2, ['source2', 'source3']);
    $indexer2->index();

    // With array sources, loadSource is called for each source
    // Since InMemoryLoader ignores source, it returns all docs each time
    // So with 2 sources and 2 docs each time = 4 documents total
    $this->assertCount(4, $store2->documents);
}
}