Typesense is an open-source search engine that provides fast and relevant search results. It is built to be simple to use and easy to scale. In this post, we will look at how to use Typesense to index and search Azure Blob Storage.
I’m really just posting this to try to motivate myself to get back into the swing of blogging. It’s far too late (early?) for this to turn out well, but that’s the fun of it.
TL;DR:
- Typesense is an open-source search engine that provides fast and relevant search results.
- Typesense is built to be simple to use and easy to scale.
- We can use the Typesense API to create collections, add documents, and search for documents.
- It’s as simple as managing to get Microsoft’s npm packages working (derogatory)
Using Typesense to Index and Search Blob Storage
import { BlobServiceClient } from '@azure/storage-blob';
import axios from 'axios';
/**
 * Lists every blob in the configured Azure Storage container.
 *
 * Configuration is read from the environment:
 * `AZURE_STORAGE_ACCOUNT_NAME`, `AZURE_STORAGE_ACCOUNT_KEY` and
 * `AZURE_STORAGE_CONTAINER_NAME`. The endpoint suffix is fixed to
 * `core.windows.net`.
 *
 * @returns The items yielded by `listBlobsFlat()`, in the order they were yielded.
 * @throws Error if any of the three environment variables is missing or empty.
 */
async function getBlobListFromAzure(): Promise<any[]> {
  const accountName = process.env.AZURE_STORAGE_ACCOUNT_NAME;
  const accountKey = process.env.AZURE_STORAGE_ACCOUNT_KEY;
  const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;

  // Fail fast: an unset variable would otherwise be interpolated into the
  // connection string as the literal text "undefined".
  if (!accountName || !accountKey || !containerName) {
    const missing = [
      ['AZURE_STORAGE_ACCOUNT_NAME', accountName],
      ['AZURE_STORAGE_ACCOUNT_KEY', accountKey],
      ['AZURE_STORAGE_CONTAINER_NAME', containerName],
    ]
      .filter(([, value]) => !value)
      .map(([variableName]) => variableName);
    throw new Error(`Missing required environment variable(s): ${missing.join(', ')}`);
  }

  const endpointSuffix = 'core.windows.net';
  const connectionString = `DefaultEndpointsProtocol=https;AccountName=${accountName};AccountKey=${accountKey};EndpointSuffix=${endpointSuffix}`;
  const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString);
  const containerClient = blobServiceClient.getContainerClient(containerName);

  // listBlobsFlat() is consumed as an async iterable; collect everything it yields.
  const blobList: any[] = [];
  for await (const blob of containerClient.listBlobsFlat()) {
    blobList.push(blob);
  }
  return blobList;
}
/**
 * Converts a date into the integer stored in the `lastModified` field.
 *
 * Note the unit: the result is the epoch time in milliseconds multiplied by
 * 1000, i.e. microseconds since the Unix epoch, not seconds.
 *
 * @param dateString - A date string understood by the `Date` constructor; a
 *   `Date` instance is accepted as well.
 * @returns Microseconds since 1970-01-01T00:00:00Z.
 * @throws RangeError if the input cannot be parsed as a date. Returning `NaN`
 *   instead would be serialized as `null` by `JSON.stringify`.
 */
function convertToUnixTimestamp(dateString: string | Date): number {
  const epochMilliseconds = new Date(dateString).getTime();
  if (Number.isNaN(epochMilliseconds)) {
    throw new RangeError(`Invalid date: ${String(dateString)}`);
  }
  return epochMilliseconds * 1000; // milliseconds -> microseconds
}
/**
 * Creates the `blob` collection on the local Typesense server.
 *
 * The schema has three string fields (`name`, `url`, `fileName`) and an
 * `int64` field `lastModified`, which is also the default sorting field.
 * The API key is read from `TYPESENSE_API_KEY`.
 *
 * Failures are logged rather than thrown, so the caller continues either way.
 * An HTTP 409 (the collection already exists) is logged as information, not
 * as an error, so the script can be re-run.
 */
async function createCollectionInTypeSense(): Promise<void> {
  const typeSenseEndpoint = 'http://localhost:8108';
  const apiKey = process.env.TYPESENSE_API_KEY;
  const collectionSchema = {
    name: 'blob',
    fields: [
      { name: 'name', type: 'string' },
      { name: 'url', type: 'string' },
      { name: 'fileName', type: 'string' },
      { name: 'lastModified', type: 'int64' },
    ],
    default_sorting_field: 'lastModified',
  };
  const collectionsEndpoint = `${typeSenseEndpoint}/collections`;

  try {
    await axios.post(collectionsEndpoint, collectionSchema, {
      headers: {
        'Content-Type': 'application/json',
        'X-TYPESENSE-API-KEY': apiKey,
      },
    });
    console.log('Collection created in TypeSense');
  } catch (error: unknown) {
    // Only failures that carry an HTTP response have `response`; network
    // failures (e.g. connection refused) do not, so never dereference it blindly.
    const response = (
      error as { response?: { status?: number; data?: unknown } } | null | undefined
    )?.response;

    if (response?.status === 409) {
      console.log('Collection already exists in TypeSense');
      return;
    }

    console.error(
      'Error creating collection in TypeSense:',
      response?.data ?? (error instanceof Error ? error.message : error),
    );
  }
}
/**
 * Adds documents to the `blob` collection one request at a time.
 *
 * The `/collections/blob/documents` endpoint indexes a single document per
 * request, so each element of `documents` is posted separately instead of
 * sending the whole array as one body. For large batches the bulk
 * `/documents/import` endpoint is the better fit.
 *
 * A failure for one document is logged and the remaining documents are still
 * attempted; nothing is thrown.
 *
 * @param documents - Documents matching the `blob` collection schema.
 */
async function sendDocumentsToTypeSense(documents: any[]): Promise<void> {
  const typeSenseEndpoint = 'http://localhost:8108';
  const apiKey = process.env.TYPESENSE_API_KEY;
  const collectionName = 'blob';
  const documentsEndpoint = `${typeSenseEndpoint}/collections/${collectionName}/documents`;
  const requestConfig = {
    headers: {
      'Content-Type': 'application/json',
      'X-TYPESENSE-API-KEY': apiKey,
    },
  };

  const added: unknown[] = [];
  // Sequential on purpose: keeps the order deterministic and avoids flooding
  // the server with one concurrent request per document.
  for (const document of documents) {
    try {
      const response = await axios.post(documentsEndpoint, document, requestConfig);
      added.push(response.data);
    } catch (error: unknown) {
      // `response` is absent on network-level failures, so guard the access.
      const responseData = (
        error as { response?: { data?: unknown } } | null | undefined
      )?.response?.data;
      console.error(
        'Error adding documents to TypeSense:',
        responseData ?? (error instanceof Error ? error.message : error),
      );
    }
  }

  if (added.length > 0) {
    console.log('Documents added to TypeSense:', added);
  }
}
/**
 * Runs the indexing pipeline: create the collection, list the blobs, map each
 * blob to a document and bulk-import the documents as JSON Lines.
 *
 * Each document carries `name`, `url`, `fileName` (the last `/`-separated
 * segment of the blob name) and `lastModified`.
 *
 * The blob URL prefix is built from `AZURE_STORAGE_ACCOUNT_NAME` and
 * `AZURE_STORAGE_CONTAINER_NAME` so that it points at the same account and
 * container the listing came from. The previously hard-coded prefix is used
 * only when either variable is unset.
 */
async function main(): Promise<void> {
  await createCollectionInTypeSense();
  const blobList = await getBlobListFromAzure();

  // Nothing to index: skip the import instead of posting an empty body.
  if (blobList.length === 0) {
    console.log('No blobs found; nothing to import');
    return;
  }

  const accountName = process.env.AZURE_STORAGE_ACCOUNT_NAME;
  const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;
  const containerUrl =
    accountName && containerName
      ? `https://${accountName}.blob.core.windows.net/${containerName}/`
      : 'https://ggsalmsb84af022bf.blob.core.windows.net/lms/';

  // NOTE(review): blob names are appended verbatim (not percent-encoded), as
  // before; confirm how consumers of `url` expect special characters.
  const typeSenseDocuments = blobList.map((blob) => ({
    name: blob.name,
    url: containerUrl + blob.name,
    fileName: blob.name.split('/').pop(),
    lastModified: convertToUnixTimestamp(blob.properties.lastModified),
  }));

  // The import endpoint takes JSON Lines: one serialized document per line.
  const jsonLinesData = typeSenseDocuments.map((doc) => JSON.stringify(doc)).join('\n');
  await importDocumentsToTypeSense(jsonLinesData);
}
/**
 * Collects the per-document import results that report `success: false`.
 *
 * Accepts either the raw JSON Lines text of an import response or an
 * already-parsed single result object. Lines that are not valid JSON are skipped.
 */
function findFailedImportResults(responseData: unknown): unknown[] {
  const entries: unknown[] =
    typeof responseData === 'string'
      ? responseData.split('\n').filter((line) => line.trim() !== '')
      : [responseData];

  const failures: unknown[] = [];
  for (const entry of entries) {
    let result: unknown = entry;
    if (typeof entry === 'string') {
      try {
        result = JSON.parse(entry);
      } catch {
        continue;
      }
    }
    if (
      typeof result === 'object' &&
      result !== null &&
      (result as { success?: unknown }).success === false
    ) {
      failures.push(result);
    }
  }
  return failures;
}

/**
 * Bulk-imports documents into the `blob` collection with `action=create`.
 *
 * The import endpoint reports a result per document in the response body, so a
 * successful HTTP status does not mean every document was indexed. Results
 * with `success: false` are therefore logged as errors. Request failures are
 * logged as well; nothing is thrown.
 *
 * @param jsonLinesData - One JSON-serialized document per line.
 */
async function importDocumentsToTypeSense(jsonLinesData: string): Promise<void> {
  const typeSenseEndpoint = 'http://localhost:8108';
  const apiKey = process.env.TYPESENSE_API_KEY;
  const collectionName = 'blob';
  const importEndpoint = `${typeSenseEndpoint}/collections/${collectionName}/documents/import?action=create`;

  try {
    const response = await axios.post(importEndpoint, jsonLinesData, {
      headers: {
        'Content-Type': 'text/plain',
        'X-TYPESENSE-API-KEY': apiKey,
      },
    });

    const failures = findFailedImportResults(response.data);
    if (failures.length > 0) {
      console.error('Some documents failed to import to TypeSense:', failures);
    }
    console.log('Documents imported to TypeSense:', response.data);
  } catch (error: unknown) {
    // `response` is absent on network-level failures, so guard the access.
    const responseData = (
      error as { response?: { data?: unknown } } | null | undefined
    )?.response?.data;
    console.error(
      'Error importing documents to TypeSense:',
      responseData ?? (error instanceof Error ? error.message : error),
    );
  }
}
// Script entry point. Attach a rejection handler so a failure anywhere in the
// pipeline is reported and reflected in the exit code instead of surfacing as
// an unhandled promise rejection.
main().catch((error: unknown) => {
  console.error('Indexing run failed:', error);
  process.exitCode = 1;
});
Comments....