Skip to content

Flow YAML Format

Flow YAML Format

List of all available properties for a Flow YAML file.

Sample Flow YAML files
# Sample 1: answer a user question from the text of a single web page.
# Tasks run sequentially: MyWebPageContentLoader loads the page, then MyLLM
# answers the question using the loaded content as context.
type: flow
name: "My Website Q&A Bot"
flow_type: QuestionAnswering
# Flow inputs; tasks reference them as $inputs.<name> or, inside Jinja2
# templates, as {{ inputs.<name> }}.
input:
  - name: the_url
    description: The URL of the website to load content
    type: string
    default: https://www.washingtonpost.com/technology/2023/05/16/sam-altman-open-ai-congress-hearing/
  - name: the_question
    description: The question to ask on the website content
    type: string
    required: true
tasks:
  # Step 1: load the page content from the given URL.
  - name: MyWebPageContentLoader
    description: Load text content from a webpage
    type: schema.task.content_loader.WebLoader
    input:
      url: $inputs.the_url
    task_config:
      retries: 3
      timeout_seconds: 10
      cache_result: true
      cache_expiration: 24h
  # Step 2: ask the chat model, passing the loaded content as context.
  - name: MyLLM
    description: Use LLM to generate answer based on retrieve documents
    type: schema.task.llm.ChatLLM
    input:
      messages:
        - type: system
          content: |
            You are Q&A Chat Bot. Use the provided context to answer question.
            If you don't know the answer, just say "Sorry, I don't know the answer to this", don't try to make up an answer.

        # Jinja2 template evaluated at runtime. The input must be referenced by
        # its declared name (the_question), otherwise the question is missing.
        - type: user
          content: |
            Context:
            {{ MyWebPageContentLoader.output[0].content }}

            Question: {{ inputs.the_question }}
      provider: openai
      model_name: gpt-3.5-turbo
      temperature: 0
      max_tokens: 256
    task_config:
      retries: 1
      timeout_seconds: 30
      cache_result: true
      cache_expiration: 24h
# Values returned by the flow.
output:
  - name: llm_response
    value: $MyLLM.output
# Sample 2: question answering against an existing Qdrant collection.
# Pipeline: embed the question -> retrieve matching documents -> ask the
# chat model with the retrieved documents as context.
type: flow
name: 'Sample Question Answering Flow using existing vector store'
description: 'Sample Question Answering Flow using existing vector store'
flow_type: QuestionAnswering

input:
  - name: question
    type: string
    description: 'Question from user'
    required: false

tasks:
  # Step 1: turn the user's question into embeddings (query mode).
  - name: EmbeddingsModel
    type: schema.task.embedding.EmbeddingsModel
    description: 'Embeddings'
    input:
      provider: openai
      model_name: text-embedding-ada-002
      mode: query
      query: $inputs.question
      documents: null
      batch_size: 32
    task_config:
      retries: 3
      timeout_seconds: 90
      persist_result: true
      cache_result: true
      cache_expiration: '5m'

  # Step 2: look up documents in the collection using those embeddings.
  - name: QdrantRetriever
    type: schema.task.retriever.QdrantRetriever
    description: 'Retrieve documents from a vector store'
    input:
      collection_name: test_collection
      embeddings: $EmbeddingsModel.output
    task_config:
      retries: 1
      timeout_seconds: 30
      persist_result: true
      cache_result: true
      cache_expiration: '5m'

  # Step 3: answer the question from the retrieved documents.
  - name: ChatLLM
    type: schema.task.llm.ChatLLM
    description: 'Use LLM to generate answer based on retrieve documents'
    input:
      provider: openai
      model_name: gpt-3.5-turbo
      temperature: 0
      max_tokens: 512
      messages:
        - type: system
          content: >
            You are a Question Answering Bot. You can provide questions based on given context.
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            Always include sources the answer in the format: 'Source: source1' or 'Sources: source1 source2'.
        # Jinja2 template: loops over the retrieved documents at runtime.
        - type: user
          content: |-
            Context:
            {%- for doc in QdrantRetriever.output %}
            Content: {{ doc.content }}
            Source: {{ doc.metadata.source }}
            {%- endfor %}
            Question: {{ inputs.question }}
    task_config:
      retries: 1
      timeout_seconds: 30
      persist_result: true
      cache_result: true
      cache_expiration: '5m'

# The flow returns both the model answer and the documents it was based on.
output:
  - name: llm_response
    value: $ChatLLM.output
  - name: documents
    value: $QdrantRetriever.output
# Sample 3: build a Qdrant collection from the content of a website.
# Pipeline: load pages -> split into chunks -> embed chunks -> build store.
type: flow
name: Sample Vector Store Builder from a website
description: Sample Vector Store Builder from a website
flow_type: VectorStoreManagement
input:
  - name: url
    description: The website address to load data from
    type: string
    required: false
tasks:
  # Step 1: scrape the start URL and, because recursive is true, the links
  # found in it (capped by max_urls).
  - name: WebLoader
    description: Load data from a web page with options to recursively load links found in the web page
    type: schema.task.content_loader.WebLoader
    input:
      recursive: true
      url: $inputs.url
      max_urls: 10
      start_selector_type: class_name
      start_element: content
    task_config:
      retries: 0
      timeout_seconds: 30
      persist_result: true
      cache_result: true
      cache_expiration: 5m
  # Step 2: split the loaded documents on the "\n## " separator.
  - name: CharacterTextSplitter
    description: Split text into chunks
    type: schema.task.text_splitter.CharacterTextSplitter
    input:
      documents: $WebLoader.output
      # Double quotes are required so that \n is read as a newline.
      separator: "\n## "
      chunk_size: 300
      chunk_overlap: 20
    task_config:
      retries: 0
      timeout_seconds: 30
      persist_result: true
      cache_result: true
      cache_expiration: 5m
  # Step 3: embed the chunks (documents mode).
  - name: EmbeddingsModel
    description: Embeddings
    type: schema.task.embedding.EmbeddingsModel
    input:
      mode: documents
      documents: $CharacterTextSplitter.output
      provider: openai
      model_name: text-embedding-ada-002
      batch_size: 32
    task_config:
      retries: 3
      timeout_seconds: 90
      persist_result: true
      cache_result: true
      cache_expiration: 5m
  # Step 4: write the embeddings into the target collection.
  - name: QdrantBuilder
    description: Build a vector store
    type: schema.task.vector_store_builder.QdrantBuilder
    input:
      embeddings: $EmbeddingsModel.output
      collection_name: test_collection
    task_config:
      retries: 1
      timeout_seconds: 10
      persist_result: true
      # Caching is switched off for the build step in this sample.
      cache_result: false
      cache_expiration: ""
output:
  - name: collection_name
    value: $QdrantBuilder.output
  - name: documents
    value: $CharacterTextSplitter.output
# Sample 4: summarize the content of a web page.
# Pipeline: load page -> split into chunks -> summarize the chunks.
type: flow
flow_type: Summarization
name: Sample summarizer from a web page
input:
  - name: the_url
    description: The URL of the website to load content
    type: string
    default: https://www.washingtonpost.com/technology/2023/05/16/sam-altman-open-ai-congress-hearing/
tasks:
  # Step 1: load the page content from the given URL.
  - name: MyWebPageContentLoader
    description: Load text content from a webpage
    type: schema.task.content_loader.WebLoader
    input:
      url: $inputs.the_url
      # NOTE(review): save_path is not among the WebLoader inputs listed in the
      # task type catalog; confirm it is supported.
      save_path: /tmp
    task_config:
      retries: 3
      timeout_seconds: 10
      cache_result: true
      cache_expiration: 24h
  # Step 2: split the loaded documents into chunks.
  - name: SampleRecursiveTextSplitter
    description: Recursively split text into chunks
    type: schema.task.text_splitter.RecursiveCharacterTextSplitter
    input:
      # Must reference the loader task by its declared name.
      documents: $MyWebPageContentLoader.output
      separators: ["\n\n", "\n"]
      chunk_size: 4000
      chunk_overlap: 0
    task_config:
      retries: 0
      timeout_seconds: 5
      persist_result: true
      cache_result: true
      cache_expiration: 5m
  # Step 3: summarize the chunks.
  - name: MyLLM
    description: Use Summarize LLM to generate summary based on text
    type: schema.task.llm.SummarizeLLM
    input:
      documents: $SampleRecursiveTextSplitter.output
      provider: openai
      model_name: gpt-3.5-turbo
      temperature: 0
      max_tokens: 4096
    task_config:
      retries: 0
      timeout_seconds: 120
      persist_result: true
      cache_result: true
      cache_expiration: 5m
output:
  - name: summary
    value: $MyLLM.output
# Sample 5: extract field values from a remote document.
# Pipeline: load the remote files -> prompt the LLM with the document text
# and the field descriptions.
type: flow
name: DocumentParser Demo
flow_type: DocumentParser
input:
  - name: file_urls
    type: array
    description: ""
    default:
      - ""
    required: false
  - name: fields_schema
    type: object
    description: ""
    # A default must follow the declared input type, so an empty object is
    # used here rather than an empty string.
    default: {}
  - name: base_prompt
    type: string
    description: ""
    # Folded block scalar: the line breaks below become single spaces, giving
    # the same one-line text as a multi-line quoted string.
    default: >-
      Given the following document text and field descriptions, extract the field values.
      For date values, return in DD/MM/YYYY format.
      Return a single value for each field.
      If there are multiple values to a field, return value with the highest correctness.
      If unable to find any value return null.
tasks:
  # Step 1: load the documents from the given URLs.
  - name: RemoteFilesLoader
    description: Load data from remote files
    type: schema.task.content_loader.RemoteFilesLoader
    input:
      file_urls: $inputs.file_urls
    task_config:
      retries: 0
      timeout_seconds: 30
      persist_result: true
      cache_result: true
      cache_expiration: 5m
  # Step 2: build the extraction prompt (a Jinja2 template evaluated at
  # runtime) from the base prompt, the first loaded document and the schema.
  - name: LLM
    description: Use LLM to generate text based on a prompt
    type: schema.task.llm.LLM
    input:
      prompt: >-
        {{inputs.base_prompt}}


        Document Text:

        ```

        {{RemoteFilesLoader.output[0].content}}

        ```


        Field Descriptions in JSON:

        ```

        {{inputs.fields_schema}}

        ```

        Return only field values in minified JSON format

      provider: openai
      model_name: gpt-4
      temperature: 0
      max_tokens: 512
    task_config:
      retries: 0
      timeout_seconds: 30
      persist_result: true
      cache_result: true
      cache_expiration: 5m
output:
  - name: llm_response
    value: $LLM.output

Flow Headers

General config for the flow. Example:

# Flow header: object type, display name and flow type.
type: flow
name: 'My Website Q&A Bot'
flow_type: QuestionAnswering

type String
Object type, always set to flow for a flow.

id String
Flow ID, must be unique across all users, can be left blank to be generated by server.

Format: ^[A-Za-z0-9_]+$ (only alphanumeric and underscore)

name String
Name of the flow, need not be unique, used to prefix id if id is not provided. Max length is 64 characters.

flow_type String
Type of the flow, this indicates whether a flow can be used in one of our sample apps hosted at https://apps.stack.govtext.gov.sg.

Format: ^[A-Za-z0-9_]+$ (only alphanumeric and underscore)

The following types are currently supported in sample apps:

  • DocumentParser
  • QuestionAnswering
  • Search
  • VectorStoreManagement

Flow Inputs

List of input parameters for the flow. Example:

# Two string inputs: an optional URL with a default and a required question.
input:
  - name: the_url
    type: string
    description: 'The URL of the website to load content'
    default: 'https://www.washingtonpost.com/technology/2023/05/16/sam-altman-open-ai-congress-hearing/'
  - name: the_question
    type: string
    description: 'The question to ask on the website content'
    required: true

input Array of Maps
List of input parameters

input.name String
Name of the input parameter. This will be used as the key in the input map passed to the flow.

Format: ^[A-Za-z_]+$ (only alphabets and underscore)

input.description String
Description of the input parameter.

input.type String
Type of the input parameter.

The following types are supported: string, integer, number, boolean, object, array.

input.required Boolean
Whether the input parameter is required. Defaults to false.

input.default Any
Default value of the input parameter. This must follow the type of the input parameter.

Tasks

List of tasks for the flow. Currently only sequential tasks are supported; parallel tasks and control flows will be added later. Example:

# A single task: load a web page from the URL given as flow input.
tasks:
  - name: MyWebPageContentLoader
    type: schema.task.content_loader.WebLoader
    description: 'Load text content from a webpage'
    input:
      url: $inputs.the_url
    # Runtime settings: retry count, timeout and result caching.
    task_config:
      retries: 3
      timeout_seconds: 10
      cache_result: true
      cache_expiration: '24h'

tasks Array of Maps
List of tasks; each entry is a map of the task properties described below.

tasks.name String
Name of the task. Must be unique within a flow. This will be used as the key to reference the task output (e.g. MyTaskName.output).

Format: ^[A-Za-z0-9_]+$ (only alphanumeric and underscore)

tasks.description String
Description of the task.

tasks.type String
The type of the task. Available types are listed below.

Available task types
{
  "content_loaders": [
    {
      "name": "WebLoader",
      "module": "schema.task.content_loader",
      "description": "Load data from a web page with options to recursively load links found in the web page",
      "input": [
        {
          "name": "url",
          "description": "The url to scrape",
          "type": "str",
          "required": true
        },
        {
          "name": "recursive",
          "description": "Whether to recursively scrape the website.",
          "type": "bool",
          "required": true
        },
        {
          "name": "max_urls",
          "description": "The maximum number of urls to scrape.",
          "type": "int",
          "required": false,
          "default": 100
        },
        {
          "name": "start_selector_type",
          "description": "The type of selector to find the start element. Possible values are 'class_name', 'id', 'tag_name', 'xpath', 'css_selector'.",
          "type": "str",
          "required": true,
          "default": "class_name"
        },
        {
          "name": "start_element",
          "description": "The value of the start element. Body element is used if start element cannot be found.",
          "type": "str",
          "required": true,
          "default": "content"
        },
        {
          "name": "end_selector_type",
          "description": "The type of selector to find the end element. Possible values are 'class_name', 'id', 'tag_name', 'xpath', 'css_selector'.",
          "type": "str",
          "required": false,
          "default": "class_name"
        },
        {
          "name": "end_element",
          "description": "The value of the end element. Leave blank if no end element.",
          "type": "str",
          "required": false,
          "default": ""
        }
      ],
      "output": {
        "type": "List[schema.data.Document]"
      }
    },
    {
      "name": "RemoteFilesLoader",
      "module": "schema.task.content_loader",
      "description": "Load data from remote files",
      "input": [
        {
          "name": "file_urls",
          "type": "List[str]",
          "required": true
        }
      ],
      "output": {
        "type": "List[schema.data.Document]"
      }
    },
    {
      "name": "CombinedLoader",
      "module": "schema.task.content_loader",
      "description": "Load data from a web page or from remote files",
      "input": [
        {
          "name": "file_urls",
          "description": "Links to remote files. Currently only s3:// is supported.",
          "type": "List[str]"
        },
        {
          "name": "url",
          "description": "The url to scrape",
          "type": "str",
          "required": true
        },
        {
          "name": "recursive",
          "description": "Whether to recursively scrape the website.",
          "type": "bool",
          "required": true
        },
        {
          "name": "max_urls",
          "description": "The maximum number of urls to scrape.",
          "type": "int",
          "required": false,
          "default": 100
        },
        {
          "name": "start_selector_type",
          "description": "The type of selector to find the start element. Possible values are 'class_name', 'id', 'tag_name', 'xpath', 'css_selector'.",
          "type": "str",
          "required": true,
          "default": "class_name"
        },
        {
          "name": "start_element",
          "description": "The value of the start element. Body element is used if start element cannot be found.",
          "type": "str",
          "required": true,
          "default": "content"
        },
        {
          "name": "end_selector_type",
          "description": "The type of selector to find the end element. Possible values are 'class_name', 'id', 'tag_name', 'xpath', 'css_selector'.",
          "type": "str",
          "required": false,
          "default": "class_name"
        },
        {
          "name": "end_element",
          "description": "The value of the end element. Leave blank if no end element.",
          "type": "str",
          "required": false,
          "default": ""
        }
      ],
      "output": {
        "type": "List[schema.data.Document]"
      }
    }
  ],
  "text_splitters": [
    {
      "name": "CharacterTextSplitter",
      "module": "schema.task.text_splitter",
      "input": [
        {
          "name": "documents",
          "type": "List[schema.data.Document]",
          "required": true
        },
        {
          "name": "separator",
          "type": "str",
          "required": true
        },
        {
          "name": "chunk_size",
          "type": "int",
          "default": 1000
        },
        {
          "name": "chunk_overlap",
          "type": "int",
          "default": 20
        }
      ],
      "output": {
        "type": "List[schema.data.Document]"
      }
    },
    {
      "name": "RecursiveCharacterTextSplitter",
      "module": "schema.task.text_splitter",
      "input": [
        {
          "name": "documents",
          "type": "List[schema.data.Document]",
          "required": true
        },
        {
          "name": "separators",
          "type": "List[str]",
          "required": true
        },
        {
          "name": "chunk_size",
          "type": "int",
          "default": 1000
        },
        {
          "name": "chunk_overlap",
          "type": "int",
          "default": 20
        }
      ],
      "output": {
        "type": "List[schema.data.Document]"
      }
    }
  ],
  "embeddings": [
    {
      "name": "EmbeddingsModel",
      "module": "schema.task.embedding",
      "input": [
        {
          "name": "provider",
          "type": "str",
          "required": true,
          "options": [
            "openai"
          ]
        },
        {
          "name": "model_name",
          "type": "str",
          "required": true,
          "options": [
            "text-embedding-ada-002"
          ]
        },
        {
          "name": "mode",
          "type": "str",
          "options": [
            "documents",
            "query"
          ],
          "required": true,
          "default": "documents"
        },
        {
          "name": "documents",
          "type": "List[schema.data.Document]",
          "required": false
        },
        {
          "name": "query",
          "type": "str",
          "required": false
        },
        {
          "name": "batch_size",
          "type": "int",
          "required": true,
          "default": 32
        }
      ],
      "output": {
        "type": "schema.data.Embeddings"
      }
    }
  ],
  "vector_store_builders": [
    {
      "name": "QdrantBuilder",
      "module": "schema.task.vector_store_builder",
      "input": [
        {
          "name": "collection_name",
          "type": "str",
          "required": true
        },
        {
          "name": "embeddings",
          "type": "schema.data.Embeddings",
          "required": true
        },
        {
          "name": "rebuild",
          "type": "bool",
          "required": false
        }
      ],
      "output": {
        "type": "str"
      }
    }
  ],
  "retrievers": [
    {
      "name": "QdrantRetriever",
      "module": "schema.task.retriever",
      "input": [
        {
          "name": "collection_name",
          "description": "Name of the collection to retrieve from",
          "type": "str",
          "required": true
        },
        {
          "name": "embeddings",
          "description": "Embeddings used for similarity search",
          "type": "schema.data.Embeddings",
          "required": true
        },
        {
          "name": "max_returned_docs",
          "description": "Maximum number of documents to return",
          "type": "int",
          "required": false,
          "default": 10
        },
        {
          "name": "similarity_score_threshold",
          "description": "Minimum similarity score",
          "type": "float",
          "required": false,
          "default": 0.5
        },
        {
          "name": "max_tokens",
          "description": "Maximum number of tokens the returned documents can have",
          "type": "int",
          "required": false,
          "default": 3072
        }
      ],
      "output": {
        "type": "List[schema.data.Document]"
      }
    }
  ],
  "llm": [
    {
      "name": "LLM",
      "module": "schema.task.llm",
      "input": [
        {
          "name": "prompt",
          "type": "str",
          "required": true
        },
        {
          "name": "provider",
          "type": "str",
          "required": true,
          "options": [
            "openai"
          ]
        },
        {
          "name": "model_name",
          "type": "str",
          "required": true,
          "options": [
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-16k",
            "gpt-4",
            "gpt-4-32k"
          ]
        },
        {
          "name": "temperature",
          "type": "float",
          "required": true,
          "default": 0.0
        },
        {
          "name": "max_tokens",
          "type": "int",
          "required": true,
          "default": 512
        }
      ],
      "output": {
        "type": "schema.data.LLMResponse"
      }
    },
    {
      "name": "ChatLLM",
      "module": "schema.task.llm",
      "input": [
        {
          "name": "messages",
          "type": "List[dict]",
          "required": true
        },
        {
          "name": "provider",
          "type": "str",
          "required": true,
          "options": [
            "openai"
          ]
        },
        {
          "name": "model_name",
          "type": "str",
          "required": true,
          "options": [
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-16k",
            "gpt-4",
            "gpt-4-32k"
          ]
        },
        {
          "name": "temperature",
          "type": "float",
          "required": true,
          "default": 0.0
        },
        {
          "name": "max_tokens",
          "type": "int",
          "required": true,
          "default": 512
        }
      ],
      "output": {
        "type": "schema.data.LLMResponse"
      }
    },
    {
      "name": "SummarizeLLM",
      "module": "schema.task.llm",
      "input": [
        {
          "name": "documents",
          "type": "List[schema.data.Document]",
          "required": true
        },
        {
          "name": "provider",
          "type": "str",
          "required": true,
          "options": [
            "openai"
          ]
        },
        {
          "name": "model_name",
          "type": "str",
          "required": true,
          "options": [
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-16k",
            "gpt-4",
            "gpt-4-32k"
          ]
        },
        {
          "name": "temperature",
          "type": "float",
          "required": true,
          "default": 0.0
        },
        {
          "name": "max_tokens",
          "type": "int",
          "required": true,
          "default": 512
        }
      ],
      "output": {
        "type": "schema.data.LLMResponse"
      }
    }
  ],
  "tools": [
    {
      "name": "SerpAPIWrapper",
      "module": "schema.task.tools",
      "input": [
        {
          "name": "query",
          "type": "str",
          "required": true
        }
      ],
      "output": {
        "type": "str"
      }
    }
  ]
}

tasks.input Map
Input parameters of the task, in the format key: value, where key is one of the input parameters listed for the task's type in the list of available task types above.

Value can be one of the following:

  • $inputs.<input_name>: reference to an input parameter of the flow
  • $<task_name>.output: reference to the output of a previous task
  • Type of integer, number, boolean, object, or array: literal value
  • Type of string as Jinja2 template: a Jinja2 template string which will be evaluated at runtime.
    For example, {{ <task_name>.output }} is equivalent to $<task_name>.output.

tasks.task_config Map
Runtime configuration of the task:

  • retries: number of retries if the task fails
  • timeout_seconds: timeout in seconds for the task
  • cache_result: whether to cache the task result, defaults to false
  • cache_expiration: expiration time of the cached result

Outputs

List of outputs for the flow. Example:

# Expose the output of the MyLLM task as the flow output "llm_response".
output:
  - name: llm_response
    value: $MyLLM.output

output.name String
Name of the output parameter. Must be unique among the output parameters.

Format: ^[A-Za-z0-9_]+$ (only alphanumeric and underscore)

output.value String
Value of the output parameter.

Value can be one of the following:

  • $inputs.<input_name>: reference to an input parameter of the flow
  • $<task_name>.output: reference to the output of one of the tasks
  • Type of integer, number, boolean, object, or array: literal value
  • Type of string as Jinja2 template: a Jinja2 template string which will be evaluated at runtime.
    For example, {{ <task_name>.output }} is equivalent to $<task_name>.output.