This page provides some examples of accessing the Unstructured Partition Endpoint via different methods.
To use these examples, you'll need to set an environment variable named `UNSTRUCTURED_API_KEY`, representing your Unstructured API key. Get your API key.
For the POST and Unstructured JavaScript/TypeScript SDK examples, you’ll also need to set an environment variable named `UNSTRUCTURED_API_URL` to the value `https://api.unstructuredapp.io/general/v0/general`.
For the Unstructured Python SDK, you do not need to set an environment variable named UNSTRUCTURED_API_URL
, as the Python SDK uses the API URL of
https://api.unstructuredapp.io/general/v0/general
by default. (The Unstructured JavaScript/TypeScript SDK does not have this feature yet; you must always specify the API URL.)
POST
# Partition a PDF with the VLM strategy.
# The API key header is double-quoted so the shell expands
# $UNSTRUCTURED_API_KEY; inside single quotes the literal text would be sent.
curl -X 'POST' "$UNSTRUCTURED_API_URL" \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -H "unstructured-api-key: $UNSTRUCTURED_API_KEY" \
  -F 'files=@sample-docs/layout-parser-paper.pdf' \
  -F 'strategy=vlm' \
  -F 'vlm_model_provider=openai' \
  -F 'vlm_model=gpt-4o'
Python SDK
import asyncio
import os
import json
import unstructured_client
from unstructured_client.models import shared
# Create the Unstructured client, authenticating with the API key read from
# the UNSTRUCTURED_API_KEY environment variable.
client = unstructured_client.UnstructuredClient(
    api_key_auth=os.environ.get("UNSTRUCTURED_API_KEY"),
)
async def call_api(filename, input_dir, output_dir):
    """Partition one file with Unstructured and save the elements as JSON.

    Args:
        filename: Path of the file to send for partitioning.
        input_dir: Root input directory, used to compute the file's relative
            location.
        output_dir: Root output directory; the output is written to the same
            relative location beneath it, with ".json" appended to the name.

    Errors raised while calling the API or writing the output are caught and
    printed rather than propagated.
    """
    # Read the file inside a context manager so the handle is always closed;
    # the bytes are passed as the request content.
    with open(filename, "rb") as source:
        content = source.read()

    req = {
        "partition_parameters": {
            "files": {
                "content": content,
                "file_name": os.path.basename(filename),
            },
            "strategy": shared.Strategy.VLM,
            "vlm_model": "gpt-4o",
            "vlm_model_provider": "openai",
            "split_pdf_page": True,
            "split_pdf_allow_failed": True,
            "split_pdf_concurrency_level": 15
        }
    }

    try:
        res = await client.general.partition_async(
            request=req
        )
        element_dicts = [element for element in res.elements]
        json_elements = json.dumps(element_dicts, indent=2)

        # Create the output directory structure.
        relative_path = os.path.relpath(os.path.dirname(filename), input_dir)
        output_subdir = os.path.join(output_dir, relative_path)
        os.makedirs(output_subdir, exist_ok=True)

        # Write the output file.
        output_filename = os.path.join(output_subdir, os.path.basename(filename) + ".json")
        with open(output_filename, "w") as file:
            file.write(json_elements)
    except Exception as e:
        print(f"Error processing {filename}: {e}")
async def process_files(input_directory, output_directory):
    """Partition every non-JSON file under input_directory concurrently.

    Walks input_directory recursively, creates one call_api() coroutine per
    file whose name does not end in '.json', and awaits them all together.
    """
    pending = [
        call_api(os.path.join(dirpath, name), input_directory, output_directory)
        for dirpath, _, names in os.walk(input_directory)
        for name in names
        if not name.endswith('.json')
    ]
    await asyncio.gather(*pending)
if __name__ == "__main__":
    # Fail fast with a clear message when a required directory variable is
    # unset or empty, instead of passing None into process_files().
    input_directory = os.getenv("LOCAL_FILE_INPUT_DIR")
    output_directory = os.getenv("LOCAL_FILE_OUTPUT_DIR")
    if not input_directory or not output_directory:
        raise SystemExit(
            "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
        )

    asyncio.run(process_files(
        input_directory=input_directory,
        output_directory=output_directory
    ))
JavaScript/TypeScript SDK
import { UnstructuredClient } from "unstructured-client";
import * as fs from "fs";
import * as path from "path";
import { Strategy } from "unstructured-client/sdk/models/shared/index.js";
import { PartitionResponse } from "unstructured-client/sdk/models/operations";
/**
 * Sends every file under `sourcePath` to Unstructured for processing and
 * writes each result as JSON to the mirrored location under `destinationPath`.
 *
 * Folders are processed recursively. Partition requests are started without
 * being awaited, so this function returns before the results are written.
 *
 * @param client - The Unstructured client used to send partition requests.
 * @param sourcePath - The input directory to read from.
 * @param destinationPath - The output directory; created if it does not exist.
 */
function processFiles(
  client: UnstructuredClient,
  sourcePath: string,
  destinationPath: string
): void {
  // If an output directory does not exist for the corresponding input
  // directory, then create it.
  if (!fs.existsSync(destinationPath)) {
    fs.mkdirSync(destinationPath, { recursive: true });
  }

  // For each folder and file at the current level of the input directory...
  for (const item of fs.readdirSync(sourcePath)) {
    const inputPath = path.join(sourcePath, item);
    const outputPath = path.join(destinationPath, item);

    // If it's a folder, call this function recursively.
    if (fs.statSync(inputPath).isDirectory()) {
      processFiles(client, inputPath, outputPath);
      continue;
    }

    // If it's a file, send it to Unstructured for processing.
    const data = fs.readFileSync(inputPath);

    client.general.partition({
      partitionParameters: {
        files: {
          content: data,
          fileName: inputPath
        },
        strategy: Strategy.HiRes,
        hiResModelName: "layout_v1.1.0",
        splitPdfPage: true,
        splitPdfConcurrencyLevel: 15,
        splitPdfAllowFailed: true
      }
    }).then((res: PartitionResponse) => {
      if (res.statusCode === 200) {
        // Successfully processed: write the processed data to the
        // destination directory.
        const jsonElements = JSON.stringify(res, null, 2);
        fs.writeFileSync(outputPath + ".json", jsonElements);
      } else {
        // Report unexpected statuses instead of silently dropping the file.
        console.log(`Unexpected status ${res.statusCode} for ${inputPath}`);
      }
    }).catch((e) => {
      if (e.statusCode) {
        console.log(e.statusCode);
        console.log(e.body);
      } else {
        console.log(e);
      }
    });
  }
}
// Create the client from the API key and API URL environment variables.
const client = new UnstructuredClient({
  security: { apiKeyAuth: process.env.UNSTRUCTURED_API_KEY },
  serverURL: process.env.UNSTRUCTURED_API_URL
});

// process.env values are `string | undefined`; verify the directories are set
// before passing them to processFiles, which requires strings.
const inputDir = process.env.LOCAL_FILE_INPUT_DIR;
const outputDir = process.env.LOCAL_FILE_OUTPUT_DIR;
if (!inputDir || !outputDir) {
  throw new Error(
    "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
  );
}

processFiles(client, inputDir, outputDir);
yolox
.
Specify the language(s) of the document's text with the `languages` parameter.
View the list of available languages.
POST
# Partition an image, specifying Korean as the document language.
# The API key header is double-quoted so the shell expands
# $UNSTRUCTURED_API_KEY; inside single quotes the literal text would be sent.
curl -X 'POST' "$UNSTRUCTURED_API_URL" \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -H "unstructured-api-key: $UNSTRUCTURED_API_KEY" \
  -F 'files=@sample-docs/korean.png' \
  -F 'strategy=vlm' \
  -F 'vlm_model_provider=openai' \
  -F 'vlm_model=gpt-4o' \
  -F 'languages=kor'
Python SDK
import asyncio
import os
import json
import unstructured_client
from unstructured_client.models import shared
# Create the Unstructured client, authenticating with the API key read from
# the UNSTRUCTURED_API_KEY environment variable.
client = unstructured_client.UnstructuredClient(
    api_key_auth=os.environ.get("UNSTRUCTURED_API_KEY"),
)
async def call_api(filename, input_dir, output_dir):
    """Partition one file with Unstructured and save the elements as JSON.

    Args:
        filename: Path of the file to send for partitioning.
        input_dir: Root input directory, used to compute the file's relative
            location.
        output_dir: Root output directory; the output is written to the same
            relative location beneath it, with ".json" appended to the name.

    Errors raised while calling the API or writing the output are caught and
    printed rather than propagated.
    """
    # Read the file inside a context manager so the handle is always closed;
    # the bytes are passed as the request content.
    with open(filename, "rb") as source:
        content = source.read()

    req = {
        "partition_parameters": {
            "files": {
                "content": content,
                "file_name": os.path.basename(filename),
            },
            "strategy": shared.Strategy.OCR_ONLY,
            "languages": ["kor"],
            "split_pdf_page": True,
            "split_pdf_allow_failed": True,
            "split_pdf_concurrency_level": 15
        }
    }

    try:
        res = await client.general.partition_async(
            request=req
        )
        element_dicts = [element for element in res.elements]
        json_elements = json.dumps(element_dicts, indent=2)

        # Create the output directory structure.
        relative_path = os.path.relpath(os.path.dirname(filename), input_dir)
        output_subdir = os.path.join(output_dir, relative_path)
        os.makedirs(output_subdir, exist_ok=True)

        # Write the output file.
        output_filename = os.path.join(output_subdir, os.path.basename(filename) + ".json")
        with open(output_filename, "w") as file:
            file.write(json_elements)
    except Exception as e:
        print(f"Error processing {filename}: {e}")
async def process_files(input_directory, output_directory):
    """Partition every non-JSON file under input_directory concurrently.

    Walks input_directory recursively, creates one call_api() coroutine per
    file whose name does not end in '.json', and awaits them all together.
    """
    pending = [
        call_api(os.path.join(dirpath, name), input_directory, output_directory)
        for dirpath, _, names in os.walk(input_directory)
        for name in names
        if not name.endswith('.json')
    ]
    await asyncio.gather(*pending)
if __name__ == "__main__":
    # Fail fast with a clear message when a required directory variable is
    # unset or empty, instead of passing None into process_files().
    input_directory = os.getenv("LOCAL_FILE_INPUT_DIR")
    output_directory = os.getenv("LOCAL_FILE_OUTPUT_DIR")
    if not input_directory or not output_directory:
        raise SystemExit(
            "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
        )

    asyncio.run(process_files(
        input_directory=input_directory,
        output_directory=output_directory
    ))
JavaScript/TypeScript SDK
import { UnstructuredClient } from "unstructured-client";
import * as fs from "fs";
import * as path from "path";
import { Strategy } from "unstructured-client/sdk/models/shared/index.js";
import { PartitionResponse } from "unstructured-client/sdk/models/operations";
/**
 * Sends every file under `sourcePath` to Unstructured for processing and
 * writes each result as JSON to the mirrored location under `destinationPath`.
 *
 * Folders are processed recursively. Partition requests are started without
 * being awaited, so this function returns before the results are written.
 *
 * @param client - The Unstructured client used to send partition requests.
 * @param sourcePath - The input directory to read from.
 * @param destinationPath - The output directory; created if it does not exist.
 */
function processFiles(
  client: UnstructuredClient,
  sourcePath: string,
  destinationPath: string
): void {
  // If an output directory does not exist for the corresponding input
  // directory, then create it.
  if (!fs.existsSync(destinationPath)) {
    fs.mkdirSync(destinationPath, { recursive: true });
  }

  // For each folder and file at the current level of the input directory...
  for (const item of fs.readdirSync(sourcePath)) {
    const inputPath = path.join(sourcePath, item);
    const outputPath = path.join(destinationPath, item);

    // If it's a folder, call this function recursively.
    if (fs.statSync(inputPath).isDirectory()) {
      processFiles(client, inputPath, outputPath);
      continue;
    }

    // If it's a file, send it to Unstructured for processing.
    const data = fs.readFileSync(inputPath);

    client.general.partition({
      partitionParameters: {
        files: {
          content: data,
          fileName: inputPath
        },
        strategy: Strategy.OcrOnly,
        languages: ["kor"],
        splitPdfPage: true,
        splitPdfConcurrencyLevel: 15,
        splitPdfAllowFailed: true
      }
    }).then((res: PartitionResponse) => {
      if (res.statusCode === 200) {
        // Successfully processed: write the processed data to the
        // destination directory.
        const jsonElements = JSON.stringify(res, null, 2);
        fs.writeFileSync(outputPath + ".json", jsonElements);
      } else {
        // Report unexpected statuses instead of silently dropping the file.
        console.log(`Unexpected status ${res.statusCode} for ${inputPath}`);
      }
    }).catch((e) => {
      if (e.statusCode) {
        console.log(e.statusCode);
        console.log(e.body);
      } else {
        console.log(e);
      }
    });
  }
}
// Create the client from the API key and API URL environment variables.
const client = new UnstructuredClient({
  security: { apiKeyAuth: process.env.UNSTRUCTURED_API_KEY },
  serverURL: process.env.UNSTRUCTURED_API_URL
});

// process.env values are `string | undefined`; verify the directories are set
// before passing them to processFiles, which requires strings.
const inputDir = process.env.LOCAL_FILE_INPUT_DIR;
const outputDir = process.env.LOCAL_FILE_OUTPUT_DIR;
if (!inputDir || !outputDir) {
  throw new Error(
    "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
  );
}

processFiles(client, inputDir, outputDir);
Set the `coordinates` parameter to `true` to add this field to the elements in the response.
POST
# Partition a PDF with the hi_res strategy and request element coordinates.
# The API key header is double-quoted so the shell expands
# $UNSTRUCTURED_API_KEY; inside single quotes the literal text would be sent.
curl -X 'POST' "$UNSTRUCTURED_API_URL" \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -H "unstructured-api-key: $UNSTRUCTURED_API_KEY" \
  -F 'files=@sample-docs/layout-parser-paper.pdf' \
  -F 'coordinates=true' \
  -F 'strategy=hi_res'
Python SDK
import asyncio
import os
import json
import unstructured_client
from unstructured_client.models import shared
# Create the Unstructured client, authenticating with the API key read from
# the UNSTRUCTURED_API_KEY environment variable.
client = unstructured_client.UnstructuredClient(
    api_key_auth=os.environ.get("UNSTRUCTURED_API_KEY"),
)
async def call_api(filename, input_dir, output_dir):
    """Partition one file with Unstructured and save the elements as JSON.

    Args:
        filename: Path of the file to send for partitioning.
        input_dir: Root input directory, used to compute the file's relative
            location.
        output_dir: Root output directory; the output is written to the same
            relative location beneath it, with ".json" appended to the name.

    Errors raised while calling the API or writing the output are caught and
    printed rather than propagated.
    """
    # Read the file inside a context manager so the handle is always closed;
    # the bytes are passed as the request content.
    with open(filename, "rb") as source:
        content = source.read()

    req = {
        "partition_parameters": {
            "files": {
                "content": content,
                "file_name": os.path.basename(filename),
            },
            "strategy": shared.Strategy.HI_RES,
            "coordinates": True,
            "split_pdf_page": True,
            "split_pdf_allow_failed": True,
            "split_pdf_concurrency_level": 15
        }
    }

    try:
        res = await client.general.partition_async(
            request=req
        )
        element_dicts = [element for element in res.elements]
        json_elements = json.dumps(element_dicts, indent=2)

        # Create the output directory structure.
        relative_path = os.path.relpath(os.path.dirname(filename), input_dir)
        output_subdir = os.path.join(output_dir, relative_path)
        os.makedirs(output_subdir, exist_ok=True)

        # Write the output file.
        output_filename = os.path.join(output_subdir, os.path.basename(filename) + ".json")
        with open(output_filename, "w") as file:
            file.write(json_elements)
    except Exception as e:
        print(f"Error processing {filename}: {e}")
async def process_files(input_directory, output_directory):
    """Partition every non-JSON file under input_directory concurrently.

    Walks input_directory recursively, creates one call_api() coroutine per
    file whose name does not end in '.json', and awaits them all together.
    """
    pending = [
        call_api(os.path.join(dirpath, name), input_directory, output_directory)
        for dirpath, _, names in os.walk(input_directory)
        for name in names
        if not name.endswith('.json')
    ]
    await asyncio.gather(*pending)
if __name__ == "__main__":
    # Fail fast with a clear message when a required directory variable is
    # unset or empty, instead of passing None into process_files().
    input_directory = os.getenv("LOCAL_FILE_INPUT_DIR")
    output_directory = os.getenv("LOCAL_FILE_OUTPUT_DIR")
    if not input_directory or not output_directory:
        raise SystemExit(
            "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
        )

    asyncio.run(process_files(
        input_directory=input_directory,
        output_directory=output_directory
    ))
JavaScript/TypeScript SDK
import { UnstructuredClient } from "unstructured-client";
import * as fs from "fs";
import * as path from "path";
import { Strategy } from "unstructured-client/sdk/models/shared/index.js";
import { PartitionResponse } from "unstructured-client/sdk/models/operations";
/**
 * Sends every file under `sourcePath` to Unstructured for processing and
 * writes each result as JSON to the mirrored location under `destinationPath`.
 *
 * Folders are processed recursively. Partition requests are started without
 * being awaited, so this function returns before the results are written.
 *
 * @param client - The Unstructured client used to send partition requests.
 * @param sourcePath - The input directory to read from.
 * @param destinationPath - The output directory; created if it does not exist.
 */
function processFiles(
  client: UnstructuredClient,
  sourcePath: string,
  destinationPath: string
): void {
  // If an output directory does not exist for the corresponding input
  // directory, then create it.
  if (!fs.existsSync(destinationPath)) {
    fs.mkdirSync(destinationPath, { recursive: true });
  }

  // For each folder and file at the current level of the input directory...
  for (const item of fs.readdirSync(sourcePath)) {
    const inputPath = path.join(sourcePath, item);
    const outputPath = path.join(destinationPath, item);

    // If it's a folder, call this function recursively.
    if (fs.statSync(inputPath).isDirectory()) {
      processFiles(client, inputPath, outputPath);
      continue;
    }

    // If it's a file, send it to Unstructured for processing.
    const data = fs.readFileSync(inputPath);

    client.general.partition({
      partitionParameters: {
        files: {
          content: data,
          fileName: inputPath
        },
        strategy: Strategy.HiRes,
        coordinates: true,
        splitPdfPage: true,
        splitPdfConcurrencyLevel: 15,
        splitPdfAllowFailed: true
      }
    }).then((res: PartitionResponse) => {
      if (res.statusCode === 200) {
        // Successfully processed: write the processed data to the
        // destination directory.
        const jsonElements = JSON.stringify(res, null, 2);
        fs.writeFileSync(outputPath + ".json", jsonElements);
      } else {
        // Report unexpected statuses instead of silently dropping the file.
        console.log(`Unexpected status ${res.statusCode} for ${inputPath}`);
      }
    }).catch((e) => {
      if (e.statusCode) {
        console.log(e.statusCode);
        console.log(e.body);
      } else {
        console.log(e);
      }
    });
  }
}
// Create the client from the API key and API URL environment variables.
const client = new UnstructuredClient({
  security: { apiKeyAuth: process.env.UNSTRUCTURED_API_KEY },
  serverURL: process.env.UNSTRUCTURED_API_URL
});

// process.env values are `string | undefined`; verify the directories are set
// before passing them to processFiles, which requires strings.
const inputDir = process.env.LOCAL_FILE_INPUT_DIR;
const outputDir = process.env.LOCAL_FILE_OUTPUT_DIR;
if (!inputDir || !outputDir) {
  throw new Error(
    "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
  );
}

processFiles(client, inputDir, outputDir);
To assign each element a unique ID, set `unique_element_ids=true`. Note: this means that the element IDs
will be random, so with every partition of the same file, you will get different IDs.
This can be helpful if you’d like to use the IDs as a primary key in a database, for example.
POST
# Partition a PDF with the VLM strategy and request unique element IDs.
# The API key header is double-quoted so the shell expands
# $UNSTRUCTURED_API_KEY; inside single quotes the literal text would be sent.
curl -X 'POST' "$UNSTRUCTURED_API_URL" \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -H "unstructured-api-key: $UNSTRUCTURED_API_KEY" \
  -F 'files=@sample-docs/layout-parser-paper-fast.pdf' \
  -F 'unique_element_ids=true' \
  -F 'strategy=vlm' \
  -F 'vlm_model_provider=openai' \
  -F 'vlm_model=gpt-4o'
Python SDK
import asyncio
import os
import json
import unstructured_client
from unstructured_client.models import shared
# Create the Unstructured client, authenticating with the API key read from
# the UNSTRUCTURED_API_KEY environment variable.
client = unstructured_client.UnstructuredClient(
    api_key_auth=os.environ.get("UNSTRUCTURED_API_KEY"),
)
async def call_api(filename, input_dir, output_dir):
    """Partition one file with Unstructured and save the elements as JSON.

    Args:
        filename: Path of the file to send for partitioning.
        input_dir: Root input directory, used to compute the file's relative
            location.
        output_dir: Root output directory; the output is written to the same
            relative location beneath it, with ".json" appended to the name.

    Errors raised while calling the API or writing the output are caught and
    printed rather than propagated.
    """
    # Read the file inside a context manager so the handle is always closed;
    # the bytes are passed as the request content.
    with open(filename, "rb") as source:
        content = source.read()

    req = {
        "partition_parameters": {
            "files": {
                "content": content,
                "file_name": os.path.basename(filename),
            },
            "strategy": shared.Strategy.VLM,
            "vlm_model": "gpt-4o",
            "vlm_model_provider": "openai",
            "unique_element_ids": True,
            "split_pdf_page": True,
            "split_pdf_allow_failed": True,
            "split_pdf_concurrency_level": 15
        }
    }

    try:
        res = await client.general.partition_async(
            request=req
        )
        element_dicts = [element for element in res.elements]
        json_elements = json.dumps(element_dicts, indent=2)

        # Create the output directory structure.
        relative_path = os.path.relpath(os.path.dirname(filename), input_dir)
        output_subdir = os.path.join(output_dir, relative_path)
        os.makedirs(output_subdir, exist_ok=True)

        # Write the output file.
        output_filename = os.path.join(output_subdir, os.path.basename(filename) + ".json")
        with open(output_filename, "w") as file:
            file.write(json_elements)
    except Exception as e:
        print(f"Error processing {filename}: {e}")
async def process_files(input_directory, output_directory):
    """Partition every non-JSON file under input_directory concurrently.

    Walks input_directory recursively, creates one call_api() coroutine per
    file whose name does not end in '.json', and awaits them all together.
    """
    pending = [
        call_api(os.path.join(dirpath, name), input_directory, output_directory)
        for dirpath, _, names in os.walk(input_directory)
        for name in names
        if not name.endswith('.json')
    ]
    await asyncio.gather(*pending)
if __name__ == "__main__":
    # Fail fast with a clear message when a required directory variable is
    # unset or empty, instead of passing None into process_files().
    input_directory = os.getenv("LOCAL_FILE_INPUT_DIR")
    output_directory = os.getenv("LOCAL_FILE_OUTPUT_DIR")
    if not input_directory or not output_directory:
        raise SystemExit(
            "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
        )

    asyncio.run(process_files(
        input_directory=input_directory,
        output_directory=output_directory
    ))
JavaScript/TypeScript SDK
import { UnstructuredClient } from "unstructured-client";
import * as fs from "fs";
import * as path from "path";
import { Strategy } from "unstructured-client/sdk/models/shared/index.js";
import { PartitionResponse } from "unstructured-client/sdk/models/operations";
/**
 * Sends every file under `sourcePath` to Unstructured for processing and
 * writes each result as JSON to the mirrored location under `destinationPath`.
 *
 * Folders are processed recursively. Partition requests are started without
 * being awaited, so this function returns before the results are written.
 *
 * @param client - The Unstructured client used to send partition requests.
 * @param sourcePath - The input directory to read from.
 * @param destinationPath - The output directory; created if it does not exist.
 */
function processFiles(
  client: UnstructuredClient,
  sourcePath: string,
  destinationPath: string
): void {
  // If an output directory does not exist for the corresponding input
  // directory, then create it.
  if (!fs.existsSync(destinationPath)) {
    fs.mkdirSync(destinationPath, { recursive: true });
  }

  // For each folder and file at the current level of the input directory...
  for (const item of fs.readdirSync(sourcePath)) {
    const inputPath = path.join(sourcePath, item);
    const outputPath = path.join(destinationPath, item);

    // If it's a folder, call this function recursively.
    if (fs.statSync(inputPath).isDirectory()) {
      processFiles(client, inputPath, outputPath);
      continue;
    }

    // If it's a file, send it to Unstructured for processing.
    const data = fs.readFileSync(inputPath);

    client.general.partition({
      partitionParameters: {
        files: {
          content: data,
          fileName: inputPath
        },
        uniqueElementIds: true,
        strategy: Strategy.HiRes,
        splitPdfPage: true,
        splitPdfConcurrencyLevel: 15,
        splitPdfAllowFailed: true
      }
    }).then((res: PartitionResponse) => {
      if (res.statusCode === 200) {
        // Successfully processed: write the processed data to the
        // destination directory.
        const jsonElements = JSON.stringify(res, null, 2);
        fs.writeFileSync(outputPath + ".json", jsonElements);
      } else {
        // Report unexpected statuses instead of silently dropping the file.
        console.log(`Unexpected status ${res.statusCode} for ${inputPath}`);
      }
    }).catch((e) => {
      if (e.statusCode) {
        console.log(e.statusCode);
        console.log(e.body);
      } else {
        console.log(e);
      }
    });
  }
}
// Create the client from the API key and API URL environment variables.
const client = new UnstructuredClient({
  security: { apiKeyAuth: process.env.UNSTRUCTURED_API_KEY },
  serverURL: process.env.UNSTRUCTURED_API_URL
});

// process.env values are `string | undefined`; verify the directories are set
// before passing them to processFiles, which requires strings.
const inputDir = process.env.LOCAL_FILE_INPUT_DIR;
const outputDir = process.env.LOCAL_FILE_OUTPUT_DIR;
if (!inputDir || !outputDir) {
  throw new Error(
    "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
  );
}

processFiles(client, inputDir, outputDir);
To chunk the output, set the `chunking_strategy` parameter.
By default, `chunking_strategy` is set to `None`, and no chunking is performed.
POST
# Partition a PDF with the VLM strategy and chunk the output by title.
# The API key header is double-quoted so the shell expands
# $UNSTRUCTURED_API_KEY; inside single quotes the literal text would be sent.
curl -X 'POST' "$UNSTRUCTURED_API_URL" \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -H "unstructured-api-key: $UNSTRUCTURED_API_KEY" \
  -F 'files=@sample-docs/layout-parser-paper-fast.pdf' \
  -F 'chunking_strategy=by_title' \
  -F 'max_characters=1024' \
  -F 'strategy=vlm' \
  -F 'vlm_model_provider=openai' \
  -F 'vlm_model=gpt-4o'
Python SDK
import asyncio
import os
import json
import unstructured_client
from unstructured_client.models import shared
# Create the Unstructured client, authenticating with the API key read from
# the UNSTRUCTURED_API_KEY environment variable.
client = unstructured_client.UnstructuredClient(
    api_key_auth=os.environ.get("UNSTRUCTURED_API_KEY"),
)
async def call_api(filename, input_dir, output_dir):
    """Partition and chunk one file with Unstructured and save the result as JSON.

    Args:
        filename: Path of the file to send for partitioning.
        input_dir: Root input directory, used to compute the file's relative
            location.
        output_dir: Root output directory; the output is written to the same
            relative location beneath it, with ".json" appended to the name.

    Errors raised while calling the API or writing the output are caught and
    printed rather than propagated.
    """
    # Read the file inside a context manager so the handle is always closed;
    # the bytes are passed as the request content.
    with open(filename, "rb") as source:
        content = source.read()

    req = {
        "partition_parameters": {
            "files": {
                "content": content,
                "file_name": os.path.basename(filename),
            },
            "chunking_strategy": "by_title",
            "max_characters": 1024,
            "strategy": shared.Strategy.VLM,
            "vlm_model": "gpt-4o",
            "vlm_model_provider": "openai",
            "split_pdf_page": True,
            "split_pdf_allow_failed": True,
            "split_pdf_concurrency_level": 15
        }
    }

    try:
        res = await client.general.partition_async(
            request=req
        )
        element_dicts = [element for element in res.elements]
        json_elements = json.dumps(element_dicts, indent=2)

        # Create the output directory structure.
        relative_path = os.path.relpath(os.path.dirname(filename), input_dir)
        output_subdir = os.path.join(output_dir, relative_path)
        os.makedirs(output_subdir, exist_ok=True)

        # Write the output file.
        output_filename = os.path.join(output_subdir, os.path.basename(filename) + ".json")
        with open(output_filename, "w") as file:
            file.write(json_elements)
    except Exception as e:
        print(f"Error processing {filename}: {e}")
async def process_files(input_directory, output_directory):
    """Partition every non-JSON file under input_directory concurrently.

    Walks input_directory recursively, creates one call_api() coroutine per
    file whose name does not end in '.json', and awaits them all together.
    """
    pending = [
        call_api(os.path.join(dirpath, name), input_directory, output_directory)
        for dirpath, _, names in os.walk(input_directory)
        for name in names
        if not name.endswith('.json')
    ]
    await asyncio.gather(*pending)
if __name__ == "__main__":
    # Fail fast with a clear message when a required directory variable is
    # unset or empty, instead of passing None into process_files().
    input_directory = os.getenv("LOCAL_FILE_INPUT_DIR")
    output_directory = os.getenv("LOCAL_FILE_OUTPUT_DIR")
    if not input_directory or not output_directory:
        raise SystemExit(
            "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
        )

    asyncio.run(process_files(
        input_directory=input_directory,
        output_directory=output_directory
    ))
JavaScript/TypeScript SDK
import { UnstructuredClient } from "unstructured-client";
import * as fs from "fs";
import * as path from "path";
import { ChunkingStrategy, Strategy } from "unstructured-client/sdk/models/shared/index.js";
import { PartitionResponse } from "unstructured-client/sdk/models/operations";
/**
 * Sends every file under `sourcePath` to Unstructured for processing and
 * writes each result as JSON to the mirrored location under `destinationPath`.
 *
 * Folders are processed recursively. Partition requests are started without
 * being awaited, so this function returns before the results are written.
 *
 * @param client - The Unstructured client used to send partition requests.
 * @param sourcePath - The input directory to read from.
 * @param destinationPath - The output directory; created if it does not exist.
 */
function processFiles(
  client: UnstructuredClient,
  sourcePath: string,
  destinationPath: string
): void {
  // If an output directory does not exist for the corresponding input
  // directory, then create it.
  if (!fs.existsSync(destinationPath)) {
    fs.mkdirSync(destinationPath, { recursive: true });
  }

  // For each folder and file at the current level of the input directory...
  for (const item of fs.readdirSync(sourcePath)) {
    const inputPath = path.join(sourcePath, item);
    const outputPath = path.join(destinationPath, item);

    // If it's a folder, call this function recursively.
    if (fs.statSync(inputPath).isDirectory()) {
      processFiles(client, inputPath, outputPath);
      continue;
    }

    // If it's a file, send it to Unstructured for processing.
    const data = fs.readFileSync(inputPath);

    client.general.partition({
      partitionParameters: {
        files: {
          content: data,
          fileName: inputPath
        },
        strategy: Strategy.HiRes,
        chunkingStrategy: ChunkingStrategy.ByTitle,
        maxCharacters: 1024,
        splitPdfPage: true,
        splitPdfConcurrencyLevel: 15,
        splitPdfAllowFailed: true
      }
    }).then((res: PartitionResponse) => {
      if (res.statusCode === 200) {
        // Successfully processed: write the processed data to the
        // destination directory.
        const jsonElements = JSON.stringify(res, null, 2);
        fs.writeFileSync(outputPath + ".json", jsonElements);
      } else {
        // Report unexpected statuses instead of silently dropping the file.
        console.log(`Unexpected status ${res.statusCode} for ${inputPath}`);
      }
    }).catch((e) => {
      if (e.statusCode) {
        console.log(e.statusCode);
        console.log(e.body);
      } else {
        console.log(e);
      }
    });
  }
}
// Create the client from the API key and API URL environment variables.
const client = new UnstructuredClient({
  security: { apiKeyAuth: process.env.UNSTRUCTURED_API_KEY },
  serverURL: process.env.UNSTRUCTURED_API_URL
});

// process.env values are `string | undefined`; verify the directories are set
// before passing them to processFiles, which requires strings.
const inputDir = process.env.LOCAL_FILE_INPUT_DIR;
const outputDir = process.env.LOCAL_FILE_OUTPUT_DIR;
if (!inputDir || !outputDir) {
  throw new Error(
    "Set the LOCAL_FILE_INPUT_DIR and LOCAL_FILE_OUTPUT_DIR environment variables."
  );
}

processFiles(client, inputDir, outputDir);
Was this page helpful?