diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh
index 5a285be03..4ae66f6f3 100644
--- a/.buildkite/run-cpu-test.sh
+++ b/.buildkite/run-cpu-test.sh
@@ -30,7 +30,7 @@ function cpu_tests() {
   # offline inference
   docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" bash -c "
     set -e
-    python3 examples/offline_inference/offline_inference.py"
+    python3 examples/offline_inference/basic.py"
 
   # Run basic model test
   docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh
index 1e5ff7789..3e4e40946 100644
--- a/.buildkite/run-gh200-test.sh
+++ b/.buildkite/run-gh200-test.sh
@@ -24,5 +24,5 @@ remove_docker_container
 
 # Run the image and test offline inference
 docker run --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
-    python3 examples/offline_inference/offline_inference.py
+    python3 examples/offline_inference/basic.py
 '
diff --git a/.buildkite/run-hpu-test.sh b/.buildkite/run-hpu-test.sh
index a50570ab5..8f3b08212 100644
--- a/.buildkite/run-hpu-test.sh
+++ b/.buildkite/run-hpu-test.sh
@@ -13,4 +13,4 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and launch offline inference
-docker run --runtime=habana --name=hpu-test --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/offline_inference.py
\ No newline at end of file
+docker run --runtime=habana --name=hpu-test --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/basic.py
\ No newline at end of file
diff --git a/.buildkite/run-neuron-test.sh b/.buildkite/run-neuron-test.sh
index 52d485939..189714ebb 100644
--- a/.buildkite/run-neuron-test.sh
+++ b/.buildkite/run-neuron-test.sh
@@ -51,4 +51,4 @@ docker run --rm -it --device=/dev/neuron0 --device=/dev/neuron1 --network host \
        -e "NEURON_COMPILE_CACHE_URL=${NEURON_COMPILE_CACHE_MOUNT}" \
        --name "${container_name}" \
        ${image_name} \
-       /bin/bash -c "python3 /workspace/vllm/examples/offline_inference/offline_inference_neuron.py"
+       /bin/bash -c "python3 /workspace/vllm/examples/offline_inference/neuron.py"
diff --git a/.buildkite/run-openvino-test.sh b/.buildkite/run-openvino-test.sh
index 380f7a44a..6159b21ff 100755
--- a/.buildkite/run-openvino-test.sh
+++ b/.buildkite/run-openvino-test.sh
@@ -13,4 +13,4 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and launch offline inference
-docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference/offline_inference.py
+docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference/basic.py
diff --git a/.buildkite/run-tpu-test.sh b/.buildkite/run-tpu-test.sh
index a8f021890..650af0fac 100644
--- a/.buildkite/run-tpu-test.sh
+++ b/.buildkite/run-tpu-test.sh
@@ -23,4 +23,4 @@ docker run --privileged --net host --shm-size=16G -it \
     && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
     && python3 /workspace/vllm/tests/tpu/test_compilation.py \
     && python3 /workspace/vllm/tests/tpu/test_quantization_accuracy.py \
-    && python3 /workspace/vllm/examples/offline_inference/offline_inference_tpu.py"
+    && python3 /workspace/vllm/examples/offline_inference/tpu.py"
diff --git a/.buildkite/run-xpu-test.sh b/.buildkite/run-xpu-test.sh
index 160e10aa3..4d344e58d 100644
--- a/.buildkite/run-xpu-test.sh
+++ b/.buildkite/run-xpu-test.sh
@@ -14,6 +14,6 @@ remove_docker_container
 
 # Run the image and test offline inference/tensor parallel
 docker run --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path --entrypoint="" xpu-test sh -c '
-    python3 examples/offline_inference/offline_inference.py
-    python3 examples/offline_inference/offline_inference_cli.py -tp 2
+    python3 examples/offline_inference/basic.py
+    python3 examples/offline_inference/cli.py -tp 2
 '
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 7d1326954..d3bd809cf 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -187,19 +187,19 @@ steps:
   - examples/
   commands:
     - pip install tensorizer # for tensorizer test
-    - python3 offline_inference/offline_inference.py
+    - python3 offline_inference/basic.py
     - python3 offline_inference/cpu_offload.py
-    - python3 offline_inference/offline_inference_chat.py
-    - python3 offline_inference/offline_inference_with_prefix.py
+    - python3 offline_inference/chat.py
+    - python3 offline_inference/prefix_caching.py
     - python3 offline_inference/llm_engine_example.py
-    - python3 offline_inference/offline_inference_vision_language.py
-    - python3 offline_inference/offline_inference_vision_language_multi_image.py
+    - python3 offline_inference/vision_language.py
+    - python3 offline_inference/vision_language_multi_image.py
     - python3 other/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 other/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
-    - python3 offline_inference/offline_inference_encoder_decoder.py
-    - python3 offline_inference/offline_inference_classification.py
-    - python3 offline_inference/offline_inference_embedding.py
-    - python3 offline_inference/offline_inference_scoring.py
-    - python3 offline_inference/offline_profile.py --model facebook/opt-125m run_num_steps --num-steps 2
+    - python3 offline_inference/encoder_decoder.py
+    - python3 offline_inference/classification.py
+    - python3 offline_inference/embedding.py
+    - python3 offline_inference/scoring.py
+    - python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
 
 - label: Prefix Caching Test # 9min
   mirror_hardwares: [amd]
diff --git a/docs/source/contributing/profiling/profiling_index.md b/docs/source/contributing/profiling/profiling_index.md
index 97de40ff4..001db86bd 100644
--- a/docs/source/contributing/profiling/profiling_index.md
+++ b/docs/source/contributing/profiling/profiling_index.md
@@ -26,7 +26,7 @@ Set the env variable VLLM_RPC_TIMEOUT to a big number before you start the serve
 
 ### Offline Inference
 
-Refer to <gh-file:examples/offline_inference/offline_inference_with_profiler.py> for an example.
+Refer to <gh-file:examples/offline_inference/simple_profiling.py> for an example.
 
 ### OpenAI Server
 
diff --git a/docs/source/features/structured_outputs.md b/docs/source/features/structured_outputs.md
index a42c3dd64..1d77c7339 100644
--- a/docs/source/features/structured_outputs.md
+++ b/docs/source/features/structured_outputs.md
@@ -257,4 +257,4 @@ outputs = llm.generate(
 print(outputs[0].outputs[0].text)
 ```
 
-Full example: <gh-file:examples/offline_inference/offline_inference_structured_outputs.py>
+Full example: <gh-file:examples/offline_inference/structured_outputs.py>
diff --git a/docs/source/getting_started/installation/cpu-x86.md b/docs/source/getting_started/installation/cpu-x86.md
index bb046dd0f..f4d3eec03 100644
--- a/docs/source/getting_started/installation/cpu-x86.md
+++ b/docs/source/getting_started/installation/cpu-x86.md
@@ -95,7 +95,7 @@ $ VLLM_TARGET_DEVICE=cpu python setup.py install
 $ sudo apt-get install libtcmalloc-minimal4 # install TCMalloc library
 $ find / -name *libtcmalloc* # find the dynamic link library path
 $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:$LD_PRELOAD # prepend the library to LD_PRELOAD
-$ python examples/offline_inference/offline_inference.py # run vLLM
+$ python examples/offline_inference/basic.py # run vLLM
 ```
 
 - When using the online serving, it is recommended to reserve 1-2 CPU cores for the serving framework to avoid CPU oversubscription. For example, on a platform with 32 physical CPU cores, reserving CPU 30 and 31 for the framework and using CPU 0-29 for OpenMP:
@@ -132,7 +132,7 @@ CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE    MAXMHZ   MINMHZ      MHZ
 
 # On this platform, it is recommend to only bind openMP threads on logical CPU cores 0-7 or 8-15
 $ export VLLM_CPU_OMP_THREADS_BIND=0-7
-$ python examples/offline_inference/offline_inference.py
+$ python examples/offline_inference/basic.py
 ```
 
 - If using vLLM CPU backend on a multi-socket machine with NUMA, be aware to set CPU cores using `VLLM_CPU_OMP_THREADS_BIND` to avoid cross NUMA node memory access.
diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md
index d7d43785c..6fd0083a9 100644
--- a/docs/source/getting_started/quickstart.md
+++ b/docs/source/getting_started/quickstart.md
@@ -40,7 +40,7 @@ For non-CUDA platforms, please refer [here](#installation-index) for specific in
 
 ## Offline Batched Inference
 
-With vLLM installed, you can start generating texts for list of input prompts (i.e. offline batch inferencing). See the example script: <gh-file:examples/offline_inference/offline_inference.py>
+With vLLM installed, you can start generating texts for list of input prompts (i.e. offline batch inferencing). See the example script: <gh-file:examples/offline_inference/basic.py>
 
 The first line of this example imports the classes {class}`~vllm.LLM` and {class}`~vllm.SamplingParams`:
 
diff --git a/docs/source/models/generative_models.md b/docs/source/models/generative_models.md
index 6a5a58ad7..e4b4cd03a 100644
--- a/docs/source/models/generative_models.md
+++ b/docs/source/models/generative_models.md
@@ -46,7 +46,7 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
-A code example can be found here: <gh-file:examples/offline_inference/offline_inference.py>
+A code example can be found here: <gh-file:examples/offline_inference/basic.py>
 
 ### `LLM.beam_search`
 
@@ -103,7 +103,7 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
-A code example can be found here: <gh-file:examples/offline_inference/offline_inference_chat.py>
+A code example can be found here: <gh-file:examples/offline_inference/chat.py>
 
 If the model doesn't have a chat template or you want to specify another one,
 you can explicitly pass a chat template:
diff --git a/docs/source/models/pooling_models.md b/docs/source/models/pooling_models.md
index 324b1f550..91db694be 100644
--- a/docs/source/models/pooling_models.md
+++ b/docs/source/models/pooling_models.md
@@ -88,7 +88,7 @@ embeds = output.outputs.embedding
 print(f"Embeddings: {embeds!r} (size={len(embeds)})")
 ```
 
-A code example can be found here: <gh-file:examples/offline_inference/offline_inference_embedding.py>
+A code example can be found here: <gh-file:examples/offline_inference/embedding.py>
 
 ### `LLM.classify`
 
@@ -103,7 +103,7 @@ probs = output.outputs.probs
 print(f"Class Probabilities: {probs!r} (size={len(probs)})")
 ```
 
-A code example can be found here: <gh-file:examples/offline_inference/offline_inference_classification.py>
+A code example can be found here: <gh-file:examples/offline_inference/classification.py>
 
 ### `LLM.score`
 
@@ -125,7 +125,7 @@ score = output.outputs.score
 print(f"Score: {score}")
 ```
 
-A code example can be found here: <gh-file:examples/offline_inference/offline_inference_scoring.py>
+A code example can be found here: <gh-file:examples/offline_inference/scoring.py>
 
 ## Online Serving
 
diff --git a/docs/source/serving/multimodal_inputs.md b/docs/source/serving/multimodal_inputs.md
index a06f121a6..53f5a274e 100644
--- a/docs/source/serving/multimodal_inputs.md
+++ b/docs/source/serving/multimodal_inputs.md
@@ -60,7 +60,7 @@ for o in outputs:
     print(generated_text)
 ```
 
-Full example: <gh-file:examples/offline_inference/offline_inference_vision_language.py>
+Full example: <gh-file:examples/offline_inference/vision_language.py>
 
 To substitute multiple images inside the same text prompt, you can pass in a list of images instead:
 
@@ -91,7 +91,7 @@ for o in outputs:
     print(generated_text)
 ```
 
-Full example: <gh-file:examples/offline_inference/offline_inference_vision_language_multi_image.py>
+Full example: <gh-file:examples/offline_inference/vision_language_multi_image.py>
 
 Multi-image input can be extended to perform video captioning. We show this with [Qwen2-VL](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) as it supports videos:
 
@@ -125,13 +125,13 @@ for o in outputs:
 You can pass a list of NumPy arrays directly to the `'video'` field of the multi-modal dictionary
 instead of using multi-image input.
 
-Full example: <gh-file:examples/offline_inference/offline_inference_vision_language.py>
+Full example: <gh-file:examples/offline_inference/vision_language.py>
 
 ### Audio
 
 You can pass a tuple `(array, sampling_rate)` to the `'audio'` field of the multi-modal dictionary.
 
-Full example: <gh-file:examples/offline_inference/offline_inference_audio_language.py>
+Full example: <gh-file:examples/offline_inference/audio_language.py>
 
 ### Embedding
 
diff --git a/examples/offline_inference/offline_inference_arctic.py b/examples/offline_inference/arctic.py
similarity index 100%
rename from examples/offline_inference/offline_inference_arctic.py
rename to examples/offline_inference/arctic.py
diff --git a/examples/offline_inference/offline_inference_audio_language.py b/examples/offline_inference/audio_language.py
similarity index 100%
rename from examples/offline_inference/offline_inference_audio_language.py
rename to examples/offline_inference/audio_language.py
diff --git a/examples/offline_inference/offline_inference.py b/examples/offline_inference/basic.py
similarity index 100%
rename from examples/offline_inference/offline_inference.py
rename to examples/offline_inference/basic.py
diff --git a/examples/offline_inference/offline_inference_with_default_generation_config.py b/examples/offline_inference/basic_with_model_default_sampling.py
similarity index 100%
rename from examples/offline_inference/offline_inference_with_default_generation_config.py
rename to examples/offline_inference/basic_with_model_default_sampling.py
diff --git a/examples/offline_inference/offline_inference_chat.py b/examples/offline_inference/chat.py
similarity index 100%
rename from examples/offline_inference/offline_inference_chat.py
rename to examples/offline_inference/chat.py
diff --git a/examples/offline_inference/offline_chat_with_tools.py b/examples/offline_inference/chat_with_tools.py
similarity index 100%
rename from examples/offline_inference/offline_chat_with_tools.py
rename to examples/offline_inference/chat_with_tools.py
diff --git a/examples/offline_inference/offline_inference_classification.py b/examples/offline_inference/classification.py
similarity index 100%
rename from examples/offline_inference/offline_inference_classification.py
rename to examples/offline_inference/classification.py
diff --git a/examples/offline_inference/offline_inference_cli.py b/examples/offline_inference/cli.py
similarity index 100%
rename from examples/offline_inference/offline_inference_cli.py
rename to examples/offline_inference/cli.py
diff --git a/examples/offline_inference/offline_inference_distributed.py b/examples/offline_inference/distributed.py
similarity index 100%
rename from examples/offline_inference/offline_inference_distributed.py
rename to examples/offline_inference/distributed.py
diff --git a/examples/offline_inference/offline_inference_embedding.py b/examples/offline_inference/embedding.py
similarity index 100%
rename from examples/offline_inference/offline_inference_embedding.py
rename to examples/offline_inference/embedding.py
diff --git a/examples/offline_inference/offline_inference_encoder_decoder.py b/examples/offline_inference/encoder_decoder.py
similarity index 100%
rename from examples/offline_inference/offline_inference_encoder_decoder.py
rename to examples/offline_inference/encoder_decoder.py
diff --git a/examples/offline_inference/florence2_inference.py b/examples/offline_inference/florence2_inference.py
index 49dd2c331..c24096e90 100644
--- a/examples/offline_inference/florence2_inference.py
+++ b/examples/offline_inference/florence2_inference.py
@@ -3,7 +3,7 @@ Demonstrate prompting of text-to-text
 encoder/decoder models, specifically Florence-2
 '''
 # TODO(Isotr0py):
-# Move to offline_inference/offline_inference_vision_language.py
+# Move to offline_inference/vision_language.py
 # after porting vision backbone
 from vllm import LLM, SamplingParams
 
diff --git a/examples/offline_inference/offline_inference_mlpspeculator.py b/examples/offline_inference/mlpspeculator.py
similarity index 100%
rename from examples/offline_inference/offline_inference_mlpspeculator.py
rename to examples/offline_inference/mlpspeculator.py
diff --git a/examples/offline_inference/offline_inference_neuron.py b/examples/offline_inference/neuron.py
similarity index 100%
rename from examples/offline_inference/offline_inference_neuron.py
rename to examples/offline_inference/neuron.py
diff --git a/examples/offline_inference/offline_inference_neuron_int8_quantization.py b/examples/offline_inference/neuron_int8_quantization.py
similarity index 100%
rename from examples/offline_inference/offline_inference_neuron_int8_quantization.py
rename to examples/offline_inference/neuron_int8_quantization.py
diff --git a/examples/offline_inference/offline_inference_openai/offline_inference_openai.md b/examples/offline_inference/openai/openai_batch.md
similarity index 92%
rename from examples/offline_inference/offline_inference_openai/offline_inference_openai.md
rename to examples/offline_inference/openai/openai_batch.md
index 6278a1943..a4774e57c 100644
--- a/examples/offline_inference/offline_inference_openai/offline_inference_openai.md
+++ b/examples/offline_inference/openai/openai_batch.md
@@ -8,7 +8,7 @@ This is a guide to performing batch inference using the OpenAI batch file format
  
 The OpenAI batch file format consists of a series of json objects on new lines.
  
-[See here for an example file.](https://github.com/vllm-project/vllm/blob/main/examples/offline_inference/offline_inference_openai/openai_example_batch.jsonl)
+[See here for an example file.](https://github.com/vllm-project/vllm/blob/main/examples/offline_inference/openai/openai_example_batch.jsonl)
  
 Each line represents a separate request. See the [OpenAI package reference](https://platform.openai.com/docs/api-reference/batch/requestInput) for more details.
  
@@ -31,13 +31,13 @@ We currently only support `/v1/chat/completions` and `/v1/embeddings` endpoints
 To follow along with this example, you can download the example batch, or create your own batch file in your working directory.
 
 ```
-wget https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/offline_inference_openai/openai_example_batch.jsonl
+wget https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai/openai_example_batch.jsonl
 ```
 
 Once you've created your batch file it should look like this
 
 ```
-$ cat offline_inference/offline_inference_openai/openai_example_batch.jsonl
+$ cat offline_inference/openai/openai_example_batch.jsonl
 {"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 ```
@@ -49,7 +49,7 @@ The batch running tool is designed to be used from the command line.
 You can run the batch with the following command, which will write its results to a file called `results.jsonl`
 
 ```
-python -m vllm.entrypoints.openai.run_batch -i offline_inference/offline_inference_openai/openai_example_batch.jsonl -o results.jsonl --model meta-llama/Meta-Llama-3-8B-Instruct
+python -m vllm.entrypoints.openai.run_batch -i offline_inference/openai/openai_example_batch.jsonl -o results.jsonl --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
 
 ### Step 3: Check your results
@@ -66,10 +66,10 @@ $ cat results.jsonl
 
 The batch runner supports remote input and output urls that are accessible via http/https.
 
-For example, to run against our example input file located at `https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/offline_inference_openai/openai_example_batch.jsonl`, you can run
+For example, to run against our example input file located at `https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai/openai_example_batch.jsonl`, you can run
 
 ```
-python -m vllm.entrypoints.openai.run_batch -i https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/offline_inference_openai/openai_example_batch.jsonl -o results.jsonl --model meta-llama/Meta-Llama-3-8B-Instruct
+python -m vllm.entrypoints.openai.run_batch -i https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai/openai_example_batch.jsonl -o results.jsonl --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
 
 ## Example 3: Integrating with AWS S3
@@ -90,13 +90,13 @@ To integrate with cloud blob storage, we recommend using presigned urls.
 To follow along with this example, you can download the example batch, or create your own batch file in your working directory.
 
 ```
-wget https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/offline_inference_openai/openai_example_batch.jsonl
+wget https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai/openai_example_batch.jsonl
 ```
 
 Once you've created your batch file it should look like this
 
 ```
-$ cat offline_inference/offline_inference_openai/openai_example_batch.jsonl
+$ cat offline_inference/openai/openai_example_batch.jsonl
 {"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 ```
@@ -104,7 +104,7 @@ $ cat offline_inference/offline_inference_openai/openai_example_batch.jsonl
 Now upload your batch file to your S3 bucket.
 
 ```
-aws s3 cp offline_inference/offline_inference_openai/openai_example_batch.jsonl s3://MY_BUCKET/MY_INPUT_FILE.jsonl
+aws s3 cp offline_inference/openai/openai_example_batch.jsonl s3://MY_BUCKET/MY_INPUT_FILE.jsonl
 ```
 
 ### Step 2: Generate your presigned urls
diff --git a/examples/offline_inference/offline_inference_openai/openai_example_batch.jsonl b/examples/offline_inference/openai/openai_example_batch.jsonl
similarity index 100%
rename from examples/offline_inference/offline_inference_openai/openai_example_batch.jsonl
rename to examples/offline_inference/openai/openai_example_batch.jsonl
diff --git a/examples/offline_inference/offline_inference_pixtral.py b/examples/offline_inference/pixtral.py
similarity index 100%
rename from examples/offline_inference/offline_inference_pixtral.py
rename to examples/offline_inference/pixtral.py
diff --git a/examples/offline_inference/offline_inference_with_prefix.py b/examples/offline_inference/prefix_caching.py
similarity index 100%
rename from examples/offline_inference/offline_inference_with_prefix.py
rename to examples/offline_inference/prefix_caching.py
diff --git a/examples/offline_inference/offline_profile.py b/examples/offline_inference/profiling.py
similarity index 99%
rename from examples/offline_inference/offline_profile.py
rename to examples/offline_inference/profiling.py
index 187a05e4d..8a94b5c2a 100644
--- a/examples/offline_inference/offline_profile.py
+++ b/examples/offline_inference/profiling.py
@@ -363,7 +363,7 @@ Profile a model
 
     example:
     ```
-    python examples/offline_inference/offline_profile.py \\
+    python examples/offline_inference/profiling.py \\
         --model neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8 --batch-size 4 \\
         --prompt-len 512 --max-num-batched-tokens 8196 --json Llama31-8b-FP8 \\
         --enforce-eager run_num_steps -n 2
diff --git a/examples/offline_inference/offline_inference_scoring.py b/examples/offline_inference/scoring.py
similarity index 100%
rename from examples/offline_inference/offline_inference_scoring.py
rename to examples/offline_inference/scoring.py
diff --git a/examples/offline_inference/offline_inference_with_profiler.py b/examples/offline_inference/simple_profiling.py
similarity index 100%
rename from examples/offline_inference/offline_inference_with_profiler.py
rename to examples/offline_inference/simple_profiling.py
diff --git a/examples/offline_inference/offline_inference_structured_outputs.py b/examples/offline_inference/structured_outputs.py
similarity index 100%
rename from examples/offline_inference/offline_inference_structured_outputs.py
rename to examples/offline_inference/structured_outputs.py
diff --git a/examples/offline_inference/offline_inference_tpu.py b/examples/offline_inference/tpu.py
similarity index 100%
rename from examples/offline_inference/offline_inference_tpu.py
rename to examples/offline_inference/tpu.py
diff --git a/examples/offline_inference/offline_inference_vision_language.py b/examples/offline_inference/vision_language.py
similarity index 100%
rename from examples/offline_inference/offline_inference_vision_language.py
rename to examples/offline_inference/vision_language.py
diff --git a/examples/offline_inference/offline_inference_vision_language_embedding.py b/examples/offline_inference/vision_language_embedding.py
similarity index 100%
rename from examples/offline_inference/offline_inference_vision_language_embedding.py
rename to examples/offline_inference/vision_language_embedding.py
diff --git a/examples/offline_inference/offline_inference_vision_language_multi_image.py b/examples/offline_inference/vision_language_multi_image.py
similarity index 100%
rename from examples/offline_inference/offline_inference_vision_language_multi_image.py
rename to examples/offline_inference/vision_language_multi_image.py
diff --git a/examples/offline_inference/offline_inference_whisper.py b/examples/offline_inference/whisper.py
similarity index 100%
rename from examples/offline_inference/offline_inference_whisper.py
rename to examples/offline_inference/whisper.py
diff --git a/tests/plugins_tests/test_platform_plugins.py b/tests/plugins_tests/test_platform_plugins.py
index 57518bd3e..69698b34c 100644
--- a/tests/plugins_tests/test_platform_plugins.py
+++ b/tests/plugins_tests/test_platform_plugins.py
@@ -5,7 +5,7 @@ def test_platform_plugins():
     import os
     example_file = os.path.join(
         os.path.dirname(os.path.dirname(os.path.dirname(current_file))),
-        "examples", "offline_inference/offline_inference.py")
+        "examples", "offline_inference/basic.py")
     runpy.run_path(example_file)
 
     # check if the plugin is loaded correctly
diff --git a/tools/profiler/print_layerwise_table.py b/tools/profiler/print_layerwise_table.py
index 49366abc7..54cd60c2b 100644
--- a/tools/profiler/print_layerwise_table.py
+++ b/tools/profiler/print_layerwise_table.py
@@ -31,7 +31,7 @@ if __name__ == "__main__":
                         type=str,
                         required=True,
                         help="json trace file output by "
-                        "examples/offline_inference/offline_profile.py")
+                        "examples/offline_inference/profiling.py")
     parser.add_argument("--phase",
                         type=str,
                         required=True,
diff --git a/tools/profiler/visualize_layerwise_profile.py b/tools/profiler/visualize_layerwise_profile.py
index fa88ed420..cb56ebd69 100644
--- a/tools/profiler/visualize_layerwise_profile.py
+++ b/tools/profiler/visualize_layerwise_profile.py
@@ -538,7 +538,7 @@ if __name__ == "__main__":
                         type=str,
                         required=True,
                         help="json trace file output by \
-                              examples/offline_inference/offline_profile.py")
+                              examples/offline_inference/profiling.py")
     parser.add_argument("--output-directory",
                         type=str,
                         required=False,