@@ -569,3 +569,74 @@ def test_get_dataset_with_version():
569569 # Verify fetching without version returns both items (latest)
570570 dataset_latest = langfuse .get_dataset (name )
571571 assert len (dataset_latest .items ) == 2
572+
573+
def test_run_experiment_with_versioned_dataset():
    """Check that an experiment can be run against a versioned dataset fetch.

    Scenario:
      1. Create a dataset with a single item and read back its ``created_at``.
      2. Pick a version timestamp one second after that creation time.
      3. Overwrite the item (same id) and add a second item.
      4. Fetch the dataset at the version timestamp and assert it contains
         only the first item with its original input / expected output.
      5. Run an experiment on that fetch and assert it yields one result
         whose output is the original expected output.
    """
    import time
    from datetime import timedelta

    client = Langfuse(debug=False)
    dataset_name = create_uuid()

    def flush_and_wait():
        # Push pending events, then pause before the next step so the
        # following read/write happens a few seconds later.
        client.flush()
        time.sleep(3)

    # --- Step 1: dataset with one item -----------------------------------
    client.create_dataset(name=dataset_name)
    client.create_dataset_item(
        dataset_name=dataset_name,
        input={"question": "What is 2+2?"},
        expected_output=4,
    )
    flush_and_wait()

    # Read the item back so the timestamp used below comes from the fetched
    # item rather than from the local clock.
    initial_dataset = client.get_dataset(dataset_name)
    assert len(initial_dataset.items) == 1
    first_item = initial_dataset.items[0]
    item1_id = first_item.id
    item1_created_at = first_item.created_at

    # --- Step 2: choose the version cut-off ------------------------------
    # One second after item1 was created; the pause that follows keeps the
    # later writes after this cut-off.
    version_timestamp = item1_created_at + timedelta(seconds=1)
    time.sleep(3)

    # --- Step 3: changes made after the cut-off --------------------------
    # Re-submit item1 under the same id with new content.
    client.create_dataset_item(
        id=item1_id,
        dataset_name=dataset_name,
        input={"question": "What is 4+4?"},
        expected_output=8,
    )
    flush_and_wait()

    # Add a brand-new second item.
    client.create_dataset_item(
        dataset_name=dataset_name,
        input={"question": "What is 3+3?"},
        expected_output=6,
    )
    flush_and_wait()

    # --- Step 4: versioned fetch reflects the state at the cut-off -------
    versioned_dataset = client.get_dataset(dataset_name, version=version_timestamp)
    assert len(versioned_dataset.items) == 1
    assert versioned_dataset.version == version_timestamp

    snapshot_item = versioned_dataset.items[0]
    # The item must carry its original content, not the later overwrite.
    assert snapshot_item.input == {"question": "What is 2+2?"}
    assert snapshot_item.expected_output == 4
    assert snapshot_item.id == item1_id

    # --- Step 5: experiment on the versioned fetch -----------------------
    def simple_task(*, item, **kwargs):
        # Echo the item's expected output as the task result.
        return item.expected_output

    result = versioned_dataset.run_experiment(
        name="Versioned Dataset Test",
        description="Testing experiment with versioned dataset",
        task=simple_task,
    )

    assert result.name == "Versioned Dataset Test"
    # Exactly one result, because the versioned fetch holds a single item.
    assert len(result.item_results) == 1
    assert result.item_results[0].output == 4
0 commit comments