diff --git a/.gitattributes b/.gitattributes index b18ecf2182c490f5a25a539197892f96d9199a29..45dc233c563f29828647ea33a44da9e56ac538fe 100644 --- a/.gitattributes +++ b/.gitattributes @@ -36,3 +36,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text images/curves.png filter=lfs diff=lfs merge=lfs -text images/demo2.png filter=lfs diff=lfs merge=lfs -text images/performance.png filter=lfs diff=lfs merge=lfs -text +images/dataset.png filter=lfs diff=lfs merge=lfs -text +images/demo1.png filter=lfs diff=lfs merge=lfs -text +previous_version/Video-R1-main-previous/src/r1-v/temp_image.png filter=lfs diff=lfs merge=lfs -text +previous_version/Video-R1-main-previous/src/distill_r1/grpo_r1_distilled.jpg filter=lfs diff=lfs merge=lfs -text +previous_version/Video-R1-main-previous/images/7B_nextqa.png filter=lfs diff=lfs merge=lfs -text +previous_version/Video-R1-main-previous/images/sample.png filter=lfs diff=lfs merge=lfs -text +previous_version/Video-R1-main-previous/images/CATER_new_003595.gif filter=lfs diff=lfs merge=lfs -text +previous_version/Video-R1-main-previous/images/2B_curve.png filter=lfs diff=lfs merge=lfs -text +previous_version/Video-R1-main-previous/images/7B_curve.png filter=lfs diff=lfs merge=lfs -text diff --git a/images/dataset.png b/images/dataset.png new file mode 100644 index 0000000000000000000000000000000000000000..f6d3586139533ee0137eab4af66770e183719e00 --- /dev/null +++ b/images/dataset.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0affaa1cf8d870c6a6ec41be54494e073c51987fe5ad424a8ee3437b1dcc116 +size 589436 diff --git a/images/demo1.png b/images/demo1.png new file mode 100644 index 0000000000000000000000000000000000000000..71e638787b5b57e6de2879cdf5ef1691121a9b7e --- /dev/null +++ b/images/demo1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c40671d8761915a8de02f4548f0e1715069aa8d171f08d5b27af3f2a715548 +size 1018127 diff --git a/previous_version/Video-R1-main-previous/images/2B_curve.png b/previous_version/Video-R1-main-previous/images/2B_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..35b2f69bc56cb1e692e0d837709824054ce990d2 --- /dev/null +++ b/previous_version/Video-R1-main-previous/images/2B_curve.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f5b2aaa2c501639cc570bf9f8b8a94dedf3e3d8f9b2ad2ef6e13d01478b733d +size 321386 diff --git a/previous_version/Video-R1-main-previous/images/7B_curve.png b/previous_version/Video-R1-main-previous/images/7B_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..0f7dd47665df620e9e9bc5204f5baddf3b89575c --- /dev/null +++ b/previous_version/Video-R1-main-previous/images/7B_curve.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38e42d31de8bf93659529b9334c1aa58c71d91fa55e1eeef7f4f6fece1ca4663 +size 309735 diff --git a/previous_version/Video-R1-main-previous/images/7B_nextqa.png b/previous_version/Video-R1-main-previous/images/7B_nextqa.png new file mode 100644 index 0000000000000000000000000000000000000000..b1e03956c270bbd8cad0beb9f4d1ecfa04cdb0dd --- /dev/null +++ b/previous_version/Video-R1-main-previous/images/7B_nextqa.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99c0f930a3f67a870386ee16896b1f45a3c84dfd43b27dd4d128a8ae66406f19 +size 333664 diff --git a/previous_version/Video-R1-main-previous/images/CATER_new_003595.gif b/previous_version/Video-R1-main-previous/images/CATER_new_003595.gif new file mode 100644 index 
0000000000000000000000000000000000000000..f316e680e376dbc150976d48eef46d249ce0922a --- /dev/null +++ b/previous_version/Video-R1-main-previous/images/CATER_new_003595.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed0306a7a088e526eb2ccfb8e0f44d987fa48548248649f1cd4a270955634cd +size 776837 diff --git a/previous_version/Video-R1-main-previous/images/sample.png b/previous_version/Video-R1-main-previous/images/sample.png new file mode 100644 index 0000000000000000000000000000000000000000..657e3909256ac1aab5a8e4773a7f4c0bf03179d4 --- /dev/null +++ b/previous_version/Video-R1-main-previous/images/sample.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e616764501a3833e9035ccd48b79b19f23cc02c597cedde681edf0b63f27d09c +size 243947 diff --git a/previous_version/Video-R1-main-previous/src/distill_r1/create_hf_dataset.py b/previous_version/Video-R1-main-previous/src/distill_r1/create_hf_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..f757aa9583dafaa86d2263c56e798b2d3f77dde4 --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/distill_r1/create_hf_dataset.py @@ -0,0 +1,119 @@ +import json +import os +import random +from datasets import load_dataset +from tqdm import tqdm + +random.seed(1234) +VAL_NUM = 5000 + + +def create_r1_train_dataset( + valid_pair_json, + data_dir, + img_dir="/home/lilei/Visual-R1/CLEVR_CoGenT_v1.0/images/trainA/", +): + os.makedirs(data_dir, exist_ok=True) + pairs = [json.loads(line) for line in open(valid_pair_json, "r")] + mapped_pairs = [] + + for idx, pair in tqdm(enumerate(pairs)): + img_filename = pair["img_filename"] + new_pair = {} + try: + new_pair["thinking"] = ( + pair["r1_response"] + .split("<think>")[1] + .split("</think>")[0] + .replace("scene description", "image") + ) + except Exception as e: + print(f"Error processing pair response: ", pair["r1_response"]) + continue # skip this pair + # add index to distinguish the same image + dataset_filename = ( + img_filename.split(".")[0] + "_" + str(idx) + "." + img_filename.split(".")[1] + ) + if not os.path.exists(f"{data_dir}/{img_filename}"): + os.system(f"cp {img_dir}/{img_filename} {data_dir}/{dataset_filename}") + q, a = pair["q"], pair["a"] + new_pair["problem"] = q + # get the thinking path + + new_pair["thinking"] = "<think>" + new_pair["thinking"] + "</think>" + new_pair["solution"] = f"<answer> {a} </answer>" + new_pair["file_name"] = dataset_filename + mapped_pairs.append(new_pair) + with open(f"{data_dir}/metadata.jsonl", "w") as f: + for pair in mapped_pairs: + f.write(json.dumps(pair) + "\n") + + train_dataset = load_dataset( + "imagefolder", + data_dir=data_dir, + split="train", + ) + return train_dataset + + +def create_val_dataset( + json_file, + data_dir, + val_num=VAL_NUM, + image_dir="/home/lilei/Visual-R1/CLEVR_CoGenT_v1.0/images/valB", +): + os.makedirs(data_dir, exist_ok=True) + val = json.load(open(json_file)) + random.shuffle(val) + val = val[:val_num] + val_pairs = [] + for idx, pair in tqdm(enumerate(val)): + q, a = pair["q"], pair["a"] + img_filename = pair["img_filename"] + # copy images to the DATA_DIR + val_filename = ( + img_filename.split(".")[0] + f"_{idx}." 
+ img_filename.split(".")[1] + ) + if not os.path.exists(f"{data_dir}/{img_filename}"): + os.system(f"cp {image_dir}/{img_filename} {data_dir}/{val_filename}") + new_pair = {} + new_pair["problem"] = q + new_pair["solution"] = f" {a} " + new_pair["file_name"] = val_filename + val_pairs.append(new_pair) + with open(f"{data_dir}/metadata.jsonl", "w") as f: + for pair in val_pairs: + f.write(json.dumps(pair) + "\n") + val_dataset = load_dataset("imagefolder", data_dir=data_dir, split="train") + return val_dataset + + +# valA split +VALA_DATA_DIR = "data/Clevr_CoGenT_ValA" +VALB_DATA_DIR = "data/Clevr_CoGenT_ValB" +valA_json = ( + "/home/lilei/Visual-R1/data/clever_counting_problems_clevr_cogent_v1.0_valA.json" +) +valB_json = ( + "/home/lilei/Visual-R1/data/clever_counting_problems_clevr_cogent_v1.0_valB.json" +) +TRAIN_DATADIR = "data/Clevr_CoGenT_TrainA_R1" +train_dataset = create_r1_train_dataset( + "/home/lilei/Visual-R1/filter_results_v2/valid_pairs.jsonl", + TRAIN_DATADIR, +) + +# print(train_dataset) +valA_dataset = create_val_dataset( + valA_json, + VALA_DATA_DIR, + image_dir="/home/lilei/Visual-R1/CLEVR_CoGenT_v1.0/images/valA", +) +valB_dataset = create_val_dataset( + valB_json, + VALB_DATA_DIR, + image_dir="/home/lilei/Visual-R1/CLEVR_CoGenT_v1.0/images/valB", +) +valA_dataset.push_to_hub("MMInstruction/Clevr_CoGenT_ValA") +valB_dataset.push_to_hub("MMInstruction/Clevr_CoGenT_ValB") +train_dataset.push_to_hub("MMInstruction/Clevr_CoGenT_TrainA_R1") diff --git a/previous_version/Video-R1-main-previous/src/distill_r1/generate_scene_qa_pairs.ipynb b/previous_version/Video-R1-main-previous/src/distill_r1/generate_scene_qa_pairs.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a24effa528fccba4541e1f9a7b4360aa981592dd --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/distill_r1/generate_scene_qa_pairs.ipynb @@ -0,0 +1,569 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "3a704ea6-2e61-4aaa-97aa-416579c9bc13", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c4920a8f-cddd-4063-8cab-215d238b5dad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CLEVR_trainA_scenes.json CLEVR_valA_scenes.json CLEVR_valB_scenes.json\n" + ] + } + ], + "source": [ + "!ls CLEVR_CoGenT_v1.0/scenes" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "934fa005-3b2a-43ed-8a71-6a12b7579546", + "metadata": {}, + "outputs": [], + "source": [ + "split = \"valB\"\n", + "clevr_train_json = f\"CLEVR_CoGenT_v1.0/scenes/CLEVR_{split}_scenes.json\"\n", + "train_qs = f\"CLEVR_CoGenT_v1.0/questions/CLEVR_{split}_questions.json\"\n", + "data = json.load(open(clevr_train_json))\n", + "qs = json.load(open(train_qs))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "1f0d6180-94c4-4aea-bd2b-8d5cfeb0aecb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'pixel_coords': [343, 131, 11.278693199157715], 'size': 'small', 'color': 'green', 'material': 'metal', 'shape': 'sphere', '3d_coords': [0.9906095862388611, 2.083291530609131, 0.3499999940395355], 'rotation': 107.73596690369371}, {'pixel_coords': [396, 172, 9.857704162597656], 'size': 'small', 'color': 'cyan', 'material': 'rubber', 'shape': 'sphere', '3d_coords': [2.69626522064209, 1.5257188081741333, 0.3499999940395355], 'rotation': 305.3536122513589}, {'pixel_coords': 
[115, 182, 8.91348934173584], 'size': 'large', 'color': 'yellow', 'material': 'rubber', 'shape': 'cylinder', '3d_coords': [0.049163494259119034, -2.864100217819214, 0.699999988079071], 'rotation': 161.8370138842408}, {'pixel_coords': [203, 131, 10.548327445983887], 'size': 'large', 'color': 'purple', 'material': 'rubber', 'shape': 'cube', '3d_coords': [-0.4719269275665283, -0.5699371695518494, 0.699999988079071], 'rotation': 159.41862667811446}, {'pixel_coords': [253, 75, 13.141877174377441], 'size': 'large', 'color': 'red', 'material': 'rubber', 'shape': 'cube', '3d_coords': [-2.036878824234009, 2.222999334335327, 0.699999988079071], 'rotation': 37.40490732771224}]\n", + "len: 5\n" + ] + } + ], + "source": [ + "print(data['scenes'][0]['objects'])\n", + "print(\"len: \", len(data['scenes'][0]['objects']))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7c828ca4-08f9-4927-a745-224a95379c2f", + "metadata": {}, + "outputs": [], + "source": [ + "def object_info_to_description(object_list):\n", + " descriptions = []\n", + " random.shuffle(object_list)\n", + " for obj in object_list:\n", + " desc = f\"A {obj['size']} {obj['color']} {obj['material']} {obj['shape']}\"\n", + " desc += f\" rotated {obj['rotation']:.1f}° located at\"\n", + " desc += f\" 3D coordinates ({obj['3d_coords'][0]:.2f}, {obj['3d_coords'][1]:.2f}, {obj['3d_coords'][2]:.2f})\"\n", + " desc += f\" and pixel coordinates ({obj['pixel_coords'][0]}, {obj['pixel_coords'][1]}, {obj['pixel_coords'][2]:.2f})\"\n", + " descriptions.append(desc)\n", + " \n", + " final_description = \"Scene Description:\\n\"\n", + " for i, desc in enumerate(descriptions, 1):\n", + " final_description += f\"{desc}\\n\"\n", + " \n", + " return final_description" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "cb048e25-d554-4bd7-bf11-878e071b5987", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Scene Description:\\nA large yellow rubber cylinder rotated 161.8° located at 3D coordinates (0.05, -2.86, 0.70) and pixel coordinates (115, 182, 8.91)\\nA large purple rubber cube rotated 159.4° located at 3D coordinates (-0.47, -0.57, 0.70) and pixel coordinates (203, 131, 10.55)\\nA large red rubber cube rotated 37.4° located at 3D coordinates (-2.04, 2.22, 0.70) and pixel coordinates (253, 75, 13.14)\\nA small green metal sphere rotated 107.7° located at 3D coordinates (0.99, 2.08, 0.35) and pixel coordinates (343, 131, 11.28)\\nA small cyan rubber sphere rotated 305.4° located at 3D coordinates (2.70, 1.53, 0.35) and pixel coordinates (396, 172, 9.86)\\n'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "object_info_to_description(data['scenes'][0]['objects'])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ffacd5f3-e9a4-46ca-8c50-187ab12c9f1b", + "metadata": {}, + "outputs": [], + "source": [ + "img2obj_dict = {}\n", + "for scene in data['scenes']:\n", + " obj_list = scene['objects']\n", + " img2obj_dict[scene['image_filename']] = obj_list" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "db35f03c-1529-4776-bf4f-3bd44e960e5f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'question_index': 0,\n", + " 'question_family_index': 29,\n", + " 'image_index': 0,\n", + " 'question': 'The big thing that is in front of the large rubber cube in front of the small thing that is behind the tiny matte ball is what color?',\n", + " 'answer': 'yellow',\n", + " 'image_filename': 
'CLEVR_valB_000000.png',\n", + " 'split': 'valB',\n", + " 'program': [{'value_inputs': [], 'inputs': [], 'function': 'scene'},\n", + " {'value_inputs': ['small'], 'inputs': [0], 'function': 'filter_size'},\n", + " {'value_inputs': ['rubber'], 'inputs': [1], 'function': 'filter_material'},\n", + " {'value_inputs': ['sphere'], 'inputs': [2], 'function': 'filter_shape'},\n", + " {'value_inputs': [], 'inputs': [3], 'function': 'unique'},\n", + " {'value_inputs': ['behind'], 'inputs': [4], 'function': 'relate'},\n", + " {'value_inputs': ['small'], 'inputs': [5], 'function': 'filter_size'},\n", + " {'value_inputs': [], 'inputs': [6], 'function': 'unique'},\n", + " {'value_inputs': ['front'], 'inputs': [7], 'function': 'relate'},\n", + " {'value_inputs': ['large'], 'inputs': [8], 'function': 'filter_size'},\n", + " {'value_inputs': ['rubber'], 'inputs': [9], 'function': 'filter_material'},\n", + " {'value_inputs': ['cube'], 'inputs': [10], 'function': 'filter_shape'},\n", + " {'value_inputs': [], 'inputs': [11], 'function': 'unique'},\n", + " {'value_inputs': ['front'], 'inputs': [12], 'function': 'relate'},\n", + " {'value_inputs': ['large'], 'inputs': [13], 'function': 'filter_size'},\n", + " {'value_inputs': [], 'inputs': [14], 'function': 'unique'},\n", + " {'value_inputs': [], 'inputs': [15], 'function': 'query_color'}]}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qs['questions'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "66b746fc-569c-4922-a442-79dbbc09e33b", + "metadata": {}, + "outputs": [], + "source": [ + "random.shuffle(qs['questions'])\n", + "cnt = 0 \n", + "qa_pairs = [] \n", + "added_pair = set()\n", + "for qd in qs['questions']:\n", + " img_idx = qd['image_filename']\n", + " total_count = len(img2obj_dict[img_idx]) # object list length\n", + " desc = object_info_to_description(img2obj_dict[img_idx])\n", + " question, answer = qd['question'], qd['answer']\n", + " if 'how many' in question.lower() or 'number' in question.lower():\n", + " qa_pairs.append({\n", + " \"img_filename\": img_idx,\n", + " 'q': question,\n", + " 'a': answer,\n", + " 'description': desc \n", + " })\n", + " if img_idx not in added_pair:\n", + " qa_pairs.append({\n", + " \"img_filename\": img_idx,\n", + " 'q': \"How many items are there in the described scene?\",\n", + " 'a': total_count,\n", + " 'description': desc \n", + " })\n", + " added_pair.add(img_idx)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "c271fa7b-fed5-472f-a302-6ec203c4b787", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "59978" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(qa_pairs)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "b0da8a70-c3f5-4e48-b384-3684933d72ef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "14884" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(added_pair)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "c648587e-2ec0-427c-b594-f55dd187b4d9", + "metadata": {}, + "outputs": [], + "source": [ + "# save for later loading\n", + "with open(f\"clever_counting_problems_clevr_cogent_v1.0_{split}.json\", 'w') as fw:\n", + " json.dump( qa_pairs, fw, indent=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "b3a8cbe4-4261-41d3-a481-43a0b1cc2795", + "metadata": 
{}, + "outputs": [], + "source": [ + "random.shuffle(qa_pairs)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "d6dff4e7-65dd-4e82-82df-340ec2a57919", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'img_filename': 'CLEVR_trainA_048403.png',\n", + " 'q': 'How many things are both on the right side of the big yellow rubber thing and left of the purple ball?',\n", + " 'a': '5',\n", + " 'description': 'Scene Description:\\nA large red rubber cylinder rotated 291.3° located at 3D coordinates (-0.89, -2.73, 0.70) and pixel coordinates (101, 152, 10.04)\\nA small purple metal sphere rotated 247.7° located at 3D coordinates (2.93, 0.87, 0.35) and pixel coordinates (379, 183, 9.66)\\nA large cyan rubber cylinder rotated 114.5° located at 3D coordinates (-2.40, 2.23, 0.70) and pixel coordinates (246, 82, 13.94)\\nA small red metal cylinder rotated 109.9° located at 3D coordinates (-0.95, 1.77, 0.35) and pixel coordinates (270, 113, 12.83)\\nA small red rubber cylinder rotated 343.7° located at 3D coordinates (-0.12, -0.74, 0.35) and pixel coordinates (209, 153, 10.82)\\nA large red rubber cylinder rotated 324.5° located at 3D coordinates (-2.71, -2.21, 0.70) and pixel coordinates (84, 119, 11.59)\\nA small red metal cylinder rotated 1.1° located at 3D coordinates (2.88, -0.12, 0.35) and pixel coordinates (342, 200, 9.12)\\nA small gray rubber cube rotated 144.9° located at 3D coordinates (0.79, 0.98, 0.35) and pixel coordinates (299, 145, 11.19)\\nA large yellow rubber cube rotated 90.0° located at 3D coordinates (-1.78, -0.31, 0.70) and pixel coordinates (180, 110, 12.05)\\n'},\n", + " {'img_filename': 'CLEVR_trainA_048403.png',\n", + " 'q': 'How many items are there in the described scene?',\n", + " 'a': 9,\n", + " 'description': 'Scene Description:\\nA large red rubber cylinder rotated 291.3° located at 3D coordinates (-0.89, -2.73, 0.70) and pixel coordinates (101, 152, 10.04)\\nA small purple metal sphere rotated 247.7° located at 3D coordinates (2.93, 0.87, 0.35) and pixel coordinates (379, 183, 9.66)\\nA large cyan rubber cylinder rotated 114.5° located at 3D coordinates (-2.40, 2.23, 0.70) and pixel coordinates (246, 82, 13.94)\\nA small red metal cylinder rotated 109.9° located at 3D coordinates (-0.95, 1.77, 0.35) and pixel coordinates (270, 113, 12.83)\\nA small red rubber cylinder rotated 343.7° located at 3D coordinates (-0.12, -0.74, 0.35) and pixel coordinates (209, 153, 10.82)\\nA large red rubber cylinder rotated 324.5° located at 3D coordinates (-2.71, -2.21, 0.70) and pixel coordinates (84, 119, 11.59)\\nA small red metal cylinder rotated 1.1° located at 3D coordinates (2.88, -0.12, 0.35) and pixel coordinates (342, 200, 9.12)\\nA small gray rubber cube rotated 144.9° located at 3D coordinates (0.79, 0.98, 0.35) and pixel coordinates (299, 145, 11.19)\\nA large yellow rubber cube rotated 90.0° located at 3D coordinates (-1.78, -0.31, 0.70) and pixel coordinates (180, 110, 12.05)\\n'}]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qa_pairs[:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "a6a66364-5b47-4138-91d6-a045404d21b1", + "metadata": {}, + "outputs": [], + "source": [ + "def query_r1(query='who are you?', model=\"deepseek-ai/DeepSeek-R1\"):\n", + " # Create the chat completion\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=[\n", + " {'role': 'user', \n", + " 'content': query}\n", + " ],\n", + " 
stream=False,\n", + " )\n", + " # Print the response\n", + " return response.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "e5d5649f-c4e3-4f3f-b76e-7f7ed27f68e8", + "metadata": {}, + "outputs": [], + "source": [ + "def format_query(qa_dict):\n", + " query = \"Answer the question according to scene description.\\n\\n\"\n", + " query += qa_dict['description']\n", + " query += f\"\\nQuestion:\\n{qa_dict['q']}\"\n", + " return query \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "7f568a4e-f217-464a-8329-bbefb64d9653", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Okay, let's see. The user is asking how many items are there in the described scene. Let me go through the scene description step by step.\n", + "\n", + "So, the scene description lists each object with details like color, material, shape, rotation, 3D coordinates, and pixel coordinates. Each entry starts with \"A\" which usually indicates one item each. Let me count each one.\n", + "\n", + "First entry: \"A small green metal cylinder...\" That's one. Second: \"A small blue rubber cylinder...\" Second item. Third: \"A small cyan rubber cylinder...\" That's three. Fourth: \"A large cyan metal sphere...\" Four. Fifth: \"A large brown metal cube...\" Five. Sixth: \"A large yellow rubber cube...\" Six. Seventh: \"A large brown rubber cylinder...\" That's seven. \n", + "\n", + "Wait, did I miss any? Let me check again. The list has entries from \"A small green...\" up to the seventh one. Each sentence starts with \"A\", which suggests each is a separate item. No commas separating multiple items in a single entry. Each has different attributes and coordinates, so they must all be distinct. \n", + "\n", + "So the answer should be 7 items.\n", + "\n", + "\n", + "There are 7 items in the described scene. 
Each entry corresponds to one distinct object, listed by their properties, coordinates, and rotations.\n", + "None\n" + ] + } + ], + "source": [ + "debug_query = format_query(qa_pairs[0])\n", + "print(query_r1(debug_query))" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "cdc4231a-8ef4-4cf6-a575-d84ae7bbd0b5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Answer the question accordingly to scene description.\n", + "\n", + "Scene Description:\n", + "A small green metal cylinder rotated 329.5° located at 3D coordinates (-2.49, -1.65, 0.35) and pixel coordinates (111, 132, 11.81)\n", + "A small blue rubber cylinder rotated 312.2° located at 3D coordinates (-1.73, -2.91, 0.35) and pixel coordinates (76, 163, 10.57)\n", + "A small cyan rubber cylinder rotated 48.4° located at 3D coordinates (-2.10, -0.22, 0.35) and pixel coordinates (172, 118, 12.41)\n", + "A large cyan metal sphere rotated 27.4° located at 3D coordinates (1.52, -1.26, 0.70) and pixel coordinates (247, 181, 9.33)\n", + "A large brown metal cube rotated 107.7° located at 3D coordinates (-0.73, 2.39, 0.70) and pixel coordinates (290, 92, 12.93)\n", + "A large yellow rubber cube rotated 288.2° located at 3D coordinates (0.52, 0.63, 0.70) and pixel coordinates (279, 130, 11.09)\n", + "A large brown rubber cylinder rotated 229.8° located at 3D coordinates (2.38, 0.38, 0.70) and pixel coordinates (343, 166, 9.77)\n", + "\n", + "Question:\n", + "How many items are there in the described scene?\n" + ] + } + ], + "source": [ + "print(debug_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "4cf90eb6-2cce-4e3d-8190-c44168a66dca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'img_filename': 'CLEVR_train_044000.png',\n", + " 'q': 'How many rubber objects are either small blue spheres or small things?',\n", + " 'a': '2',\n", + " 'description': 'Scene Description:\\nA large purple rubber sphere rotated 78.4° located at 3D coordinates (2.27, 0.87, 0.70) and pixel coordinates (360, 156, 9.49)\\nA large gray metal cube rotated 152.7° located at 3D coordinates (2.79, -1.26, 0.70) and pixel coordinates (301, 213, 7.91)\\nA large purple metal sphere rotated 79.2° located at 3D coordinates (-2.66, -2.74, 0.70) and pixel coordinates (51, 126, 10.61)\\nA large blue rubber sphere rotated 279.5° located at 3D coordinates (1.31, 2.72, 0.70) and pixel coordinates (376, 112, 11.19)\\nA small brown rubber cube rotated 124.1° located at 3D coordinates (-2.49, 2.61, 0.35) and pixel coordinates (251, 82, 13.79)\\nA small green rubber sphere rotated 323.9° located at 3D coordinates (-2.02, 0.45, 0.35) and pixel coordinates (197, 109, 12.22)\\n'}" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qa_pairs[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "33fcd4eb-1f33-47d4-a453-76ef00e6d5d3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Okay, let's tackle this question. The user wants to know how many rubber objects are either small blue spheres or small things. Hmm, first, I need to parse each part of the question correctly.\n", + "\n", + "Let's go back to the scene description. The objects are listed with their attributes: size, color, material, shape, rotation, 3D and pixel coordinates. 
The key here is to filter the objects based on the criteria given.\n", + "\n", + "The question has two parts: \"small blue spheres\" OR \"small things\". So any rubber object that is either a small blue sphere or any small thing (regardless of other attributes) counts. But wait, do both categories need to be rubber? Because the question says \"rubber objects are either...\". So rubber is the material, and the condition is either being a small blue sphere or a small thing. So first, all rubber objects, and among them, count those that are either small blue spheres or small (regardless of color or shape). Wait, no. Let me re-read the question.\n", + "\n", + "The question is: \"How many rubber objects are either small blue spheres or small things?\" So rubber is the material. Within all rubber objects, count the ones that are either (1) small blue spheres OR (2) small things. Wait, does (2) being small things mean that even if they're small and of any color or shape, but they must be rubber?\n", + "\n", + "Yes, because the entire set is rubber objects. So first, select all objects where material is rubber. Then, within those, count how many are either (1) small, blue, sphere, or (2) small (any color or shape). Wait, but the structure is \"either X or Y\", where X is \"small blue sphere\" and Y is \"small things\". But \"small things\" would include all small objects, regardless of color and shape. However, since we've already narrowed it to rubber objects, \"small things\" here would be small rubber objects, regardless of color and shape.\n", + "\n", + "But wait, the condition is within rubber objects. So for the first part, small blue spheres (must check size, color, shape) and for the second part, small things (size is small, any color and shape, but since material is already rubber, that's covered). But wait, does the OR merge the two conditions, leading to rubber objects that are either (small blue spheres) or (small any-color any-shape).\n", + "\n", + "So the combined condition is: object is rubber AND ( (is small AND blue AND sphere) OR (is small) ). Wait, but if the condition for the second part is just \"small things\", which would imply any small object. But the entire group is rubber objects. So it's rubber objects that are small blue spheres OR rubber objects that are small (regardless of color or shape).\n", + "\n", + "Wait, no. Let's parse the sentence again: \"rubber objects are either small blue spheres or small things\". The \"either/or\" applies to \"small blue spheres\" and \"small things\". So, each rubber object has to be either (a small blue sphere) or (a small thing). However, \"small things\" here might refer to any small object regardless of other attributes. So if a rubber object is small, regardless of color or shape, it counts. But then, the first condition (small blue sphere) would also satisfy being a small thing. Wait, so there's an overlap. But when dealing with OR conditions, we have to avoid double-counting. So, the actual count is the number of small rubber objects (since any small rubber object is covered by the second part, which includes all small rubber objects, whether blue spheres or not) plus any objects that are small blue spheres but not rubber? But no, the question specifies \"rubber objects\", so we can ignore non-rubber ones.\n", + "\n", + "Wait, perhaps the wording is: \"rubber objects that are either small blue spheres or small things\". So \"small things\" here must reference other attributes. Wait, maybe there's ambiguity here. 
If the user is grouping \"small things\" as a separate category, regardless of being the other attributes. Let me try to approach this step by step.\n", + "\n", + "First, list all the rubber objects from the scene description:\n", + "\n", + "Looking through the list:\n", + "\n", + "1. A large purple rubber sphere ... location etc.\n", + "So material rubber, large, purple, sphere.\n", + "\n", + "2. A large gray metal cube ... metal, so not rubber.\n", + "\n", + "3. A large purple metal sphere ... metal, not rubber.\n", + "\n", + "4. A large blue rubber sphere ... rubber, large, blue, sphere.\n", + "\n", + "5. A small brown rubber cube ... rubber, small, brown, cube.\n", + "\n", + "6. A small green rubber sphere ... rubber, small, green, sphere.\n", + "\n", + "So the rubber objects are items 1,4,5,6.\n", + "\n", + "Now, for each of these four rubber objects, check if they are either (small blue sphere) or (small things).\n", + "\n", + "Let's check each:\n", + "\n", + "1. Large purple rubber sphere. Size: large. So for the first condition (small blue sphere): no. For the second condition (small thing): size large, so no. Not included.\n", + "\n", + "4. Large blue rubber sphere. Large. So, even though it's blue and sphere, the size is large. So (small blue sphere: no, since size is large). (small thing: no.) So not included.\n", + "\n", + "5. Small brown rubber cube. Size is small. So regardless of color and shape (brown, cube), this is a small thing. So it meets the second condition. So count it.\n", + "\n", + "6. Small green rubber sphere. Size: small. So meets the second condition (small thing). But it's a sphere and green, not blue. So this counts under the second category.\n", + "\n", + "Now, what about the first condition: small blue spheres. Are there any rubber objects that are small blue spheres? The only blue rubber sphere is item 4, which is large. So none of the rubber objects are small blue spheres.\n", + "\n", + "So now the two options under the OR are:\n", + "\n", + "- small blue sphere (none found)\n", + "- small things (items 5 and 6), both are rubber.\n", + "\n", + "Therefore total count is 0 + 2 = 2.\n", + "\n", + "Wait, but the initial question is: number of rubber objects that are either (small blue spheres) or (small things). Since \"small blue spheres\" are a subset of \"small things\" (since if something is a small blue sphere, it is a small thing). Therefore if there were any such objects, they would be counted twice, but in this case, since there are none, the total is just the count of small things.\n", + "\n", + "But according to the data, the rubber objects that are small are two: item 5 (small brown cube) and item 6 (small green sphere). Neither is a blue sphere, so no overlap. Therefore, sum is 2.\n", + "\n", + "So the answer should be 2.\n", + "\n", + "I need to confirm again whether the OR condition includes the union, meaning any rubber object that is either condition. So if either condition is true, count it.\n", + "\n", + "Since both 5 and 6 are small, even though they aren't blue spheres, they belong to the second category (small things). The blue sphere (if it were small) would have been counted as well. But in this case, the valid counts are 5 and 6. So yes, answer is 2.\n", + "\n", + "\n", + "The answer is 2. \n", + "\n", + "**Explanation:**\n", + "First, identify all rubber objects from the scene description:\n", + "1. Large purple rubber sphere (not small)\n", + "2. Large blue rubber sphere (not small)\n", + "3. 
Small brown rubber cube (small)\n", + "4. Small green rubber sphere (small)\n", + "\n", + "Next, apply the criteria:\n", + "- **Small blue spheres**: None of the rubber objects meet this (the only blue rubber sphere is large).\n", + "- **Small rubber objects (regardless of color/shape)**: The small brown rubber cube and small green rubber sphere qualify (2 objects).\n", + "\n", + "Thus, there are **2 rubber objects** that fit either criterion.\n" + ] + } + ], + "source": [ + "debug_query1 = format_query(qa_pairs[1])\n", + "res1 = query_r1(debug_query1)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "8e516bd0-f1e5-4898-88a3-3afcaf0ae34e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'img_filename': 'CLEVR_train_044000.png',\n", + " 'q': 'How many rubber objects are either small blue spheres or small things?',\n", + " 'a': '2',\n", + " 'description': 'Scene Description:\\nA large purple rubber sphere rotated 78.4° located at 3D coordinates (2.27, 0.87, 0.70) and pixel coordinates (360, 156, 9.49)\\nA large gray metal cube rotated 152.7° located at 3D coordinates (2.79, -1.26, 0.70) and pixel coordinates (301, 213, 7.91)\\nA large purple metal sphere rotated 79.2° located at 3D coordinates (-2.66, -2.74, 0.70) and pixel coordinates (51, 126, 10.61)\\nA large blue rubber sphere rotated 279.5° located at 3D coordinates (1.31, 2.72, 0.70) and pixel coordinates (376, 112, 11.19)\\nA small brown rubber cube rotated 124.1° located at 3D coordinates (-2.49, 2.61, 0.35) and pixel coordinates (251, 82, 13.79)\\nA small green rubber sphere rotated 323.9° located at 3D coordinates (-2.02, 0.45, 0.35) and pixel coordinates (197, 109, 12.22)\\n'}" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qa_pairs[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92784518-49e2-443d-9541-2785cbb944cf", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/previous_version/Video-R1-main-previous/src/distill_r1/grpo_r1_distilled.jpg b/previous_version/Video-R1-main-previous/src/distill_r1/grpo_r1_distilled.jpg new file mode 100644 index 0000000000000000000000000000000000000000..613fc67dc5458c308db82328e9281ae204248fa2 --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/distill_r1/grpo_r1_distilled.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f6135ef837a375090b07e29a18fd2d5cb819100c73d5dc7ea63401f66caf59 +size 303839 diff --git a/previous_version/Video-R1-main-previous/src/distill_r1/query_r1.py b/previous_version/Video-R1-main-previous/src/distill_r1/query_r1.py new file mode 100644 index 0000000000000000000000000000000000000000..fc617bfe4969996b464bd89d6a9766308040bf37 --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/distill_r1/query_r1.py @@ -0,0 +1,114 @@ +import json +import random +import os +from openai import OpenAI +from tqdm import tqdm +import concurrent.futures +from typing import List, Dict, Optional +from datetime import datetime +from threading import Lock +import time 
+from prompt import R1_SYS_PROMPT +# Initialize the client +client = OpenAI( + api_key=os.environ.get("SL_KEY", "YOUR_SILCONFLOW_KEY"), + base_url="https://api.siliconflow.cn/v1", +) + +# Create a lock for thread-safe file writing +file_lock = Lock() + +def format_query(qa_dict: Dict, v2=False) -> str: + query = "Answer the question according to scene description.\n\n" + query += qa_dict["description"] + query += f"\nQuestion:\n{qa_dict['q']}" + if v2: + query += "\nInstructions:\n" + query += "1. Carefully analyze the scene description\n" + query += "2. Provide your reasoning if necessary\n" + query += "3. For the final answer, start a new line with '**The answer is: **' followed by your answer\n" + return query + +def write_to_jsonl(result: Dict, filename: str): + """Thread-safe function to write a result to JSONL file""" + with file_lock: + with open(filename, 'a') as f: + f.write(json.dumps(result) + '\n') + +def query_r1(qa_pair: Dict, output_file: str, model: str = "deepseek-ai/DeepSeek-R1", v2=False) -> Optional[Dict]: + query = format_query(qa_pair, v2=v2) + try: + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": R1_SYS_PROMPT}, + {"role": "user", "content": query}], + stream=False, + max_tokens=4096 + ) + result = { + **qa_pair, + "r1_response": response.choices[0].message.content, + "timestamp": datetime.now().isoformat() + } + # Write result immediately + write_to_jsonl(result, output_file) + time.sleep(4) + return result + except Exception as e: + print(f"Error processing query: {e}") + error_result = { + **qa_pair, + "error": str(e), + "timestamp": datetime.now().isoformat() + } + write_to_jsonl(error_result, f"errors_{output_file}") + time.sleep(10) + return None + +def process_qa_pairs_parallel(qa_pairs: List[Dict], output_file: str, max_workers: int = 10) -> List[Dict]: + successful_count = 0 + + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + # Create futures for all qa_pairs + futures = [executor.submit(query_r1, qa_pair, output_file, v2="v2" in output_file) for qa_pair in qa_pairs] + + # Process results as they complete with progress bar + results = [] + for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)): + try: + result = future.result() + if result is not None: + results.append(result) + successful_count += 1 + except Exception as e: + print(f"Failed to process query: {e}") + + return results + +if __name__ == "__main__": + # Load and shuffle QA pairs + random.seed(1234) + qa_pairs = json.load(open("/home/lilei/Visual-R1/data/clever_counting_problems_clevr_cogent_v1.0_trainA.json")) + random.shuffle(qa_pairs) + qa_pairs = qa_pairs[:10000] + # Create output filename with timestamp + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_file = f"r1_results_clevr_cogent_v1.0_trainA_v2.jsonl" + + finished = set() + with open(output_file, 'r') as f: + for line in f: + ins = json.loads(line) + key = ins["img_filename"] + "-" + ins["q"] + "-" + str(ins["a"]) + finished.add(key) + qa_pairs = [ins for ins in qa_pairs if ins["img_filename"] + "-" + ins["q"] + "-" + str(ins["a"]) not in finished] + print("Finished: ", len(finished)) + print("Remaining: ", len(qa_pairs)) + # Process QA pairs in parallel + r1_results = process_qa_pairs_parallel(qa_pairs, output_file) + + # Print final statistics + print(f"Successfully processed {len(r1_results)} out of {len(qa_pairs)} queries") + print(f"Results saved to {output_file}") + print(f"Any errors were saved to 
errors_{output_file}") \ No newline at end of file diff --git a/previous_version/Video-R1-main-previous/src/eval/prompts/geoqa_test_prompts.jsonl b/previous_version/Video-R1-main-previous/src/eval/prompts/geoqa_test_prompts.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f3b346a50fc44443ca6aa2e6d72242bdd1979aff --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/eval/prompts/geoqa_test_prompts.jsonl @@ -0,0 +1,754 @@ +{"image_path": "./Geo170K/images/test/0.png", "question": "As shown in the figure, in triangle ABC, it is known that angle A = 80.0, angle B = 60.0, DE parallel BC, then the size of angle CED is ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/1.png", "question": "As shown in the figure, AB parallel CD, straight line EF intersects AB at point E, intersects CD at point F, EG bisects angle BEF, and it intersects CD at point G, angle 1 = 50.0, then angle 2 is equal to ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/2.png", "question": "As shown in the figure, BD bisects angle ABC, CD parallel AB, if angle BCD = 70.0, then the degree of angle CDB is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/3.png", "question": "As shown in the figure, AB ia tangent to circle O at point B, and the extended line of AO intersects circle O at point C. Connect BC, if angle A = 36.0, then angle C is equal to ()", "ground_truth": "27\u00b0"} +{"image_path": "./Geo170K/images/test/4.png", "question": "As shown in the figure, straight lines a and b intersect at point O. If angle 1 is equal to 50.0, then angle 2 is equal to ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/5.png", "question": "As shown in the figure, AB // CD, and EF intersects AB and CD at points E, F, angle 1 = 50.0, then the degree of angle 2 is ()", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/6.png", "question": "As shown in the figure, triangle ABC congruent triangle ADE, if angle B = 70.0, angle C = 30.0, angle DAC = 35.0, then the degree of angle EAC is ()", "ground_truth": "45\u00b0"} +{"image_path": "./Geo170K/images/test/7.png", "question": "As shown in the figure, triangle ABC congruent triangle DEF, points A and D, B and E are the corresponding vertices, and the measured BC = 5.0, BF = 7.0, then the length of EC is ()", "ground_truth": "3cm"} +{"image_path": "./Geo170K/images/test/8.png", "question": "As shown in the figure, in triangle ABC, angle C = 90.0, AC = BC, AD bisects angle CAB and it intersects BC at D, DE perpendicular AB at E, if AB = 6.0, then the perimeter of triangle DBE is ()", "ground_truth": "6cm"} +{"image_path": "./Geo170K/images/test/9.png", "question": "As shown in the figure, in triangle ABC, AB = AC, angle A = 36.0, the perpendicular bisector of AB intersects AC at D, and intersects AB at E, then the degree of angle BDC is ()", "ground_truth": "72\u00b0"} +{"image_path": "./Geo170K/images/test/10.png", "question": "As shown in the figure, in triangle ABC, angle C = 36.0, rotate triangle ABC anticlockwise around point A by 60.0 to get triangle AED, AD and BC intersect at point F, then the degree of angle AFC is ()", "ground_truth": "84\u00b0"} +{"image_path": "./Geo170K/images/test/11.png", "question": "As shown in the figure, the straight line AB parallel CD, Rttriangle DEF is placed as shown, angle EDF = 90.0, if angle 1 + angle F = 70.0, then the degree of angle 2 is ()", "ground_truth": "20\u00b0"} +{"image_path": 
"./Geo170K/images/test/12.png", "question": "As shown in the figure, AB parallel EF, CD perpendicular EF, angle BAC = 50.0, then angle ACD = ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/13.png", "question": "As shown in the figure, triangle ABC is the inscribed triangle of circle O, angle OAB = 35.0, then the degree of angle ACB is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/14.png", "question": "Place the ruler and the right triangle board as shown in the figure (angle ACB is a right angle), it is known that angle 1 = 30.0, then the size of angle 2 is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/15.png", "question": "As shown in the figure, the straight line a and the straight line b are intercepted by the straight line c, b perpendicular c, the foot of perpendicular is the point A, angle 1 = 70.0. If the line b is parallel to the line a, the line b can be rotated () clockwise around the point A", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/16.png", "question": "As shown in the figure, in circle O, chord AC parallel radius OB, angle BOC = 50.0, then the degree of angle OAB is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/17.png", "question": "In \u25a1ABCD, the diagonal AC and BD intersect at point O, angle DAC = 42.0, angle CBD = 23.0, then angle COD is ().", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/18.png", "question": "The positions of straight lines a, b, c, and d are shown in the figure. If angle 1 = 58.0, angle 2 = 58.0, angle 3 = 70.0, then angle 4 is equal to ()", "ground_truth": "110\u00b0"} +{"image_path": "./Geo170K/images/test/19.png", "question": "As shown in the figure, a parallel b, angle 1 = 158.0, angle 2 = 42.0, angle 4 = 50.0. Then angle 3 = ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/20.png", "question": "As shown in the figure, AB is the diameter of circle O, C and D are two points on circle O. Connect AC, BC, CD, and OD respectively. If angle DOB = 140.0, then angle ACD = ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/21.png", "question": "As shown in the figure, it is known that angle 1 = angle 2 = angle 3 = 55.0, then the degree of angle 4 is ()", "ground_truth": "125\u00b0"} +{"image_path": "./Geo170K/images/test/22.png", "question": "As shown in the figure, in the diamond ABCD, M and N are respectively AB and CD, and AM = CN, MN and AC intersect at point O. Connect BO. If angle DAC = 28.0, then the degree of angle OBC is ()", "ground_truth": "62\u00b0"} +{"image_path": "./Geo170K/images/test/23.png", "question": "As shown in the figure, PA and PB are tangent to circle O at A and B respectively. 
If angle C = 65.0, then the degree of angle P is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/24.png", "question": "As shown in the figure, the line a parallel b and they intersect the line c at a and b respectively, angle 1 = 50.0, then the degree of angle 2 is ()", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/25.png", "question": "As shown in the figure, EF parallel BC, AC bisects angle BAF, angle B = 50.0, then the degree of angle C is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/26.png", "question": "As shown in the figure, in order to measure the height of the school flagpole, Xiaodong uses a bamboo pole with a length of 3.2 as a measuring tool, and moves the bamboo pole so that the top of the bamboo pole and the shadow of the top of the flag pole fall on the same point on the ground. At this time, the distance between the bamboo pole and this point is 8.0 , 22.0 from the flagpole, the height of the flagpole is ().", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/27.png", "question": "As shown in the figure, when planting trees on flat ground, the plant spacing (the horizontal distance between two adjacent trees) is required to be 4.0. If trees are planted on a hillside with a slope of 0.75, and the plant spacing is also required to be 4.0, then the slope distance between two adjacent trees is ()", "ground_truth": "5m"} +{"image_path": "./Geo170K/images/test/28.png", "question": "As shown in the figure, the right triangle ABC and the equilateral triangle ABD are respectively drawn with the line segment AB as the edge, where angle ACB = 90.0. Connect CD, when the length of CD is the largest, the size of angle CAB is ()", "ground_truth": "45\u00b0"} +{"image_path": "./Geo170K/images/test/29.png", "question": "As shown in the figure, D is the intersection point of the angular bisector BD and CD of triangle ABC. If angle A = 50.0, then angle D = ()", "ground_truth": "115\u00b0"} +{"image_path": "./Geo170K/images/test/30.png", "question": "As shown in the figure, it is known that OA = OB = OC and angle ACB = 30.0, then the size of angle AOB is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/31.png", "question": "As shown in the figure, the straight line a parallel b, the point B is on the straight line b, and AB perpendicular BC, angle 2 = 65.0, then the degree of angle 1 is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/32.png", "question": "Circle I is the inscribed circle of triangle ABC, D, E, F are 3.0 tangent points, if angle DEF = 52.0, then the degree of angle A is ()", "ground_truth": "76\u00b0"} +{"image_path": "./Geo170K/images/test/33.png", "question": "As shown in the figure, the straight line AB parallel CD, angle 1 = 136.0, angle E is a right angle, then angle C is equal to ()", "ground_truth": "46\u00b0"} +{"image_path": "./Geo170K/images/test/34.png", "question": "As shown in the figure, the straight lines AB and CD are intercepted by the straight line EF. 
If AB parallel CD, angle 1 = 100.0, then the size of angle 2 is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/35.png", "question": "As shown in the figure: AB parallel DE, angle B = 30.0, angle C = 110.0, the degree of angle D is ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/36.png", "question": "As shown in the figure, AB is the diameter of circle O, point C is on circle O, passing point C to draw the tangent of circle O and it intersects the extended line of AB at point D. Connect AC. If angle D = 50.0, then the degree of angle A is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/37.png", "question": "As shown in the figure, AB parallel CD, CP intersects AB at O, AO = PO, if angle C = 50.0, then the degree of angle A is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/38.png", "question": "As shown in the figure, in triangle ABC, AB = AC, passing point A to draw AD parallel BC. If angle 1 = 70.0, then the size of angle BAC is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/39.png", "question": "Fold a rectangular piece of paper with equal width as shown in the figure. If angle 1 = 140.0, then the degree of angle 2 is ()", "ground_truth": "110\u00b0"} +{"image_path": "./Geo170K/images/test/40.png", "question": "As shown in the figure, it is known that the straight lines a and b are intercepted by the straight line c, a parallel b, angle 1 = 50.0, then angle 2 = ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/41.png", "question": "The positions of straight lines a, b, c, and d are shown in the figure. If angle 1 = 100.0, angle 2 = 100.0, angle 3 = 125.0, then angle 4 is equal to ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/42.png", "question": "The figure is a schematic diagram of a kite stand made by Xiao Liu. It is known that BC parallel PQ, AB: AP = 2.0:5.0, AQ = 20.0, then the length of CQ is ()", "ground_truth": "12cm"} +{"image_path": "./Geo170K/images/test/43.png", "question": "As shown in the figure, triangle ODC is the figure obtained by rotating triangle OAB clockwise around point O by 30.0. If point D happens to fall on AB, and the degree of angle AOC is 100.0, then the degree of angle DOB is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/44.png", "question": "As shown in the figure, the two street lamps A and B are separated by 30.0. One night, when Xiaogang went straight 25.0 from the bottom of street lamp A to the bottom of street lamp B, he found that the top of his figure just touched the bottom of street lamp B. 
It is known that Xiaogang's height is 1.5, then the height of the street lamp is ()", "ground_truth": "9\u7c73"} +{"image_path": "./Geo170K/images/test/45.png", "question": "As shown in the figure, C is a point on circle O, O is the center of the circle, if angle C = 35.0, then the degree of angle AOB is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/46.png", "question": "As shown in the figure, if AB parallel CD, angle A = 70.0, then the degree of angle 1 is ()", "ground_truth": "110\u00b0"} +{"image_path": "./Geo170K/images/test/47.png", "question": "As shown in the figure, the straight line AB parallel CD, angle C = 44.0, angle E is a right angle, then angle 1 is equal to ()", "ground_truth": "134\u00b0"} +{"image_path": "./Geo170K/images/test/48.png", "question": "As shown in the figure, A, B, C are any three points on circle O, if angle BOC = 100.0, then the degree of angle BAC is ()", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/49.png", "question": "As shown in the figure, in the inscribed pentagon ABCDE of circle O, angle CAD = 35.0, angle AED = 115.0, then the degree of angle B is ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/50.png", "question": "As shown in the figure, in triangle ABC, angle C = 90.0, AD is the bisector of angle BAC, DE perpendicular AB at E, if DE = 8.0, DB = 10.0, then BC is equal to ()", "ground_truth": "18cm"} +{"image_path": "./Geo170K/images/test/51.png", "question": "As shown in the figure, the straight lines AB and CD intersect at point O, EO perpendicular AB, and the foot of perpendicular is point O, angle BOD = 50.0, then angle COE = ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/52.png", "question": "As shown in the figure, the points B, E, C, and F are on the same straight line, triangle ABC congruent triangle DEF, angle B = 45.0, angle F = 65.0, then the degree of angle COE is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/53.png", "question": "As shown in the figure, put the two vertices of a right triangle plate with 45.0 angles on the opposite edges of the ruler. If angle 1 = 27.5, then angle 2 is equal to ()", "ground_truth": "17.5\u00b0"} +{"image_path": "./Geo170K/images/test/54.png", "question": "As shown in the figure, the straight line a parallel b, the point B is on the straight line b, and AB perpendicular BC, angle 1 = 55.0, then the degree of angle 2 is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/55.png", "question": "As shown in the figure, the straight line a parallel b, the straight line c intersects a and b, angle 1 = 55.0, then angle 2 = ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/56.png", "question": "Place a ruler and a triangular plate as shown in the figure, angle 1 = 40.0, then the degree of angle 2 is ()", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/57.png", "question": "As shown in the figure, the straight lines AB and CD intersect at point O, and the radial OM bisects angle AOC, ON perpendicular OM. 
If angle AOC = 70.0, then the degree of angle CON is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/58.png", "question": "As shown in the figure, the diameter CD of circle O crosses the midpoint G of chord EF, angle DCF = 20.0, then angle EOD is equal to ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/59.png", "question": "As shown in the figure, AB is parallel to CD, if angle B = 20.0, then angle C is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/60.png", "question": "As shown in the figure, AB parallel CD, angle CED = 90.0, angle AEC = 35.0, then the size of angle D is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/61.png", "question": "As shown in the figure, AB parallel CD, AD bisects angle BAC, and angle C = 80.0, then the degree of angle D is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/62.png", "question": "As shown in the figure, AB parallel CD, if angle 2 = 135.0, then the degree of angle 1 is ()", "ground_truth": "45\u00b0"} +{"image_path": "./Geo170K/images/test/63.png", "question": "As shown in the figure, AB parallel CD, point E is on BC, and CD = CE, angle D = 74.0, then the degree of angle B is ()", "ground_truth": "32\u00b0"} +{"image_path": "./Geo170K/images/test/64.png", "question": "As shown in the figure, AB parallel CD, point E is on the extended line of CA. If angle BAE = 40.0, then the size of angle ACD is ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/65.png", "question": "As shown in the figure, use the benchmark BE to measure the height of the tree CD. If the length of the benchmark BE is 2.0, AB = 3.0, AC = 9.0, and the points A, E, and D are on a straight line, then the tree CD is ()", "ground_truth": "6\u7c73"} +{"image_path": "./Geo170K/images/test/66.png", "question": "After filling some oil in a cylindrical oil tank with a diameter of 200.0, the cross section is shown in the figure. If the width of the oil surface AB = 160.0, the maximum depth of oil is ()", "ground_truth": "40cm"} +{"image_path": "./Geo170K/images/test/67.png", "question": "As shown in the figure, angle 1 = angle 2, angle 3 = 30.0, then angle 4 is equal to ()", "ground_truth": "150\u00b0"} +{"image_path": "./Geo170K/images/test/68.png", "question": "As shown in the figure, AB parallel CD, angle B = 20.0, angle D = 60.0, then the degree of angle BED is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/69.png", "question": "As shown in the figure, the straight line AB parallel CD, AE bisects angle CAB, angle ACD = 40.0, then the degree of angle AEC is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/70.png", "question": "Xuan Xuan and Kai Kai are in the same mathematics study group. In a math activity class, they each used a square piece of paper with a side length of 12.0 to make a pair of jigsaw puzzles, and cooperated to design the work shown in the picture. Help them calculate the sum of the area of \u200b\u200bthe three figures circled in the figure, it is ()", "ground_truth": "36cm"} +{"image_path": "./Geo170K/images/test/71.png", "question": "As shown in the figure, the straight line a parallel b, angle 2 = 35.0, angle 3 = 40.0, then the degree of angle 1 is ()", "ground_truth": "105\u00b0"} +{"image_path": "./Geo170K/images/test/72.png", "question": "As shown in the figure, BD is the angular bisector of triangle ABC, AE perpendicular BD, and the foot of perpendicular is F. 
If angle ABC = 35.0, angle C = 50.0, then the degree of angle CDE is ()", "ground_truth": "45\u00b0"} +{"image_path": "./Geo170K/images/test/73.png", "question": "As shown in the figure, the straight line AD parallel BC, if angle 1 = 42.0, angle BAC = 78.0, then the degree of angle 2 is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/74.png", "question": "As shown in the figure, the perimeter of \u25a1ABCD is 16.0, AC and BD intersect at point O, and OE perpendicular AC and it intersects AD at point E, then the perimeter of triangle DCE is ()", "ground_truth": "8cm"} +{"image_path": "./Geo170K/images/test/75.png", "question": "As shown in the figure, triangle ABC is the inscribed triangle of circle O. If angle ABC = 70.0, then the degree of angle AOC is equal to ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/76.png", "question": "As shown in the figure, AB parallel CD, radial AE intersects CD at point F, if angle 1 = 115.0, then the degree of angle 2 is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/77.png", "question": "As shown in the figure, a // b, put the right-angled vertex of a triangular plate on the straight line a, angle 1 = 42.0, then the degree of angle 2 is ()", "ground_truth": "48\u00b0"} +{"image_path": "./Geo170K/images/test/78.png", "question": "As shown in the figure, a parallel b, point B is on the straight line b, and AB perpendicular BC, angle 1 = 36.0, then angle 2 = ()", "ground_truth": "54\u00b0"} +{"image_path": "./Geo170K/images/test/79.png", "question": "As shown in the figure, if angle 1 = angle 3, angle 2 = 60.0, then the degree of angle 4 is ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/80.png", "question": "As shown in the figure, AB parallel CD, AE bisects angle CAB and CD at point E, if angle C = 70.0, then the degree of angle AED is ()", "ground_truth": "125\u00b0"} +{"image_path": "./Geo170K/images/test/81.png", "question": "As shown in the figure, the perimeter of parallelogram ABCD is 32.0, AC, BD intersect at point O, and OE perpendicular AC and it intersects AD at point E, then the perimeter of triangle DCE is ()", "ground_truth": "16cm"} +{"image_path": "./Geo170K/images/test/82.png", "question": "As shown in the figure, a cylinder with a bottom circumference of 24.0 and a height of 5.0, the shortest route that an ant passes along the surface from point A to point B is ()", "ground_truth": "13m"} +{"image_path": "./Geo170K/images/test/83.png", "question": "As shown in the figure, in triangle ABC, angle BAC = 90.0, AD perpendicular BC at point D, AE bisects angle DAC, angle B = 50.0, so the degree of angle DAE is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/84.png", "question": "As shown in the figure, the line l parallel m parallel n, the vertices B and C of the triangle ABC are on the line n and line m, the angle between BC and the line n is 25.0, and angle ACB = 60.0, then the degree of angle a is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/85.png", "question": "As shown in the figure, it is known that in circle O, the central angle angle AOB = 100.0, then the angle of circumference angle ACB is equal to ().", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/86.png", "question": "As shown in the figure, triangle ABC is inscribed in circle O with radius 1.0, if angle BAC = 60.0, then the length of BC is ()", "ground_truth": "\u221a{3}"} +{"image_path": 
"./Geo170K/images/test/87.png", "question": "As shown in the figure, the circle O is the circumscribed circle of triangle ABC, and the bisector of angle BAC and angle ABC intersects at point I. Extend AI and it intersects circle O at point D. Connect BD and DC. If the radius of circle O is 8.0, angle BAC = 120.0, then the length of DI is ()", "ground_truth": "8\u221a{3}"} +{"image_path": "./Geo170K/images/test/88.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC. Connect OB and OC, if the radius of circle O is 2.0, angle BAC = 60.0, then the length of BC is ()", "ground_truth": "2\u221a{3}"} +{"image_path": "./Geo170K/images/test/89.png", "question": "As shown in the figure, AB and CD are the two diameters of circle O, chord DE parallel AB, arc DE is the arc of 50.0, then angle BOC is ()", "ground_truth": "115\u00b0"} +{"image_path": "./Geo170K/images/test/90.png", "question": "As shown in the figure, points A, B, and C are on circle O, angle ABO = 22.0, angle ACO = 42.0, then angle BOC is equal to ()", "ground_truth": "128\u00b0"} +{"image_path": "./Geo170K/images/test/91.png", "question": "As shown in the figure, A, B, C are three points on circle O, angle ACB = 25.0, then the degree of angle BAO is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/92.png", "question": "As shown in the figure, it is known that in circle O, angle AOB = 50.0, then the degree of the angle of circumference angle ACB is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/93.png", "question": "As shown in the figure, AB is the diameter of circle O, C and D are two points on circle O, angle BAC = 30.0, arc AD = arc CD. Then angle DAC is equal to ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/94.png", "question": "As shown in the figure, AB is the diameter of circle O, C and D are two points on the circle, angle D = 34.0, then the degree of angle BOC is ()", "ground_truth": "112\u00b0"} +{"image_path": "./Geo170K/images/test/95.png", "question": "As shown in the figure, points A, B, and C are all on circle O, when angle OBC = 40.0, the degree of angle A is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/96.png", "question": "As shown in the figure, the diameter AB of circle O is perpendicular to the chord CD, the foot of perpendicular is the point E, angle CAO = 22.5, OC = 6.0, then the length of CD is ()", "ground_truth": "6\u221a{2}"} +{"image_path": "./Geo170K/images/test/97.png", "question": "As shown in the figure, in circle O, chord BC and radius OA intersect at point D. Connect AB and OC. If angle A = 60.0, angle ADC = 90.0, then the degree of angle C is ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/98.png", "question": "As shown in the figure, points A, B, and P are three points on circle O, if angle AOB = 40.0, then the degree of angle APB is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/99.png", "question": "As shown in the figure, AB is the chord of circle O, OC perpendicular AB and it intersects circle O at point C. 
Connect OA, OB, BC, if angle ABC = 25.0, then the size of angle AOB is ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/100.png", "question": "As shown in the figure, given the angle of circumference angle A = 50.0, then the size of angle OBC is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/101.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is the chord of circle O, angle ADC = 26.0, then the degree of angle CAB is ()", "ground_truth": "64\u00b0"} +{"image_path": "./Geo170K/images/test/102.png", "question": "As shown in the figure, in circle O, AB is the diameter, CD is the chord, AB perpendicular CD, the foot of perpendicular is the point E. Connect CO and AD, if angle BOC = 30.0, then the degree of angle BAD is ()", "ground_truth": "15\u00b0"} +{"image_path": "./Geo170K/images/test/103.png", "question": "As shown in the figure, AB is the diameter of circle O, points C and D are two points on the circle, and angle AOC = 126.0, then angle CDB = ()", "ground_truth": "27\u00b0"} +{"image_path": "./Geo170K/images/test/104.png", "question": "As shown in the figure, AB is the diameter of circle O, points C and D are on circle O, and point C is the midpoint of arc BD, passing point C to draw the perpendicular line EF of AD and it intersects straight line AD at point E, if the radius of circle O is 2.5, the length of AC is 4.0, then the length of CE is ()", "ground_truth": "\\frac{12}{5}"} +{"image_path": "./Geo170K/images/test/105.png", "question": "As shown in the figure, the points A, B, and C are on circle O, and it is known that angle ABC = 130.0, then angle AOC = ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/106.png", "question": "As shown in the figure, it is known that the radius of circle O is 5.0, the central angles of chords AB and CD are angle AOB, angle COD, and angle AOB is complementary to angle COD, chord CD = 8.0, then the length of chord AB is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/107.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is the chord of circle O, and the extended lines of AB and CD intersect at point E. Given that AB = 2 DE, angle E = 16.0, then the degree of angle ABC is ()", "ground_truth": "24\u00b0"} +{"image_path": "./Geo170K/images/test/108.png", "question": "This question examines the theorem of angle of circumference, the key is to answer it based on the relationship between the central angle and the angle of circumference of the same chord. 4.0. As shown in the figure, AB is the diameter of circle O, C is the point on circle O (except A and B), angle AOD = 136.0, then the degree of angle C is ()", "ground_truth": "22\u00b0"} +{"image_path": "./Geo170K/images/test/109.png", "question": "As shown in the figure, AB is the diameter of circle O, and points C and D are on circle O. If angle BOD = 130.0, then the degree of angle ACD is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/110.png", "question": "Shaoxing is a famous bridge township. 
As shown in the figure, the distance CD from the top of the round arch bridge to the water surface is 8.0, and the arch radius OC is 5.0, so the width of the water surface AB is ()", "ground_truth": "8m"} +{"image_path": "./Geo170K/images/test/111.png", "question": "As shown in the figure, it is known that angle \u03b1 = 130.0, then angle \u03b2 = ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/112.png", "question": "As shown in the figure, when the width of the water surface AB in the circular bridge hole is 8.0, the arc ACB is exactly a semicircle. When the water surface rises 1.0, the water surface width A\u2032B\u2032 in the bridge hole is ()", "ground_truth": "2\u221a{15}\u7c73"} +{"image_path": "./Geo170K/images/test/113.png", "question": "In the right triangle ABC, angle CAB = 90.0, angle ABC = 72.0, AD is the angle bisector of angle CAB, and the intersection BC is at point D, and crossing point C is the high line CE on the AD side in triangle ACD, then the degree of angle ECD is ()", "ground_truth": "27\u00b0"} +{"image_path": "./Geo170K/images/test/114.png", "question": "As shown in the figure, AO is the height of the cone, the bottom radius of the cone OB = 0.7, the length of AB is 2.5, then the length of AO is ()", "ground_truth": "2.4"} +{"image_path": "./Geo170K/images/test/115.png", "question": "As shown in the figure, in circle O, chord AC and BD intersect at point E, arc AB = arc BC = arc CD, if angle BEC = 110.0, then angle BDC = ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/116.png", "question": "As shown in the figure, in the sector OAB with a radius of 1.0 and a central angle of 90.0, OA and OB are the diameters respectively as a semicircle, and the area of \u200b\u200bthe shaded part in the figure is ()", "ground_truth": "\\frac{1}{2}cm\u00b2"} +{"image_path": "./Geo170K/images/test/117.png", "question": "Use a sector paper sheet with a central angle of 120.0 and a radius of 6.0 to roll into a conical bottomless paper cap (as shown in the picture), then the bottom perimeter of the paper cap is ()", "ground_truth": "4\u03c0cm"} +{"image_path": "./Geo170K/images/test/118.png", "question": "The picture shows a small paper cap with a conical chimney. The length of its generatrix l is 13.0 and its height h is 12.0. The area of \u200b\u200bpaper required to make this paper cap is (the seams are ignored) ()", "ground_truth": "65\u03c0"} +{"image_path": "./Geo170K/images/test/119.png", "question": "Use a sector piece of paper with a central angle of 120.0 and a radius of 3.0 to roll into a cone-shaped bottomless paper cap (as shown in the picture), then the height of the paper is ()", "ground_truth": "2\u221a{2}cm"} +{"image_path": "./Geo170K/images/test/120.png", "question": "As shown in the figure, the expanded figure of the lateral surface of a cone is a semicircle with a radius of 10.0, then the radius of its bottom is ()", "ground_truth": "5"} +{"image_path": "./Geo170K/images/test/121.png", "question": "As shown in the figure, use a sector cardboard with a radius of 24.0 to make a conical hat (the seams are ignored). 
If the radius of the bottom surface of the conical hat is 10.0, then the area of \u200b\u200bthis sector cardboard is ()", "ground_truth": "240\u03c0cm^{2}"} +{"image_path": "./Geo170K/images/test/122.png", "question": "As shown in the figure, the length of the generatrix of the cone is 5.0, and the length of the height line is 4.0, then the bottom area of \u200b\u200bthe cone is ()", "ground_truth": "9\u03c0cm^{2}"} +{"image_path": "./Geo170K/images/test/123.png", "question": "The production process of paper umbrellas in our country is very ingenious. As shown in the figure, whether the umbrella is opened or closed, the handle AP always bisects the angle angle BAC formed by the two ribs in the same plane, and AE = AF, DE = DF, so as to ensure that the umbrella ring can slide along the handle. When a toy umbrella is opened, the BDC is on the same straight line. If AB = 50.0, AD = 14.0, then the area of \u200b\u200boil paper required to make such a paper umbrella is (don't remember the seam) ()", "ground_truth": "2400\u03c0cm^{2}"} +{"image_path": "./Geo170K/images/test/124.png", "question": "As shown in the figure, a sector with a central angle of 120.0 and a radius of 6.0 encloses the side of a cone (the joints are ignored), then the height of the cone is ()", "ground_truth": "4\u221a{2}"} +{"image_path": "./Geo170K/images/test/125.png", "question": "As shown in the figure, in Rttriangle ABC, angle ACB = 90.0, AC = 4.0, BC = 3.0, rotate triangle ABC around the line where AC is located to obtain a rotating body, then the lateral area of \u200b\u200bthe rotating body is ()", "ground_truth": "15\u03c0"} +{"image_path": "./Geo170K/images/test/126.png", "question": "As shown in the figure, cut a circle and a sector piece of paper on the paper so that it can form a cone model. If the radius of the circle is 1.0 and the central angle of the sector is equal to 90.0, then the radius of the sector is ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/127.png", "question": "As shown in a sector iron sheet OAB, it is known that OA = 30.0, angle AOB = 120.0, the worker master combines OA and OB to form a conical chimney cap (the joints are ignored), then the radius of the bottom circle of the chimney cap is ()", "ground_truth": "10cm"} +{"image_path": "./Geo170K/images/test/128.png", "question": "As shown in the figure, it is known that the radius of the bottom surface of the cone is 6.0, and the length of the generatrix is \u200b\u200b10.0, then the lateral area of \u200b\u200bthe cone is ()", "ground_truth": "60\u03c0"} +{"image_path": "./Geo170K/images/test/129.png", "question": "Lulu cuts a circle and a sector piece of paper from the paper (as shown in the picture), and uses them to form a cone model. If the radius of the circle is 1.0. The central angle of the sector is equal to 120.0, then the radius of the sector is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/130.png", "question": "As shown in the figure, there is a sector with a central angle of 120.0 and a radius of 6.0. 
If OA and OB are overlapped to form a cone side, the diameter of the bottom of the cone is ()", "ground_truth": "4cm"} +{"image_path": "./Geo170K/images/test/131.png", "question": "As shown in the picture, the length of the generatrix of the cone-shaped tent roof is AB = 10.0, the bottom radius is BO = 5.0, and the lateral area of \u200b\u200bthe cone-shaped tent roof (excluding the seams) is ()", "ground_truth": "50\u03c0m^{2}"} +{"image_path": "./Geo170K/images/test/132.png", "question": "As shown in the figure, in circle O, the length of chord AB is 10.0, and the angle of circumference angle ACB = 45.0, then the diameter of the circle AD is ()", "ground_truth": "10\u221a{2}"} +{"image_path": "./Geo170K/images/test/133.png", "question": "As shown in the figure, in triangle ABC. angle C = 90.0, point D is a moving point on BC (point D does not coincide with point C). The circle with CD as the diameter intersects AD at point P. If AC = 6.0. The minimum length of the line segment BP is 2.0. Then the length of AB is ()", "ground_truth": "2\u221a{13}"} +{"image_path": "./Geo170K/images/test/134.png", "question": "As shown in the figure, in order to measure the height AB of a pavilion (the distance from the top A to the horizontal ground BD), Xiaoming placed a step DE (DE = BC = 0.6) that is the same height as the pavilion step BC beside the pavilion, find A, B, C Three points are collinear), place a mirror horizontally at point G on the platform, and measure CG = 12.0, and then move back along the straight line CG to point E. At this time, you can see the top A of the pavilion in the mirror, and measure GE = 2.0, Xiaoming's height EF = 1.6, then the height of the pavilion AB is approximately ()", "ground_truth": "10.2\u7c73"} +{"image_path": "./Geo170K/images/test/135.png", "question": "As shown in the figure, in order to estimate the width of the Jing River, a target point P is selected on the opposite bank of the Jing River, and points Q and S are taken near the bank, so that the points P, Q, and S are in a straight line, and the straight line PS is perpendicular to the river. Choose an appropriate point T on the straight line a passing point S and perpendicular to PS. The intersection of PT and the straight line b passing point Q and perpendicular to PS is R. If QS = 60.0, ST = 120.0, QR = 80.0, then the width of the river PQ is ()", "ground_truth": "120m"} +{"image_path": "./Geo170K/images/test/136.png", "question": "As shown in the picture, Xiaoying designed a flashlight to measure the height of an ancient city wall. Place a horizontal plane mirror at point P. The light starts from point A and is reflected by the plane mirror and hits the top C of the ancient city wall CD. It is known that AB perpendicular BD, CD perpendicular BD. And it is measured that AB = 1.4, BP = 2.1, PD = 12.0. Then the height of the ancient city wall CD is ()", "ground_truth": "8\u7c73"} +{"image_path": "./Geo170K/images/test/137.png", "question": "As shown in the figure, in circle O, point M is the midpoint of arc AB. Connect MO and extend it to intersect circle O at point N, connect BN, if angle AOB = 140.0, then the degree of angle N is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/138.png", "question": "As shown in the figure, in order to measure the degree of tree AB, a certain mathematics learning interest group measured the length of the tree's shadow BC in the sun as 9.0. At the same moment, they also measured the shadow length of Xiaoliang in the sun as 1.5. 
Knowing that Xiaoliang's height is 1.8, then the height of tree AB is ()", "ground_truth": "10.8m"} +{"image_path": "./Geo170K/images/test/139.png", "question": "As shown in the picture, it is an ancient masher in the countryside. It is known that the height of the support column AB is 0.3, the length of the pedal DE is 1.0, and the distance from the support point A to the foot D is 0.6. When foot D touches the ground, the head point E rises ()", "ground_truth": "0.5\u7c73"} +{"image_path": "./Geo170K/images/test/140.png", "question": "As shown in the figure, the light source P is directly above the crossbar AB, the shadow of AB under the light is CD, AB parallel CD, AB = 2.0, CD = 5.0, the distance between point P and CD is 3.0, then the distance between AB and CD is ().", "ground_truth": "\\frac{9}{5}"} +{"image_path": "./Geo170K/images/test/141.png", "question": "As shown in the figure, Xiaoqiang made a small hole imaging device in which the length of the paper tube is 15.0. He prepared a candle with a length of 20.0. To get an image with a height of 4.0, the distance between the candle and the paper tube should be ()", "ground_truth": "75cm"} +{"image_path": "./Geo170K/images/test/142.png", "question": "As shown in the figure, in a badminton game, Lin Dan, the athlete standing at M in the field, clicks the request from N to point B in the opponent. It is known that the net height OA = 1.52, OB = 4.0, OM = 5.0, then when Lin Dan takes off, the distance from the hitting point to the ground NM = ()", "ground_truth": "3.42m"} +{"image_path": "./Geo170K/images/test/143.png", "question": "While measuring the height of the building, Xiao Ming first measured the shadow length BA of the building on the ground as 15.0 (as shown in the figure), and then set up a benchmark with a height of 2.0 at A, and measured the shadow length AC of the benchmark as 3.0, then the height of the building is ()", "ground_truth": "10\u7c73"} +{"image_path": "./Geo170K/images/test/144.png", "question": "As shown in the figure: the length of two vertical telephone poles AB is 6.0, the length of CD is 3.0, AD intersects BC at point E, then the length of the distance from E to the ground EF is ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/145.png", "question": "As shown in the figure, a square DEFG model should be cut on a piece of triangle ABC paper. Among them, G and F are on BC, D and E are on AB and AC respectively, AH perpendicular BC and it intersects DE at M, if BC = 12.0, AH = 8.0, then the edge length of the square DEFG is ()", "ground_truth": "\\frac{24}{5}cm"} +{"image_path": "./Geo170K/images/test/146.png", "question": "On 27.0 2009.0, 10.0, 2009, Shanghai team player Wu Di came to the fore in the National Games and defeated the top-seeded men's singles player Zeng Shaoxuan with a score of 2.0:0.0, and won the men's singles championship in tennis at the National Games. The picture below is a ball played by Wu Di in the final. It is known that the net height is 0.8, and the horizontal distance from the hitting point to the net is 4.0. When the ball is played, the ball can hit the net and the landing point is exactly 6.0 away from the net. 
Then the height h of the racket hit is ()", "ground_truth": "\\frac{4}{3}\u7c73"} +{"image_path": "./Geo170K/images/test/147.png", "question": "As shown in the figure, points A, B, and C are on circle O, angle ABO = 40.0, angle ACO = 30.0, then the degree of angle BOC is ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/148.png", "question": "As shown in the figure, AB is a ladder leaning against the wall, the foot of the ladder is away from the wall 2.0, the point D on the ladder is away from the wall 1.8, the length of BD is 0.6, then the length of the ladder is ()", "ground_truth": "6.00\u7c73"} +{"image_path": "./Geo170K/images/test/149.png", "question": "In order to measure the height of the school flagpole AC, a school math interest group erected a benchmark DF with a length of 1.5 at point F. As shown in the figure, the length of the shadow EF of DF is measured as 1.0, and then measure the length of the shadow BC of the flagpole AC to be 6.0, then the height of the flagpole AC is ()", "ground_truth": "9\u7c73"} +{"image_path": "./Geo170K/images/test/150.png", "question": "As shown in the figure, Xiaodong uses a bamboo pole with a length of 3.2 as a measuring tool to measure the height of the school flagpole, and moves the bamboo pole so that the shadow on the top of the pole and the flag pole falls on the same point on the ground. At this time, the distance between the bamboo pole and this point is 8.0, and the distance from the flag pole is 22.0, then the height of the flag pole is ()", "ground_truth": "12m"} +{"image_path": "./Geo170K/images/test/151.png", "question": "As shown in the figure, CD is a plane mirror, the light is emitted from point A, reflected by point E on CD, and irradiated to point B. If the incident angle is \u03b1, AC perpendicular CD, BD perpendicular CD, the feet of perpendicular are C, D, and AC = 3.0, BD = 6.0, CD = 10.0, then the length of the line segment ED is ()", "ground_truth": "\\frac{20}{3}"} +{"image_path": "./Geo170K/images/test/152.png", "question": "As shown in the figure, Xiaoming designed two right angles to measure the width of the river BC, he measured AB = 2.0, BD = frac {7.0}{3.0}, CE = 9.0, then the width of the river BC is ()", "ground_truth": "\\frac{40}{7}\u7c73"} +{"image_path": "./Geo170K/images/test/153.png", "question": "As shown in the figure, a student saw a tree by the lake. He visually observed that the distance between himself and the tree is 20.0, and the reflection of the top of the tree in the water is 5.0 far away from him. 
The student's height is 1.7, and the height of the tree is ( ).", "ground_truth": "5.1"} +{"image_path": "./Geo170K/images/test/154.png", "question": "As shown in the figure, AB is a fixed climbing ladder leaning on the wall, the distance from the foot of the ladder B to the foot of the wall C is 1.6, the distance from the point D on the ladder to the wall is 1.4, and the length of the ladder is 0.5, then the length of the ladder is ()", "ground_truth": "4m"} +{"image_path": "./Geo170K/images/test/155.png", "question": "As shown in the figure, the sunlight enters the room from the windows of the classroom, the length of the shadow of the window frame AB on the ground DE = 1.8, the distance from the lower eaves of the window to the ground BC = 1.0, EC = 1.2, then the height of the window AB is ()", "ground_truth": "1.5m"} +{"image_path": "./Geo170K/images/test/156.png", "question": "As shown in the figure, AB is a long ladder leaning on the wall, the foot of the ladder B is away from the wall 1.6, the point D on the ladder is away from the wall 1.4, the length of BD is 0.55, then the length of the ladder is ()", "ground_truth": "4.40\u7c73"} +{"image_path": "./Geo170K/images/test/157.png", "question": "As shown in the figure, the student Xiao Li whose height is 1.6 wants to measure the height of the school's flagpole. When he stands at C, the shadow of the top of his head coincides with the shadow of the top of the flagpole, and AC = 2.0, BC = 8.0, then the height of the flagpole is ()", "ground_truth": "8\u7c73"} +{"image_path": "./Geo170K/images/test/158.png", "question": "As shown in the figure, the quadrilateral ABCD and A\u2032B\u2032C\u2032D\u2032 are similar figures with the similar center at point O. If OA\u2032: A\u2032A = 2.0:1.0, the area of \u200b\u200bthe quadrilateral A\u2032B\u2032C\u2032D\u2032 is 12.0 ^ 2, then the area of \u200b\u200bthe quadrilateral ABCD is ()", "ground_truth": "27cm^{2}"} +{"image_path": "./Geo170K/images/test/159.png", "question": "As shown in the figure, in triangle ABC, angle C = 90.0, if AC = 4.0, BC = 3.0, then cosB is equal to ()", "ground_truth": "\\frac{3}{5}"} +{"image_path": "./Geo170K/images/test/160.png", "question": "As shown in the figure, in Rttriangle ABC, angle C = 90.0, AC = 4.0, BC = 3.0, then the value of sinB is equal to ()", "ground_truth": "\\frac{4}{5}"} +{"image_path": "./Geo170K/images/test/161.png", "question": "As shown in the figure, in Rttriangle ABC, angle C = 90.0, AC = 3.0, BC = 4.0, then the value of cosA is ()", "ground_truth": "\\frac{3}{5}"} +{"image_path": "./Geo170K/images/test/162.png", "question": "As shown in the figure, it is known that in Rttriangle ABC, angle C = 90.0, AB = 10.0, AC = 8.0, then the value of tanB is ()", "ground_truth": "\\frac{4}{3}"} +{"image_path": "./Geo170K/images/test/163.png", "question": "As shown in the figure, the homothetic figures are composed of a triangle ruler and its center projection under the light. 
If the ratio of the distance from the bulb to the vertex of the triangle ruler to the distance from the bulb to the corresponding vertex of the triangular ruler projection is 2.0:5.0, and the length of one edge of the triangle ruler is 8.0, Then the corresponding edge length of the projection triangle is ()", "ground_truth": "20cm"} +{"image_path": "./Geo170K/images/test/164.png", "question": "As shown in the figure, given the angle of circumference angle BAC = 40.0, then the degree of the central angle angle BOC is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/165.png", "question": "As shown in the figure, in Rttriangle ABC, angle C = 90.0, AC = 4.0, AB = 5.0, then the value of cosA is ()", "ground_truth": "\\frac{4}{5}"} +{"image_path": "./Geo170K/images/test/166.png", "question": "As shown in the figure, in triangle ABC, angle C = Rtangle , AB = 5.0, AC = 4.0, then the value of sinA is ()", "ground_truth": "\\frac{3}{5}"} +{"image_path": "./Geo170K/images/test/167.png", "question": "In Rttriangle ABC, angle C = 90.0, AB = 2.0, BC = 1.0, then the value of sinB is ()", "ground_truth": "\\frac{\u221a{3}}{2}"} +{"image_path": "./Geo170K/images/test/168.png", "question": "As shown in the figure, in Rttriangle ABC, it is known that angle A = 90.0, AC = 3.0, AB = 4.0, then sinB is equal to ()", "ground_truth": "\\frac{3}{5}"} +{"image_path": "./Geo170K/images/test/169.png", "question": "In Rttriangle ACB, angle C = 90.0, BC = 5.0, AC = 12.0, then sinA = ()", "ground_truth": "\\frac{5}{13}"} +{"image_path": "./Geo170K/images/test/170.png", "question": "As shown in the figure, in the rectangular coordinate system, P is the point in the first quadrant, and its coordinates are (4.0,m), and the cosine value of the angle \u03b1 between OP and the positive semi-axis of the x-axis is frac {3.0}{5.0}, then the value of tanangle \u03b1 is ()", "ground_truth": "\\frac{4}{3}"} +{"image_path": "./Geo170K/images/test/171.png", "question": "As shown in the figure, it is known that in Rttriangle ABC, angle C = 90.0, AC = 6.0, BC = 8.0, then the value of tanA is ()", "ground_truth": "\\frac{4}{3}"} +{"image_path": "./Geo170K/images/test/172.png", "question": "As shown in the figure: In Rttriangle ABC, angle C = 90.0, AC = 8.0, AB = 10.0, then the value of sinB is equal to ()", "ground_truth": "\\frac{4}{5}"} +{"image_path": "./Geo170K/images/test/173.png", "question": "As shown in the figure, in Rttriangle ABC, angle C = 90.0, AC = 1.0, BC = 2.0, then the value of cosB is ()", "ground_truth": "\\frac{2\u221a{5}}{5}"} +{"image_path": "./Geo170K/images/test/174.png", "question": "As shown in the figure, in the plane rectangular coordinate system, the coordinates of point A are (2.0,3.0), then the value of tan\u03b1 is ()", "ground_truth": "\\frac{3}{2}"} +{"image_path": "./Geo170K/images/test/175.png", "question": "As shown in the figure, it is known that in Rttriangle ABC, angle C = 90.0, AC = 4.0, tanA = frac {1.0}{2.0}, then the length of BC is ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/176.png", "question": "As shown in the figure, in ABC, AB = AC = 4.0, BC = 6.0, then cosB = ()", "ground_truth": "\\frac{3}{4}"} +{"image_path": "./Geo170K/images/test/177.png", "question": "As shown in the figure, in Rttriangle ABC, angle C = 90.0, AC = 4.0, AB = 5.0, then the value of sinB is ()", "ground_truth": "\\frac{4}{5}"} +{"image_path": "./Geo170K/images/test/178.png", "question": "As shown in the figure, the four small squares with edge length of 1.0 form a large 
square. A, B, and O are the vertices of the small squares, the radius of circle O is 1.0, and P is the point on circle O, and the small square is located at the upper right. , then sinangle APB is equal to ()", "ground_truth": "\\frac{\u221a{2}}{2}"} +{"image_path": "./Geo170K/images/test/179.png", "question": "As shown in the figure, the hypotenuse of Rttriangle ABC AB = 10.0, cosA = frac {3.0}{5.0}, then the length of BC is ()", "ground_truth": "8cm"} +{"image_path": "./Geo170K/images/test/180.png", "question": "As shown in the figure, in the quadrilateral ABCD, E and F are the midpoints of AB and AD respectively. If EF = 2.0, BC = 5.0, CD = 3.0, then tanC is equal to ()", "ground_truth": "\\frac{4}{3}"} +{"image_path": "./Geo170K/images/test/181.png", "question": "In Rttriangle ABC, angle ACB = 90.0, CD perpendicular AB at point D, if AC = 3.0, BC = 4.0, then tan\u03b1 is equal to ()", "ground_truth": "\\frac{3}{4}"} +{"image_path": "./Geo170K/images/test/182.png", "question": "As shown in the figure, in Rttriangle ABC, angle C = 90.0, sinA = frac {1.0}{3.0}, then the value of cosB is ()", "ground_truth": "\\frac{1}{3}"} +{"image_path": "./Geo170K/images/test/183.png", "question": "We know that if the sum of two acute angles is equal to a right angle, then these two angles are complementary to each other, referred to as complementary to each other. As shown in the figure, angle A and angle B are complementary, and there are: sinA = frac angle A's opposite hypotenuse = frac ac, \\cosB = frac angle B's adjacent hypotenuse = frac ac, so we know sinA = \\cosB, notice that in triangle ABC, angle A + angle B = 90.0, that is, angle B = 90.0-angle A, angle A = 90.0-angle B, so there is: sin( 90.0-A) = \\cosA, \\ cos( 90.0-A) = sinA. Try to complete the following multiple-choice questions: If \u03b1 is an acute angle and \\cos\u03b1 = frac {4.0}{5.0}, then the value of sin(90.0-\u03b1) is equal to ()", "ground_truth": "\\frac{4}{5}"} +{"image_path": "./Geo170K/images/test/184.png", "question": "As shown in the figure, it is known that AB and AD are the chords of circle O, angle BOD = 50.0, then the degree of angle BAD is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/185.png", "question": "As shown in the figure, in triangle ABC, the bisectors of the exterior angles of angle ABC and angle ACB intersects at point O, and angle BOC = 40.0, then angle A = ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/186.png", "question": "As shown in the figure, it is known that CD is the diameter of circle O, and the chord DE passing through the point D is parallel to the radius OA. 
If the angle D = 50.0, the degree of the angle C is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/187.png", "question": "As shown in the figure, point B is on circle O, chord AC parallel OB, angle BOC = 50.0, then angle OAB = ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/188.png", "question": "As shown in the figure, points A, B, and C are three points on circle O, if angle A = 40.0, then the degree of angle BOC is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/189.png", "question": "In circle O, AB is the diameter, CD is the chord, angle ABD = 28.0, then the degree of angle C is ()", "ground_truth": "62\u00b0"} +{"image_path": "./Geo170K/images/test/190.png", "question": "As shown in the figure, points A, B, and C are three points on circle O, if angle BOC = 80.0, then the degree of angle A is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/191.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is the chord of circle O, angle CDB = 40.0, then the degree of angle CBA is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/192.png", "question": "As shown in the figure, it is known that O is a point in the quadrilateral ABCD, OA = OB = OC, angle ABC = angle ADC = 65.0, then angle DAO + angle DCO = ()", "ground_truth": "165\u00b0"} +{"image_path": "./Geo170K/images/test/193.png", "question": "As shown in the figure, AB is the diameter of circle O, angle D = 33.0, then the degree of angle AOC is ()", "ground_truth": "114\u00b0"} +{"image_path": "./Geo170K/images/test/194.png", "question": "As shown in the figure, in circle O, OA perpendicular BC, angle AOB = 48.0, D is a point on circle O, then the degree of angle ADC is ()", "ground_truth": "24\u00b0"} +{"image_path": "./Geo170K/images/test/195.png", "question": "As shown in the figure, the three points A, B, and C are on circle O, angle ABO = 50.0, then angle ACB = ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/196.png", "question": "As shown in the figure, AC is the diameter of circle O, if angle OBC = 40.0, then the degree of angle AOB is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/197.png", "question": "As shown in the figure, in circle A, the known chord BC = 8.0, DE = 6.0, angle BAC + angle EAD = 180.0, then the radius of circle A is ()", "ground_truth": "5"} +{"image_path": "./Geo170K/images/test/198.png", "question": "Place the protractor on a broken piece of glass as shown in the figure, so that point A is on a semicircle, and the readings of points B and C are 105.0 and 155.0 respectively, then the size of angle BAC is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/199.png", "question": "As shown in the figure, points A and B are three points on circle O and AB = AC. 
Connect BO and CO, if angle ABC = 65.0, then the degree of angle BOC is ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/200.png", "question": "As shown in the figure, given that the degree of the central angle angle AOB is 110.0, then the angle of circumference angle ACB is equal to ()", "ground_truth": "125\u00b0"} +{"image_path": "./Geo170K/images/test/201.png", "question": "As shown in the figure, the cross section of a tunnel is a semicircle with a radius of 3.4, and a truck with a width of 3.2 can pass through the tunnel.", "ground_truth": "3m"} +{"image_path": "./Geo170K/images/test/202.png", "question": "As shown in the figure, AB and CD are the two diameters of circle O, the chord DE parallel AB, if the arc DE is the arc of 40.0, then angle BOC = ()", "ground_truth": "110\u00b0"} +{"image_path": "./Geo170K/images/test/203.png", "question": "As shown in the figure, in circle O, if point C is the midpoint of arc AB, angle A = 50.0, then angle BOC = ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/204.png", "question": "If AB parallel CD, angle C = 60.0, then angle A + angle E is equal to ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/205.png", "question": "Known: As shown in the figure, in circle O, OA perpendicular BC, angle AOB = 70.0, then the degree of angle ADC is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/206.png", "question": "As shown in the figure, CD is the chord of circle O, O is the center of the circle, fold the minor arc of circle O in half along CD, A is a point on the minor arc after folding in half, angle CAD = 110.0, then the degree of angle B is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/207.png", "question": "As shown in the figure, AB is the diameter of circle O, point C is a point on circle O, angle C = 20.0, then the degree of angle BOC is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/208.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, if angle AOB = 130.0, then the degree of angle ACB is ()", "ground_truth": "115\u00b0"} +{"image_path": "./Geo170K/images/test/209.png", "question": "As shown in the figure, in circle O, chord AB and CD intersect at point E, BE = DE, angle B = 40.0, then the degree of angle A is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/210.png", "question": "As shown in the figure, points A, B, C, D are on circle O, DE perpendicular OA, DF perpendicular OB, and the feet of perpendicular are E, F respectively. If angle EDF = 50.0, then the degree of angle C is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/211.png", "question": "As shown in the figure, CD is the diameter of circle O, chord AB intersects CD at point M, M is the midpoint of AB, point P is at arc AD, PC and AB intersect at point N, angle PNA = 60.0, then angle PDC is equal to ( )", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/212.png", "question": "As shown in the figure, AB is the diameter of circle O, chord CD perpendicular AB at E. 
Connect OC and AD, and angle A = 35.0, then angle AOC = ()", "ground_truth": "110\u00b0"} +{"image_path": "./Geo170K/images/test/213.png", "question": "As shown in the figure, in triangle ABC, AB = AC, draw a semicircle with BC as the diameter to intersect AB at E, and it intersects AC at D, the degree of arc CD is 40.0, then the degree of angle A is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/214.png", "question": "As shown in the figure, the points A, B, C, and P are on circle O, CD perpendicular OA, CE perpendicular OB, and the feet of perpendicular are D, E, angle DCE = 36.0, then the degree of angle P is ()", "ground_truth": "72\u00b0"} +{"image_path": "./Geo170K/images/test/215.png", "question": "As shown in the figure, points A, B, C, and P are on circle O, CD perpendicular OA, CE perpendicular OB, and the feet of perpendicular are D, E, angle DCE = 40.0, then the degree of angle P is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/216.png", "question": "As shown in the figure, the three points A, B, and C are on the circle. In triangle ABC, angle ABC = 70.0, angle ACB = 30.0, D is the midpoint of the arc BAC. Connect DB and DC, then the degree of angle DBC is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/217.png", "question": "As shown in the figure, AB and CD are the two chords of circle O. Connect AD and BC, if angle BCD = 50.0, then the degree of angle BAD is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/218.png", "question": "As shown in the figure, triangle ABC is inscribed in circle O, if angle OAB = 26.0, then the size of angle C is ()", "ground_truth": "64\u00b0"} +{"image_path": "./Geo170K/images/test/219.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, angle A = 70.0, then the size of angle BOC is ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/220.png", "question": "As shown in the figure, in circle O, chord AC parallel radius OB, angle BOC = 50.0, then the degree of angle OBA is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/221.png", "question": "As shown in the figure, if angle ABC = 30.0, then the degree of angle AOC is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/222.png", "question": "As shown in the figure, in circle O, CD is the diameter, point A, point B on circle O, connect OA, OB, AC, AB, if angle AOB = 40.0, CD parallel AB, then the size of angle BAC is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/223.png", "question": "As shown in the figure, it is a circular exhibition hall. In order to monitor the entire exhibition hall, two monitors A and B are installed on the circular edge. If the monitoring angle of monitor A is 65.0, the monitoring angle of monitor B is at least ( )", "ground_truth": "115\u00b0"} +{"image_path": "./Geo170K/images/test/224.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC. Connect OA and OB, angle AOB = 50.0, then the degree of angle C is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/225.png", "question": "It is known that: as shown in the figure, AB is the diameter of circle O, CD is the chord,. 
Connect AD, AC, angle CAB = 55.0, then angle D = ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/226.png", "question": "As shown in the figure, points A, B, and C are on circle O, if angle C = 35.0, then angle AOB = ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/227.png", "question": "As shown in the figure, AB is the diameter of circle O, and the degree of angle ADC is 35.0, then the degree of angle BOC is ()", "ground_truth": "110\u00b0"} +{"image_path": "./Geo170K/images/test/228.png", "question": "As shown in the figure, the quadrilateral ABCD is inscribed in circle O, AB is the diameter of circle O, and point C is the midpoint of arc BD. If angle DAB = 50.0, then the size of angle ABC is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/229.png", "question": "As shown in the figure, the quadrilateral ABCD is the inscribed quadrilateral of circle O, AB is the diameter of circle O. Connect BD. If angle BCD = 120.0, then the size of angle ABD is ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/230.png", "question": "As shown in the figure, points A, B, C, and D are on circle O, and point E is on the extended line of AD. If angle ABC = 60.0, then the degree of angle CDE is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/231.png", "question": "As shown in the figure, the quadrilateral ABCD is inscribed in the semicircle O, and it is known that angle ADC = 140.0, then the size of angle AOC is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/232.png", "question": "As shown in the figure, the quadrilateral ABCD is inscribed in circle O. If angle BOD = 138.0, then the degree of one of its exterior angles angle DCE is ()", "ground_truth": "69\u00b0"} +{"image_path": "./Geo170K/images/test/233.png", "question": "As shown in the figure, the quadrilateral ABCD is inscribed in circle O, E is a point on the BC extended line, angle A = 50.0, then the degree of angle DCE is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/234.png", "question": "As shown in the figure, the quadrilateral ABCD is inscribed in circle O, F is a point on arc CD, and arc DF = arc BC, connect CF and extend to intersects the extended line of AD at point E, connect AC. If angle ABC = 105.0, angle BAC = 25.0, then the degree of angle E is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/235.png", "question": "As shown in the figure, the quadrilateral ABCD is the inscribed quadrilateral of circle O, if angle C = 140.0, then the degree of angle BOD is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/236.png", "question": "As shown in the figure, in circle O, AB parallel CD, angle BCD = 100.0, E is any point on arc DC, A, B, C, and D are the four points on circle O, then the angle of angle AEC is ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/237.png", "question": "As shown in the figure. 
Given that the three points A, B, and C are on circle O, point C is on the minor arc AB, and angle AOB = 130.0, then the degree of angle ACB is ()", "ground_truth": "115\u00b0"} +{"image_path": "./Geo170K/images/test/238.png", "question": "As shown in the figure, the quadrilateral ABCD is inscribed in circle O, if angle ABC = 40.0, then the degree of angle ADC is ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/239.png", "question": "As shown in the figure, an exterior angle of the quadrilateral ABCD angle DCE = 70.0, then the degree of angle BAD is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/240.png", "question": "As shown in the figure, the quadrilateral ABCD is inscribed in circle O, angle BOD = 70.0, then the degree of angle BCD is ()", "ground_truth": "145\u00b0"} +{"image_path": "./Geo170K/images/test/241.png", "question": "As shown in the figure, the quadrilateral ABCD is inscribed in circle O, if one of its exterior angles angle DCE = 64.0, then angle BOD = ()", "ground_truth": "128\u00b0"} +{"image_path": "./Geo170K/images/test/242.png", "question": "As shown in the figure, the quadrilateral ABCD is a quadrilateral inscribed in the circle, and E is a point on the extended line of AD. If angle CBA = 120.0, then the size of angle EDC is ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/243.png", "question": "As shown in the figure, in the circle inscribed in the quadrilateral ABCD, the central angle angle 1 = 100.0, then the angle of circumference angle ABC is equal to ()", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/244.png", "question": "As shown in the figure, in the inscribed quadrilateral ABCD of the circle, angle ABC = 120.0, then the degree of the exterior angle of the quadrilateral ABCD angle ADE is ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/245.png", "question": "As shown in the figure, ABCD is the inscribed quadrilateral of circle O, and angle ABC = 115.0, then angle AOC is equal to ()", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/246.png", "question": "As shown in the figure, given the angle of circumference angle BAD = 50.0, then the degree of the angle of circumference angle BCD is ()", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/247.png", "question": "As shown in the figure, circle O is the circumscribed circle of the quadrilateral ABCD, if angle O = 110.0, then the degree of angle C is ()", "ground_truth": "125\u00b0"} +{"image_path": "./Geo170K/images/test/248.png", "question": "As shown in the figure, the quadrilateral ABCD is inscribed in circle O, if angle C = 36.0, then the degree of angle A is ()", "ground_truth": "144\u00b0"} +{"image_path": "./Geo170K/images/test/249.png", "question": "As shown in the figure, there are four points A, B, C, D on circle O, where angle A = 80.0, then the degree of angle C is ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/250.png", "question": "As shown in the figure, triangle ABC is the inscribed triangle of circle O, if angle ACB = 30.0, AB = 6.0, then the radius of circle O is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/251.png", "question": "As shown in the figure, the line segment AB is the diameter of circle O, the chord CD \u4e04 AB, angle CAB = 20.0, then angle BOD is equal to ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/252.png", "question": "As shown in the figure, triangle 
ABC is the inscribed triangle of circle O, AB is the diameter of circle O, point D is a point on circle O, if angle ACD = 40.0, then the size of angle BAD is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/253.png", "question": "As shown in the figure, triangle ABC is inscribed in circle O, angle C = 20.0, then the degree of angle OAB is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/254.png", "question": "As shown in the figure, triangle ABC is inscribed in circle O. Connect OA, OB, if angle C = 35.0, then the degree of angle OBA is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/255.png", "question": "As shown in the figure, it is known that triangle ABC is inscribed in circle O, angle BAC = 50.0, then the degree of angle BOC is ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/256.png", "question": "As shown in the figure, in triangle ABC, AB = AC, angle BAC = 70.0, circle O is the circumscribed circle of triangle ABC, point D is on the minor arc arc AC, then the degree of angle D is ()", "ground_truth": "125\u00b0"} +{"image_path": "./Geo170K/images/test/257.png", "question": "As shown in the figure, triangle ABC is inscribed in circle O, angle AOB = 80.0, then the size of angle ACB is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/258.png", "question": "As shown in the figure, triangle ABC is the inscribed triangle of circle O, angle C = 30.0, the radius of circle O is 5.0, if point P is a point on circle O, in triangle ABP, PB = AB, then the length of PA is ( )", "ground_truth": "5\u221a{3}"} +{"image_path": "./Geo170K/images/test/259.png", "question": "As shown in the figure, triangle ABC is inscribed in circle O, OC perpendicular OB, OD perpendicular AB intersects AC at point E. 
Knowing that the radius of circle O is 1.0, then the value of AE^ 2 + CE^ 2 is ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/260.png", "question": "As shown in the figure, it is known that circle O is the circumscribed circle of triangle ABC, and AB is the diameter of circle O, if OC = 5.0, AC = 6.0, then the length of BC is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/261.png", "question": "As shown in the figure, angle XOY = 45.0, the two vertices A and B of a right triangle ABC move on OX and OY respectively, where AB = 10.0, then the maximum value of the distance from point O to vertex A is ()", "ground_truth": "10\u221a{2}"} +{"image_path": "./Geo170K/images/test/262.png", "question": "As shown in the figure, in triangle ABC, angle BAC = 70.0, angle ABC = 45.0, point O is the center of the circumscribed circle of triangle ABC, then angle AOB is equal to ()", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/263.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABD, if angle A = 135.0, then the degree of angle BDO is ()", "ground_truth": "45\u00b0"} +{"image_path": "./Geo170K/images/test/264.png", "question": "As shown in the figure, triangle ABC is inscribed in circle O, if angle AOB = 110.0, then the degree of angle ACB is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/265.png", "question": "As shown in the figure, it is known that the angle between the diameter AB of circle O and the chord AC is 30.0, the tangent PC passing through point C and the extended line of AB intersect at point P, the radius of circle O is 2.0, then PC is ()", "ground_truth": "2\u221a{3}"} +{"image_path": "./Geo170K/images/test/266.png", "question": "As shown in the figure, AB cuts circle O at point B, AO intersects circle O at point C, and point D is at circle O. If angle A = 40.0, then the degree of angle BDC is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/267.png", "question": "As shown in the figure, AB is the diameter of circle O, point D is on the extended line of AB, passing point D is the tangent of circle O, and the tangent point is C, if angle A = 25.0, then angle D = ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/268.png", "question": "As shown in the figure, in the circle O with a radius of 2.0, C is a point on the extended line of the diameter AB, CD is tangent to the circle at point D. 
Connect AD, given that angle DAC = 30.0, the length of the line segment CD is ()", "ground_truth": "2\u221a{3}"} +{"image_path": "./Geo170K/images/test/269.png", "question": "circle O is a circle with a radius of 1.0, the distance from point O to line L is 3.0, draw a tangent of circle O through any point P on the straight line L , and the tangent point is Q; if PQ is taken as the edge to make the square PQRS, then the minimum area of the square PQRS is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/270.png", "question": "As shown in the figure, AB is the diameter of circle O, C is the point on circle O, passing point C is the tangent of circle O and intersects the extended line of AB at point E, OD perpendicular AC at point D, if angle E = 30.0, CE = 6.0, then the value of OD is ()", "ground_truth": "\u221a{3}"} +{"image_path": "./Geo170K/images/test/271.png", "question": "As shown in the figure, the straight line AB is tangent to circle O at point A, the radius of circle O is 1.0, if angle OBA = 30.0, then the length of OB is ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/272.png", "question": "As shown in the figure, it is known that BA is the tangent of circle O, and connect OB to intersect circle O at point C. If angle B = 45.0 and the length of AB is 2.0, then the length of BC is ()", "ground_truth": "2\u221a{2}-2"} +{"image_path": "./Geo170K/images/test/273.png", "question": "As shown in the figure, AB is the diameter of circle O, point P is a point outside circle O, PO intersects circle O at point C. Connect BC and PA. If angle P = 36.0, PA is tangent to circle O, then angle B is equal to ()", "ground_truth": "27\u00b0"} +{"image_path": "./Geo170K/images/test/274.png", "question": "As shown in the figure, AB, AC, and BD are the tangents of circle O, and the tangent points are P, C, and D respectively. If AB = 5.0, AC = 3.0, then the length of BD is ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/275.png", "question": "As shown in the figure, AB is the diameter of circle O, PA is tangent to circle O at point A, line segment PO intersects circle O at point C, and connect BC, if angle P = 36.0, then angle B is equal to ()", "ground_truth": "27\u00b0"} +{"image_path": "./Geo170K/images/test/276.png", "question": "As shown in the figure, PA and PB are tangents of circle O, the tangent point of point A and B, AC is the diameter of circle O, given that angle P = 50.0, then the size of angle ACB is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/277.png", "question": "As shown in the figure, points A, B, and C are on circle O, and the tangent line of circle O passing through point A intersects the extended line of OC at point P, angle B = 30.0, OP = 3.0, then the length of AP is ()", "ground_truth": "\\frac{3}{2}\u221a{3}"} +{"image_path": "./Geo170K/images/test/278.png", "question": "As shown in the figure, in circle O, AD and CD are chords. Connect OC and extend, and it intersects the tangent of point A at point B. If angle ADC = 25.0, then the degree of angle ABO is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/279.png", "question": "As shown in the figure, the straight lines PA and PB are the two tangents of circle O. 
If angle APB = 120.0, the radius of circle O is 10.0, then the length of chord AB is ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/280.png", "question": "As shown in the figure, AC is the tangent of circle O, the tangent point is C, BC is the diameter of circle O, AB intersects circle O at point D. Connect OD, if angle BAC = 50.0, then the size of angle COD is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/281.png", "question": "As shown in the figure, AB is the diameter of circle O, BP is the tangent of circle O, AP and circle O intersect at point G, point D is the point on arc BC, if angle P = 40.0, then angle ADC is equal to ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/282.png", "question": "As shown in the figure, AB is the diameter of circle O, PA is tangent to circle O at point A, OP intersects circle O at point C, and connect BC. If angle P = 20.0, then the degree of angle B is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/283.png", "question": "As shown in the figure, PA and PB are tangent to circle O at points A and B respectively, the tangent EF of circle O intersects PA and PB at points E and F respectively, and the tangent point C is on the arc AB. If the length of PA is 2.0, then the perimeter of triangle PEF is ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/284.png", "question": "Put the ruler, the triangle ruler and the round nut on the desktop as shown in the figure, angle CAB = 60.0, if AD = 6.0, then the outer diameter of the round nut is ()", "ground_truth": "12\u221a{3}cm"} +{"image_path": "./Geo170K/images/test/285.png", "question": "AB is the diameter of circle O, PA is tangent to circle O at point A, and PO intersects circle O at point C; connect BC, if angle P = 40.0, then angle B is equal to ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/286.png", "question": "As shown in the figure, AB is the diameter of circle O, DB and DC are respectively tangent to circle O at points B and C. If angle ACE = 25.0, then the degree of angle D is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/287.png", "question": "As shown in the figure, AB is the tangent of circle O, A is the tangent point, the extended line of BO intersects circle O at point C, angle OAC = 35.0, then the degree of angle B is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/288.png", "question": "As shown in the figure, in triangle ABC, angle B = 20.0, point O is a point on the edge of BC, take O as the center and OB as the radius to make a circle, intersect the AB edge at point D, connect CD, if CD happens to be tangent of circle O , then the degree of angle DCB is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/289.png", "question": "As shown in the figure, PA and PB are the tangents of circle O, points A and B are the tangent points, and AC is the diameter of circle O. 
Given that angle P = 50.0, the size of angle ACB is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/290.png", "question": "As shown in the figure, PA and PB are tangent to circle O at two points A and B respectively, point C is on the major arc arc ACB, angle P = 80.0, then the degree of angle C is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/291.png", "question": "As shown in the figure, circle O is the circumscribed circle of Rttriangle ABC, angle ACB = 90.0, angle A = 25.0, crossing point C to draw the tangent of circle O, and intersects the extended line of AB at point D, then the degree of angle D is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/292.png", "question": "As shown in the figure, AB is the diameter of circle O, point C is on the extended line of AB, CD is tangent to circle O, and the tangent point is D. If angle A = 35.0, then angle C = ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/293.png", "question": "As shown in the figure, point P is a point on the extended line AB of the diameter of circle O, passing point P to draw the tangent PC of circle O, and the tangent point is C. If AO = OB = PB = 1.0, then the length of PC is ()", "ground_truth": "\u221a{3}"} +{"image_path": "./Geo170K/images/test/294.png", "question": "As shown in the figure, in triangle ABC, AB = 5.0, BC = 3.0, AC = 4.0, the circle with point C as the center is tangent to AB, then the radius of circle C is ()", "ground_truth": "2.4"} +{"image_path": "./Geo170K/images/test/295.png", "question": "As shown in the figure, points A, B, and C are three points on circle O, and the straight line CD and circle O are tangent to point C. If angle DCB = 40.0, then the degree of angle CAB is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/296.png", "question": "As shown in the figure, the straight line AB and circle O are tangent to point A, the radius of circle O is 2.0, if angle OBA = 30.0, then the length of AB is ()", "ground_truth": "2\u221a{3}"} +{"image_path": "./Geo170K/images/test/297.png", "question": "As shown in the figure, AB is the diameter of circle O, AC is tangent to circle O at A, BC intersects circle O at point D, if angle C = 70.0, then the degree of angle AOD is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/298.png", "question": "As shown in the figure, PA and PB are tangent to circle O at points A and B respectively, point E is a point on circle O, and angle AEB = 60.0, then angle P = ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/299.png", "question": "As shown in the figure, PA and PB are the tangents of circle O, AC is the diameter of circle O, angle c = 55.0, then angle APB is equal to ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/300.png", "question": "As shown in the figure, PA and PB are tangent to circle O at A and B respectively, angle P = 70.0, then angle C is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/301.png", "question": "As shown in the figure, the line segment AB is the diameter of circle O, points C and D are points on circle O, and the tangent of circle O passing through point C intersects the extended line of AB at point E. 
If angle E = 50.0, then angle CDB is equal to ( )", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/302.png", "question": "The straight line AB and circle O are tangent to point A, as shown in the figure, if angle OBA = 60.0, AB = 1.0, then the radius of circle O is ()", "ground_truth": "\u221a{3}"} +{"image_path": "./Geo170K/images/test/303.png", "question": "As shown in the figure, in Rttriangle ABC, AC = 4.0, AB = 5.0, angle C = 90.0, the circle passing through point C which is tangent to the edge AB intersects the edges CB and CA of triangle ABC at points E, F. The minimum length of the line segment EF is ()", "ground_truth": "2.4"} +{"image_path": "./Geo170K/images/test/304.png", "question": "As shown in the figure, the radii of the two concentric circles are 3.0 and 5.0 respectively, and a chord AB of the great circle is tangent to the small circle, then the length of the chord AB is ()", "ground_truth": "8cm"} +{"image_path": "./Geo170K/images/test/305.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, AD is the diameter of circle O, and EA is the tangent of circle O. If angle EAC = 120.0, then the degree of angle ABC is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/306.png", "question": "As shown in the figure, the radius of circle O is 2.0, the distance from point O to line l is 3.0, and point P is a moving point on line l. If PB is tangent to circle O at point B, then the minimum value of PB is ()", "ground_truth": "\u221a{5}"} +{"image_path": "./Geo170K/images/test/307.png", "question": "As shown in the figure, PA, PB, and CD are the tangents of circle O, A, B, and E are the tangent points, and CD intersects the line segments PA and PB at C and D respectively. If angle APB = 40.0, then the degree of angle COD is ( )", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/308.png", "question": "As shown in the figure, AB is the diameter of circle O, point C is on circle O, AE is the tangent of circle O, A is the tangent point, connect BC and extend to intersect AE at point D. If angle AOC = 80.0, then the degree of angle ADB is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/309.png", "question": "As shown in the figure, AC is the tangent of circle O, the tangent point is C, BC is the diameter of circle O, AB intersects circle O at point D, and connect OD. If angle BAC = 55.0, then the size of angle COD is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/310.png", "question": "As shown in the figure, in triangle ABC, AB = 3.0, AC = 2.0. When angle B is the largest, the length of BC is ()", "ground_truth": "\u221a{5}"} +{"image_path": "./Geo170K/images/test/311.png", "question": "As shown in the figure, AB is the diameter of the semicircle, point O is the center of the circle, point C is a point on the extended line of AB, and CD is tangent to the semicircle at point D. 
If AB = 6.0, CD = 4.0, then the value of sinangle C is ()", "ground_truth": "\\frac{3}{5}"} +{"image_path": "./Geo170K/images/test/312.png", "question": "As shown in the figure, in Rttriangle ABC, angle C = 90.0, angle A = 30.0, BC = 2.0, the radius of circle C is 1.0, point P is the point on the hypotenuse AB, passing point P is a tangent PQ of circle C (Point Q is the tangent point), then the minimum value of the line segment PQ is ()", "ground_truth": "\u221a{2}"} +{"image_path": "./Geo170K/images/test/313.png", "question": "As shown in the figure, AB and AC are the two chords of circle O. The tangent passing point B and the extended line of OC intersect at point D. If angle D = 36.0, then the degree of angle CAB is ()", "ground_truth": "27\u00b0"} +{"image_path": "./Geo170K/images/test/314.png", "question": "As shown in the figure, the diameters of the two concentric circles are 6.0 and 10.0, and a chord AB of the great circle is tangent to the small circle, so the length of the chord AB is ()", "ground_truth": "8cm"} +{"image_path": "./Geo170K/images/test/315.png", "question": "As shown in the figure, AP and BP are tangent to circle O at points A and B respectively, angle P = 60.0, point C is on the major arc AB, then the degree of angle C is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/316.png", "question": "As shown in the figure, P is a point on the AB extended line of the diameter of circle O, PC is tangent to circle O at C, angle P = 50.0, angle A is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/317.png", "question": "As shown in the figure, PA and PB are the tangents of circle O, and the tangent points are A and B. If angle OAB = 30.0, then the degree of angle P is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/318.png", "question": "As shown in the figure, PA, PB are circle O is tangent, AC is the diameter of circle O, if angle BAC = 25.0, then angle P is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/319.png", "question": "As shown in the figure, the straight line BC is tangent to circle O at point A, AD is the chord of circle O. Connect OD, if angle DAC = 50.0, then the degree of angle ODA is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/320.png", "question": "As shown in the figure, a torus carpet is to be laid in the lobby of a hotel. The worker only measures the length of the chord AB of the great circle that is tangent to the small circle, and then calculates the area of \u200b\u200bthe torus. 
If the measured length of AB is 8.0, the area of \u200b\u200bthe torus is ()", "ground_truth": "16\u03c0\u5e73\u65b9\u7c73"} +{"image_path": "./Geo170K/images/test/321.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is tangent to circle O at point D, and the extended line of AB intersects CD at point C, if angle ACD = 40.0, then angle A = ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/322.png", "question": "As shown in the figure, AB is the diameter of circle O, point D is on the extended line of AB, and DC is tangent to circle O at point C, if angle A = 26.0, then angle D is equal to ()", "ground_truth": "38\u00b0"} +{"image_path": "./Geo170K/images/test/323.png", "question": "As shown in the figure, in triangle ABC, AB = AC, angle BAO = 45.0, triangle ABC is inscribed in circle O, D is a point on circle O, passing point D is the tangent of circle O and the extended line of BC at E, if DE perpendicular BC, AD = 2.0\u221a{2.0}, then the length of DE is ()", "ground_truth": "\u221a{2}"} +{"image_path": "./Geo170K/images/test/324.png", "question": "As shown in the figure, AB is the tangent of circle O, B is the tangent point, AO and circle O intersect at point C, if angle BAO = 40.0, then the degree of angle OCB is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/325.png", "question": "As shown in the figure, circle O ia tangent to AB at point C, angle BCE = 60.0, DC = 6.0, DE = 4.0, then S_triangle CDE is ()", "ground_truth": "6\u221a{3}"} +{"image_path": "./Geo170K/images/test/326.png", "question": "As shown in the figure, AB is tangent to circle O at B, and the secant ACD passes through the center O, if angle BCD = 70.0, then the degree of angle A is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/327.png", "question": "As shown in the figure, in triangle ABC, AB = 15.0, AC = 12.0, BC = 9.0, the moving circle passing through point C and tangent to AB intersects CB and CA at points E and F respectively, then the minimum value the length of the line segment EF is ()", "ground_truth": "\\frac{36}{5}"} +{"image_path": "./Geo170K/images/test/328.png", "question": "As shown in the figure, BC is tangent to circle O at point C, and the extended line of BO intersects circle O at point A, connect AC, if angle ACB = 120.0, then the degree of angle A is equal to ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/329.png", "question": "As shown in the figure, a quadrilateral green garden, with circular fountains with a radius of 2.0 on all four corners, then the area of \u200b\u200bthe green garden occupied by these four fountains is ()", "ground_truth": "4\u03c0"} +{"image_path": "./Geo170K/images/test/330.png", "question": "As shown in the figure, in the square ABCD with edge length 4.0, first draw the arc with point A as the center, the length of AD as the radius, and then draw the arc with the midpoint of the AB side as the center, and half of the AB length as the radius, then the area of the shaded part between the two arcs is () (results remain N_1)", "ground_truth": "2\u03c0"} +{"image_path": "./Geo170K/images/test/331.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, AE = 3.0, AC = 9.0, AD = 4.0, then the value of AB is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/332.png", "question": "As shown in the figure, AB parallel CD, AD and BC intersect at point O, if AO = 2.0, DO = 4.0, BO = 3.0, then the length of BC is ()", 
"ground_truth": "9"} +{"image_path": "./Geo170K/images/test/333.png", "question": "As shown in the figure, given that a parallel b parallel c, AB = 1.0, BC = 2.0, EF = 4.0, then DE = ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/334.png", "question": "As shown in the figure, straight lines a, b, and c intersect straight lines and n at points A, B, C, D, E, and F respectively. Given the straight line a parallel b parallel c, if AB = 2.0, BC = 3.0, then the value of frac DEEF is ()", "ground_truth": "\\frac{2}{3}"} +{"image_path": "./Geo170K/images/test/335.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, if AD = 1.0, DB = 2.0, then the value of frac AEAC is ()", "ground_truth": "\\frac{1}{3}"} +{"image_path": "./Geo170K/images/test/336.png", "question": "As shown in the figure, in triangle ABC, M is the midpoint of AC, E is a point on AB, AE=frac {1.0}{4.0}AB, connect EM and extend, and it intersects the extended line of BC at D, then frac {BC}{CD} = ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/337.png", "question": "As shown in the figure, in the rectangle ABCD, AB = 3.0, BC = 4.0, point M is on BC, and satisfies BM = 1.0, cross D to make DN perpendicular AM which intersects AM at point N, then the length of DN is ()", "ground_truth": "\\frac{6}{5}\u221a{10}"} +{"image_path": "./Geo170K/images/test/338.png", "question": "As shown in the figure, in triangle ABC, point D and point E are on AB and BC respectively, and DE parallel AC, BE = 2.0, CE = 1.0, the area of \u200b\u200btriangle BDE is 4.0, then the area of \u200b\u200btriangle ABC is ( )", "ground_truth": "9"} +{"image_path": "./Geo170K/images/test/339.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, if AB = 7.0, AC = 5.0, AD = 3.0, then DE = ()", "ground_truth": "\\frac{20}{7}cm"} +{"image_path": "./Geo170K/images/test/340.png", "question": "As shown in the figure, in parallelogram ABCD, point E is on the edge AD, CE intersects BD at point F, if EF = frac {1.0}{3.0}FC, then frac {AE}{ED} = ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/341.png", "question": "As shown in the figure, given that the point M is the midpoint of edge AB of the parallelogram ABCD, the line segment CM intersects BD at the point E, Striangle BEM = 2.0, then the area of \u200b\u200bthe shaded part in the figure is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/342.png", "question": "As shown in the figure, in the quadrilateral ABCD, AD \u2016 BC, diagonal AC and BD intersect at O, if \\\\ frac {s {\\triangle ADO} {s {\\triangle DOC}} = \\frac {1}{3}", "ground_truth": "\\frac{1}{3}"} +{"image_path": "./Geo170K/images/test/343.png", "question": "As shown in the figure, in parallelogram ABCD, angle C = 120.0, AB = AE = 5.0, AE and BD intersect at point F, AF = 2 EF. 
Then the length of BC is ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/344.png", "question": "As shown in the figure, D and E are the points on the edges AB and AC of triangle ABC, DE parallel BC, if AD:DB=1.0:3.0, AE = 2.0, then the length of AC is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/345.png", "question": "As shown in the figure, given that AB parallel CD parallel EF, AD:AF=3.0:5.0,BE=15.0, then the length of CE is equal to ()", "ground_truth": "9"} +{"image_path": "./Geo170K/images/test/346.png", "question": "As shown in the figure, AD parallel BE parallel CF, straight line l2.0, l3.0 and these three parallel lines intersect at points A, B, C, D, E, F, frac {AB}{BC}=frac {2.0}{3.0},DE=6.0, then the value of EF is ( )", "ground_truth": "9"} +{"image_path": "./Geo170K/images/test/347.png", "question": "As shown in the figure, it is known that a straight line a parallel b parallel c, a straight line, n and a, b, c intersect at points A, C, E, B, D, F, if AC = 4.0, AE = 10.0, BD = 3.0, then the value of DF is ()", "ground_truth": "4.5"} +{"image_path": "./Geo170K/images/test/348.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, if AD = 1.0, DB = 2.0, then the value of frac ADAB is ()", "ground_truth": "\\frac{1}{3}"} +{"image_path": "./Geo170K/images/test/349.png", "question": "As shown in the figure, the straight line a parallel b parallel c, the straight line, n and a, b, c intersect at the points A, C, E and B, D, F respectively, if AC = 4.0, AE = 10.0, BF =frac {15.0}{2.0}, then the length of DF is ()", "ground_truth": "\\frac{9}{2}"} +{"image_path": "./Geo170K/images/test/350.png", "question": "As shown in the figure, given that a parallel b parallel c, AC = 6.0, AB = 2.0, EF = 5.0, then the value of DF is ()", "ground_truth": "\\frac{15}{2}"} +{"image_path": "./Geo170K/images/test/351.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, frac {AD}{DB} = frac {1.0}{2.0}, DE = 4.0, then the length of BC is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/352.png", "question": "As shown in the figure, in triangle ABC, the points D and E are on the edges AB and AC respectively, DE parallel BC, given that EC = 6.0, frac {AD}{DB}=frac {2.0}{3.0}, then the length of AE is ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/353.png", "question": "As shown in the figure, in triangle ABC, D and E are points on AB and AC respectively, which satisfy AD = 3.0, AE = 2.0, EC = 1.0, DE parallel BC, then AB = ()", "ground_truth": "4.5"} +{"image_path": "./Geo170K/images/test/354.png", "question": "As shown in the figure, AB parallel CD parallel EF, AC and BD intersect at point E, if CE = 5.0, CF = 4.0, AE = BC, then the value of frac CDAB is ()", "ground_truth": "\\frac{1}{4}"} +{"image_path": "./Geo170K/images/test/355.png", "question": "As shown in the figure, in triangle ABC, points D and E are on edges AB and AC respectively, DE parallel BC. 
If frac {AE}{AC}=frac {3.0}{4.0},AD=9.0, then AB is equal to ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/356.png", "question": "As shown in the figure, the straight line l_{1.0}parallel l_{2.0}parallel l_{3.0}, it is known that: AB=4.0,BC=6.0,DE=3.0, then EF = ()", "ground_truth": "4.5"} +{"image_path": "./Geo170K/images/test/357.png", "question": "As shown in the figure, in the parallelogram ABCD, F is a point on AB, DF intersects AC at point E, if CD = 10.0, frac {AE}{EC}=frac {2.0}{5.0}, then the length of BF is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/358.png", "question": "As shown in the figure, E is a point on AD of the parallelogram ABCD, passing the point E to draw EF parallel AB and it intersects BD at F, if DE:EA=2.0:3.0,EF=4.0, then the length of CD is ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/359.png", "question": "As shown in the figure, it is known that l_ 1 parallel l_ 2 parallel l_ 3, if AB:BC=2.0:3.0,DE=4.0, then the length of EF is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/360.png", "question": "As shown in the figure DE parallel BC, AD = 3.0, DB = 4.0, AE = 1.5, then EC is equal to ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/361.png", "question": "As shown in the figure, in triangle ABC, points D and E are on edges AB and AC respectively, DE parallel BC, and AE = 1.0, AC = 5.0, AB = 6.0, then the length of AD is ()", "ground_truth": "1.2"} +{"image_path": "./Geo170K/images/test/362.png", "question": "As shown in the figure, D and E are points on AB and AC of triangle ABC, and DE parallel BC, if DE:BC=3.0:5.0,AD=6.0, then AB = ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/363.png", "question": "As shown in the figure, triangle ABC similar triangle AED, angle ADE = 80.0, angle A = 60.0, then angle B is equal to ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/364.png", "question": "As shown in the figure, in Rttriangle ABC, angle C = 90.0, BC = 3.0, AC = 4.0, if triangle ABC similar triangle BDC, then CD = ()", "ground_truth": "\\frac{9}{4}"} +{"image_path": "./Geo170K/images/test/365.png", "question": "As shown in the figure, triangle ABC similar triangle DEF, the scale factor of similarity is 1.0:2.0, if EF = 2.0, the length of BC is ()", "ground_truth": "1"} +{"image_path": "./Geo170K/images/test/366.png", "question": "As shown in the figure, DE is the perpendicular bisector of triangle ABC. 
Given that the area of \u200b\u200btriangle ABC is 8.0^2, then the area of \u200b\u200btriangle ADE is ()^2.", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/367.png", "question": "As shown in the figure, D is a point on BC of triangle ABC, it is known that AB = 6.0, AD = 3.0, AC = 4.0, angle DAC = angle B, then the length of BD is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/368.png", "question": "As shown in the figure, if triangle ABC similar triangle ACD, angle A = 60.0, angle ACD = 40.0, then the degree of angle BCD is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/369.png", "question": "As shown in the figure, two straight lines are intercepted by three parallel lines, AB = 2.0, BC = 3.0, then frac EFEG is equal to ()", "ground_truth": "\\frac{2}{5}"} +{"image_path": "./Geo170K/images/test/370.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, if frac {AD}{AB}=frac {1.0}{3.0}", "ground_truth": "\\frac{1}{3}"} +{"image_path": "./Geo170K/images/test/371.png", "question": "As shown in the figure, C and M are two points on the line segment AB, and the point M is the midpoint of the line segment AC. If AB = 8.0, BC = 2.0, then the length of AM is ()", "ground_truth": "3cm"} +{"image_path": "./Geo170K/images/test/372.png", "question": "As shown in the figure, C and D are two points on the line segment AB. If CB = 4.0, DB = 7.0, and D is the midpoint of AC, then the length of AC is equal to ()", "ground_truth": "6cm"} +{"image_path": "./Geo170K/images/test/373.png", "question": "As shown in the figure, BC=frac {1.0}{2.0}AB, D is the midpoint of AC, if DC = 3.0, then the length of AB is ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/374.png", "question": "As shown in the figure, after Xiaolin walks straight in the direction of west from point P 12.0, turns left, the angle of rotation is \u03b1, and then walks 12.0, repeating this, Xiaolin has walked 108.0 and returned to point P, then the value of \u03b1-5.0 is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/375.png", "question": "As shown in the figure, it is known that AD is the midline of triangle ABC, and the perimeter of triangle ABD is 3.0 larger than the perimeter of triangle ACD, then the difference between AB and AC is ()", "ground_truth": "3cm"} +{"image_path": "./Geo170K/images/test/376.png", "question": "As shown in the figure, in triangle ABC, AB = AC, M and N are the midpoints of AB and AC respectively, D and E are points on BC. Connect DN, EM. If AB = 13.0, BC = 10.0, DE = 5.0, the area of \u200b\u200bthe shaded part in the figure is 2.0. ()", "ground_truth": "30"} +{"image_path": "./Geo170K/images/test/377.png", "question": "As shown in the figure, C is a point on the semicircle O with AB as the diameter, connect AC and BC, and make square ACDE and BCFG with AC and BC as the edges respectively. The midpoints of DE, FG, arc \\athrAC, arc \\athrBC are M, N, P, Q respectively. 
If MP + NQ = 14.0, AC + BC = 18.0, then the length of AB is ()", "ground_truth": "13"} +{"image_path": "./Geo170K/images/test/378.png", "question": "As shown in the figure, in the quadrilateral ABCD, point P is the midpoint of the diagonal BD, points E and F are the midpoints of AB and CD respectively, AD = BC, angle FPE = 136.0, then the degree of angle PFE is ()", "ground_truth": "22\u00b0"} +{"image_path": "./Geo170K/images/test/379.png", "question": "As shown in the figure, it is known that the straight line a parallel b parallel c and the straight line d are perpendicular to them and intersect at the three points A, B and C. If AB = 3.0 and AC = 8.0, the distance between the parallel lines b and c is ( )", "ground_truth": "5"} +{"image_path": "./Geo170K/images/test/380.png", "question": "As shown in the figure, it is known that angle 1 + angle 2 = 100.0, then angle 3 = ().", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/381.png", "question": "As shown in the figure, there is a pond. To measure the distance between A and B at both ends of the pond, firstly take a point C on the flat ground that can directly reach points A and B without passing through the pond, connect AC and extend to D, so that CD = CA , Connect BC and extend to E, make CE = CB, connect ED. If DE = 58.0 is measured, then the distance between A and B is ()", "ground_truth": "58\u7c73"} +{"image_path": "./Geo170K/images/test/382.png", "question": "As shown in the figure, the parallel lines a and b are intercepted by the straight line c. If angle 1 = 50.0, then the degree of angle 2 is ()", "ground_truth": "130\u00b0"} +{"image_path": "./Geo170K/images/test/383.png", "question": "As shown in the figure, it is known that angle 1 = 60.0, angle A + angle B + angle C + angle D + angle E + angle F = ()", "ground_truth": "240\u00b0"} +{"image_path": "./Geo170K/images/test/384.png", "question": "As shown in the figure, in the isosceles triangle ABC, AB = AC, BD is the height on AC, if angle A = 36.0, then the size of angle DBC is ()", "ground_truth": "18\u00b0"} +{"image_path": "./Geo170K/images/test/385.png", "question": "As shown in the figure, if a parallel b, angle 1 = 115.0, then angle 2 = ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/386.png", "question": "As shown in the figure, in triangle ABC, angle A = 80.0. Point D is a point on the extended line of BC, angle ACD = 150.0, then angle B = ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/387.png", "question": "As shown in the figure, a parallel b, point B is on the straight line a, and AB perpendicular BC, angle 1 = 35.0, then angle 2 = ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/388.png", "question": "As shown in the figure, the line AB and CD intersect at E, and there is a point F on the bisector of angle CEB, FM parallel AB. When angle 3 = 10.0, the degree of angle F is ()", "ground_truth": "85\u00b0"} +{"image_path": "./Geo170K/images/test/389.png", "question": "As shown in the figure, it is known that straight lines a and b are intercepted by straight line c. If a parallel b, angle 1 = 120.0, then the degree of angle 2 is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/390.png", "question": "As shown in the figure, C and D are two points on circle O with the line segment AB as the diameter. 
If CA = CD, and angle CAB = 25.0, then the degree of angle ACD is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/391.png", "question": "As shown in the figure, AB parallel EF, CD perpendicular EF at point D, if angle BCD = 140.0, then the degree of angle ABC is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/392.png", "question": "As shown in the figure, in triangle ABC, angle B = 40.0, passing point C to draw CD parallel AB, angle ACD = 65.0, then the degree of angle ACB is ()", "ground_truth": "75\u00b0"} +{"image_path": "./Geo170K/images/test/393.png", "question": "Place a pair of right triangle plates as shown in the figure, so that the leg of the triangle plate with angle 30.0 and the leg of the triangle plate with angle 45.0 are on the same straight line, then the degree of angle 1 is ()", "ground_truth": "75\u00b0"} +{"image_path": "./Geo170K/images/test/394.png", "question": "Given the straight line a parallel b, a right triangle plate is placed as shown in the figure, if angle 1 = 37.0, then the degree of angle 2 is ()", "ground_truth": "53\u00b0"} +{"image_path": "./Geo170K/images/test/395.png", "question": "As shown in the figure, the diagonal AC and BD of the rectangle ABCD intersect at point O, CE parallel BD, DE parallel AC, if AB = 4.0, BC = 3.0, then the perimeter of the quadrilateral CODE is ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/396.png", "question": "As shown in the figure, put the right-angled vertex of the triangle plate with 30.0 angle on one side of the ruler, if angle 1 = 35.0, then the degree of angle 2 is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/397.png", "question": "As shown in the figure, in parallelogram ABCD, F is a point on AD, CF = CD. If angle B = 72.0, then the degree of angle AFC is ()", "ground_truth": "108\u00b0"} +{"image_path": "./Geo170K/images/test/398.png", "question": "From a corner of the cubic blank with edge length 4.0, excavate a small cube with edge length 2.0 to obtain a part as shown in the figure, then the surface area of \u200b\u200bthis part is ()", "ground_truth": "96"} +{"image_path": "./Geo170K/images/test/399.png", "question": "As shown in the figure, the points B, O, D are on the same straight line, if angle 1 = 15.0, angle 2 = 105.0, then the degree of angle AOC is ()", "ground_truth": "90"} +{"image_path": "./Geo170K/images/test/400.png", "question": "As shown in the figure, the right-angled vertices of the two triangle plates are overlapped and stacked together. If angle 1 = 40.0, then the degree of angle 2 is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/401.png", "question": "As shown in the figure, after a car has turned twice through a section of road, it is the same as the original driving direction, that is, the two roads before and after turning are parallel to each other. 
The first turning angle angle B is equal to 142.0, and the degree of angle the second turning angle C is ()", "ground_truth": "142\u00b0"} +{"image_path": "./Geo170K/images/test/402.png", "question": "As shown in triangle ABC, angle ACB = 90.0, AD bisects angle BAC and it intersects BC at D, DE is perpendicular to AB to E, if DE = 1.5, BD = 3.0, then BC = ()", "ground_truth": "4.5cm"} +{"image_path": "./Geo170K/images/test/403.png", "question": "As shown in the figure, AB = AC, AD = AE, angle BAC = angle DAE, angle 1 = 25.0, angle 2 = 30.0, then angle 3 = ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/404.png", "question": "As shown in the figure, in triangle ABC, angle B = 46.0, angle C = 54.0, AD bisects angle BAC and it intersects BC at D, then the size of angle BAD is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/405.png", "question": "As shown in the figure, it is known that D is a point on BC, angle B = angle 1, angle BAC = 78.0, then angle 2 = ()", "ground_truth": "78\u00b0"} +{"image_path": "./Geo170K/images/test/406.png", "question": "As shown in the figure, in triangle ABC, angle ACB = 90.0, fold triangle CBD along CD so that point B falls exactly at point E on the edge of AC. If angle A = 24.0, then the degree of angle BDC is ()", "ground_truth": "69\u00b0"} +{"image_path": "./Geo170K/images/test/407.png", "question": "As shown in the figure, DE is the perpendicular bisector of BC of triangle ABC, and it intersects BC at E as well as intersects AB at D, and angle B = 40.0, angle A = 60.0, then the degree of angle ACD is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/408.png", "question": "As shown in the figure, in circle O with radius 5.0, AB is a chord, OC perpendicular AB at point C, and OC = 3.0, then the value of AB is ()", "ground_truth": "8cm"} +{"image_path": "./Geo170K/images/test/409.png", "question": "As shown in the figure, in circle O, OA perpendicular OB, angle A = 35.0, then the degree of arc CD is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/410.png", "question": "As shown in the figure, in triangle ABC, DE is the perpendicular bisector of AC, AE = 3.0, the perimeter of triangle ABD is 13.0, then the perimeter of triangle ABC is ()", "ground_truth": "19cm"} +{"image_path": "./Geo170K/images/test/411.png", "question": "As shown in the figure, \u22bfABC is inscribed in circle O, if angle OAB = 28.0, then the size of angle C is ()", "ground_truth": "62\u00b0"} +{"image_path": "./Geo170K/images/test/412.png", "question": "As shown in the figure, it is known that PA and PB are the tangents of circle O, A and B are the tangent points, AC is the diameter of circle O, angle P = 40.0, then the degree of angle BAC is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/413.png", "question": "As shown in the figure, in Rttriangle ABC, angle B = 90.0, AB = 6.0, AC = 10.0 Fold triangle ABC along ED to make point C coincide with point A, then the perimeter of triangle ABE is equal to ()", "ground_truth": "14"} +{"image_path": "./Geo170K/images/test/414.png", "question": "As shown in the figure, a rectangular ruler is broken and dislocated along a straight line, and points E, D, B, and F are on the same straight line. 
If angle ADE = 125.0, then the degree of angle DBC is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/415.png", "question": "As shown in the figure, the central angle angle AOB = 60.0 \u2218, then the degree of the angle of circumference angle ACB is ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/416.png", "question": "As shown in the figure, in triangle ABC, angle A = 90.0, AB = AC, BD bisects angle ABE, DE perpendicular BC, if BC = 10.0, then the perimeter of triangle DEC is ()", "ground_truth": "10cm"} +{"image_path": "./Geo170K/images/test/417.png", "question": "As shown in the figure, the perpendicular bisector of the isosceles trapezoid ABCD circumscribed by the circle EF = 15.0, then the perimeter of the isosceles trapezoid ABCD is equal to ()", "ground_truth": "60cm"} +{"image_path": "./Geo170K/images/test/418.png", "question": "As shown in the figure, if CB = 4.0, DB = 7.0, and D is the midpoint of AC, then the length of AC is ()", "ground_truth": "6cm"} +{"image_path": "./Geo170K/images/test/419.png", "question": "As shown in the figure, in triangle ABC, angle ABC = 120.0, if DE and FG bisect AB and BC perpendicularly, then the degree of angle EBF is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/420.png", "question": "As shown in the figure, in Rttriangle ABC, angle BAC = 90.0, rotate triangle ABC clockwise around point A by 90.0 to obtain triangle AB\u2032C\u2032 (the corresponding point of point B is point B\u2032, and the corresponding point of point C is point C \u2032), connect CC\u2032, if angle CC\u2032B\u2032 = 33.0, then the size of angle B is ()", "ground_truth": "78\u00b0"} +{"image_path": "./Geo170K/images/test/421.png", "question": "As shown in the figure, point C is on line AB, point D is the midpoint of AC, if CD = 3.0, AB = 10.0, then the length of BC is ()", "ground_truth": "4cm"} +{"image_path": "./Geo170K/images/test/422.png", "question": "As shown in the figure, AD is the midline of triangle ABC, and it is known that the perimeter of triangle ABD is 22.0, and AB is longer than AC by 3.0, then the perimeter of triangle ACD is ()", "ground_truth": "19cm"} +{"image_path": "./Geo170K/images/test/423.png", "question": "As shown in the figure, the line segment AB = 20.0, C is the midpoint of AB, D is the point on CB, E is the midpoint of DB, and EB = 3.0, then CD is equal to ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/424.png", "question": "As shown in the figure, C and D are two points on the line segment AB. 
If CB = 4.0, DB = 7.0, and D is the midpoint of AC, then AB = ()", "ground_truth": "10cm"} +{"image_path": "./Geo170K/images/test/425.png", "question": "As shown in the figure, a supermarket shopping cart is placed on a horizontal ground, and its lateral quadrilateral ABCD is in the same plane as a horizontal line on the ground, and AB parallel l, if angle A = 93.0, angle D = 111.0, then the degree of the acute angle between the straight line CD and l is ()", "ground_truth": "24\u00b0"} +{"image_path": "./Geo170K/images/test/426.png", "question": "As shown in the figure, in triangle ABC, angle B = angle C, D is a point on edge BC, point E is on edge AC, angle ADE = angle AED, if angle BAD = 24.0, then angle CDE = ()", "ground_truth": "12\u00b0"} +{"image_path": "./Geo170K/images/test/427.png", "question": "As shown in the figure, AB is the chord of circle O, OC perpendicular AB at point D, and it intersects circle O at point C, if the radius is 5.0, OD = 3.0, then the length of chord AB is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/428.png", "question": "As shown in the figure, AB is the diameter of circle O, O is the center of the circle, the chord CD perpendicular AB at E, AB = 10.0, CD = 8.0, then the length of OE is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/429.png", "question": "As shown in the figure, the radius of circle O is OA = 5.0, and the arc with A as the center and OA as the radius intersects circle O at the two points B and C, then the length of the chord BC is equal to ()", "ground_truth": "5\u221a{3}"} +{"image_path": "./Geo170K/images/test/430.png", "question": "As shown in the figure, C and D are two points on the line segment AB. If BC = 3.0, BD = 5.0, and D is the midpoint of AC, then the length of AC is ()", "ground_truth": "4cm"} +{"image_path": "./Geo170K/images/test/431.png", "question": "As shown in the figure, in circle O, AB is the chord, OC perpendicular AB, the foot of perpendicular is C, if AB = 16.0, OC = 6.0, then the diameter of circle O is equal to ()", "ground_truth": "20"} +{"image_path": "./Geo170K/images/test/432.png", "question": "As shown in the figure, in the circle O with a radius of 10.0, the radius OC is perpendicular to the chord AB to the point D, AB = 16.0, then the length of CD is ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/433.png", "question": "As shown in the figure, P is a point on the chord AB of circle O, AB = 10.0, AP = 4.0, OP = 5.0, then the radius of circle O is. 
()", "ground_truth": "7"} +{"image_path": "./Geo170K/images/test/434.png", "question": "As shown in the figure, the rectangle intersects with circle O, if AB = 4.0, BC = 5.0, DE = 3.0, then the length of EF is ()", "ground_truth": "7"} +{"image_path": "./Geo170K/images/test/435.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is the chord, AB perpendicular CD, the foot of perpendicular is point E, connect OD, CB, AC, angle DOB = 60.0, EB = 2.0, then the length of CD is ()", "ground_truth": "4\u221a{3}"} +{"image_path": "./Geo170K/images/test/436.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is the chord, AB perpendicular CD at point E, if the radius is 5.0, OE = 3.0, then the length of CD is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/437.png", "question": "As shown in the figure, in the circle O with a radius of 5.0, the length of the chord AB is 8.0, then the distance from the center O to the chord AB is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/438.png", "question": "As shown in the figure, AB = 8.0, AD = BC = 5.0, then CD is equal to ()", "ground_truth": "2cm"} +{"image_path": "./Geo170K/images/test/439.png", "question": "As shown in the figure, C and D are two points on the line segment AB. If CB = 4.0, DB = 7.0, and D is the midpoint of AC, then the length of AB is equal to ()", "ground_truth": "10cm"} +{"image_path": "./Geo170K/images/test/440.png", "question": "As shown in the figure, point C is on line AB, point E is the midpoint of AC, and point D is the midpoint of BC. If ED = 6.0, the length of the line segment AB is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/441.png", "question": "As shown in the figure, it is known that angle 1 = 40.0, angle A + angle B = 140.0, then the degree of angle C + angle D is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/442.png", "question": "As shown in the figure, the diagonals of the quadrilateral ABCD AC perpendicular BD, the foot of perpendicular is O, and AC = 12.0, BD = 9.0, then the area of \u200b\u200bthe quadrilateral ABCD is ()", "ground_truth": "54"} +{"image_path": "./Geo170K/images/test/443.png", "question": "As shown in the figure, it is known that triangle ABC, point D is on the extended line of BC, angle ACD = 140.0, angle ABC = 50.0, then the size of angle A is ()", "ground_truth": "90\u00b0"} +{"image_path": "./Geo170K/images/test/444.png", "question": "As shown in the figure, point O is on the straight line AB, if angle 2 = 140.0, then the degree of angle 1 is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/445.png", "question": "As shown in the figure, line segment AB = 10.0, M is the midpoint of line segment AB, C is the midpoint of line segment MB, N is a point of line segment AM, and MN = 1.0, the length of line segment NC ()", "ground_truth": "3.5"} +{"image_path": "./Geo170K/images/test/446.png", "question": "Suppose BF intersects AC at point P, AE intersects DF at point Q. If angle APB = 126.0, angle AQF = 100.0, then angle A-angle F = ()", "ground_truth": "46\u00b0"} +{"image_path": "./Geo170K/images/test/447.png", "question": "As shown in the figure, BD and CE are the height and angular bisector of triangle ABC respectively, and they intersect with point O. 
If angle BCA = 70.0, then the degree of angle BOE is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/448.png", "question": "As shown in the figure, it is known that points A, B, and C are on the same straight line, AB = 7.0, BC = 3.0, point D is the midpoint of line segment AC, and the length of line segment DB is ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/449.png", "question": "As shown in the figure, in the quadrilateral ABCD, the angular bisector of angle DAB and the bisector of exterior angle of angle ABC intersect at point P, and angle angle D + angle C = 200.0, then angle P = ()", "ground_truth": "10\u00b0"} +{"image_path": "./Geo170K/images/test/450.png", "question": "As shown in the figure, BP bisects angle ABC and it intersects CD at point F, DP bisects angle ADC and it intersects AB at point E, if angle A = 40.0, angle P = 38.0, then the degree of angle C is ()", "ground_truth": "36\u00b0"} +{"image_path": "./Geo170K/images/test/451.png", "question": "As shown in the figure, extend the line segment AB to C with the length of 8.0, so that BC = 4.0, M and N are the midpoints of AB and BC respectively, then the length of MN is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/452.png", "question": "As shown in the figure, the four points A, B, C, and D are all on circle O, angle BOD = 110.0, then the degree of angle BCD is ()", "ground_truth": "125\u00b0"} +{"image_path": "./Geo170K/images/test/453.png", "question": "As shown in the figure, the quadrilateral ABCD is an inscribed quadrilateral of circle O, angle BCD = 110.0, then the degree of angle BOD is ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/454.png", "question": "As shown in the figure, a cargo ship sails from point A to point D in the east direction at a speed of 24.0 nautical mile/hour. At point A, a certain island C is measured in the direction 60.0 east by north. The cargo ship arrived at point B after sailing for 30.0 minutes. At this time, it was measured that the island is in the direction 30.0 east by north. Then the shortest distance between the cargo ship and the island C is ()", "ground_truth": "6\u221a{3}\u6d77\u91cc"} +{"image_path": "./Geo170K/images/test/455.png", "question": "As shown in the figure, PA, PB are tangent to circle O at points A, B, point C is a point on circle O, and angle P = 36.0, then angle ACB = ()", "ground_truth": "72\u00b0"} +{"image_path": "./Geo170K/images/test/456.png", "question": "As shown in the figure, PA and PB are tangent to circle O at A and B respectively, angle C = 55.0, then angle P is equal to ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/457.png", "question": "As shown in the figure, in a square grid with edge length 1.0. 
Connect grid points D, N and E, C, DN and EC intersect at point P, then tanangle CPN is ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/458.png", "question": "As shown in the figure, in Rttriangle ABC, angle C = 90.0, AB = 10.0, AC = 8.0, then sinB is equal to ()", "ground_truth": "\\frac{4}{5}"} +{"image_path": "./Geo170K/images/test/459.png", "question": "As shown in the figure, the quadrilateral ABCD is the circumscribed quadrilateral of circle O, and AB = 10.0, CD = 12.0, then the perimeter of the quadrilateral ABCD is ()", "ground_truth": "44"} +{"image_path": "./Geo170K/images/test/460.png", "question": "It is known that for a horizontally placed cylindrical drainage pipe, the radius of the pipe section is 1.0, if the water surface is high 0.2. Then the width of the water surface of the drainage pipe section is ()", "ground_truth": "1.2m"} +{"image_path": "./Geo170K/images/test/461.png", "question": "As shown in the figure, A, B, C are the three points on circle O, AB, AC are on the both sides of the center O, if angle ABO = 20.0, angle ACO = 30.0, then the degree of angle BOC is ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/462.png", "question": "As shown in the figure, in the rectangular coordinate system xOy, point A is on the positive semi-axis of the y-axis, points B and C are on the positive semi-axis of x, and angle BAC = angle ACB = 30.0, AC = 4.0, point D is a moving point on the x-axis, the symmetrical points of point D with respect to the straight lines AB and AC are E and F, then the minimum value of the line segment EF is equal to ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/463.png", "question": "As shown in the figure, angle BAC = 110.0, if A and B are symmetrical with respect to the line MP, A and C are symmetrical with respect to the line NQ, then the size of angle PAQ is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/464.png", "question": "As shown in the figure, AB parallel CD, BE perpendicularly bisects AD, DC = BC, if angle A = 70.0, then angle C = ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/465.png", "question": "As shown in the figure, in triangle ABC, AB = 10.0, AC = 18.0, point M starts from point A and moves to point B at a speed of 2.0 per second, and point N starts from point C and moves to point A at a speed of 3.0 per second. One of the moving points reaches the endpoint, and the other moving point also stops. When triangle AMN is an isosceles triangle with MN as the base, the movement time is ()", "ground_truth": "3.6\u79d2"} +{"image_path": "./Geo170K/images/test/466.png", "question": "As shown in the figure, in triangle ABC, angle ABC = 110.0, AM = AN, CN = CP, then angle MNP = ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/467.png", "question": "As shown in the figure, it is known that the bisectors of the four inner corners of parallelogram ABCD intersect at points E, F, G, and H respectively. Connect AC. If EF = 2.0, FG = GC = 5.0, then the length of AC is ()", "ground_truth": "13"} +{"image_path": "./Geo170K/images/test/468.png", "question": "As shown in the figure, parallelogram ABCD, points E and F are on AD and AB respectively, and connect EB, EC, FC, and FD in turn. 
The area of \u200b\u200bthe shaded part in the figure is S~ 1 ~, S~ 2 ~, S~ 3 ~ , S~ 4 ~, S~ 1 ~ = 1.0, S~ 2 ~ = 2.0, S~ 3 ~ = 3.0, then the value of S~ 4 ~ is ()", "ground_truth": "7"} +{"image_path": "./Geo170K/images/test/469.png", "question": "As shown in the figure, in parallelogram ABCD, the diagonal AC and BD intersect at point O, and points E and F are the midpoints of AB and AO respectively. Connect EF. If EF = 3.0, the length of BD is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/470.png", "question": "As shown in the figure, parallelogram ABCD's diagonal AC, BD intersect at O, EF passes through point O, and intersects AD, BC at E, F respectively. It is known that the area of \u200b\u200bparallelogram ABCD is 20.0 ^2.0, then the area of \u200b\u200bthe shaded part in the figure is ()", "ground_truth": "5cm^2^"} +{"image_path": "./Geo170K/images/test/471.png", "question": "As shown in the figure, in parallelogram ABCD, the bisector of angle BCD intersects AD at point E, and it intersects the extended line of BA at point F, BF = 4 AF, BC = 12.0, then the length of AF is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/472.png", "question": "As shown in the figure, in parallelogram ABCD, AB = 10.0, AD = 15.0, AC and BD intersect at point O. OE perpendicular BD and it intersects AD at E, then the perimeter of triangle ABE is ()", "ground_truth": "25cm"} +{"image_path": "./Geo170K/images/test/473.png", "question": "As shown in the figure, in the parallelogram ABCD, E and F are the midpoints of AD and BC respectively, P is the moving point on the edge DC, G and H are the midpoints of PE and PF respectively, it is known that DC = 10.0, then length of GH is ()", "ground_truth": "5cm"} +{"image_path": "./Geo170K/images/test/474.png", "question": "As shown in the figure, in the parallelogram ABCD, the diagonals AC and BD intersect at the point O, and the point E is the midpoint of CD. Connect OE. If the perimeter of the parallelogram ABCD is 24.0 and BD = 8.0, then the perimeter of triangle DOE is ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/475.png", "question": "As shown in the figure, in the parallelogram ABCD, point E is a point on AB. Connect DE and CE. If DE and CE are the angular bisectors of angle ADC and angle BCD, and AB = 4.0, then the perimeter of the parallelogram ABCD is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/476.png", "question": "As shown in the figure, make three parallel lines through a point in the triangle. If the perimeter of the triangle is 6.0, then the sum of the perimeters of the three shaded triangles in the figure is ()", "ground_truth": "6cm"} +{"image_path": "./Geo170K/images/test/477.png", "question": "As shown in the figure, in triangle ABC, the straight line DE parallel BC, angle ABC, angle ACB passing through the vertex A intersects DE at points E and D, respectively. 
If AC = 3.0, AB = 4.0, then the length of DE is ()", "ground_truth": "7"} +{"image_path": "./Geo170K/images/test/478.png", "question": "As shown in the figure, in triangle ABC, angle B = angle C, D is on BC, angle BAD = 50.0, AE = AD, then the degree of angle EDC is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/479.png", "question": "As shown in the figure, in the quadrilateral ABCD, AD parallel BC, BF bisects angle ABC and it intersects AD at point F, CE bisects angle BCD, and it intersects AD at point E, AB = 8.0, CD = 6.0, EF = 2.0, then the length of AD is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/480.png", "question": "As shown in the figure, in triangle ABC, AB = 10.0, AC = 6.0, the straight line DE parallel CB passing through point A, the bisectors of angle ABC and angle ACB intersect DE at E, D respectively, then the length of DE is ()", "ground_truth": "16"} +{"image_path": "./Geo170K/images/test/481.png", "question": "As shown in the figure, it is known that the length of one waist AB of the isosceles triangle ABC is 4.0 centimetres. Cross any point D on the bottom edge BC to draw two waist parallel lines, and they intersect the two waists at E and F respectively, then the perimeter of the quadrilateral AEDF is ()", "ground_truth": "8\u5398\u7c73"} +{"image_path": "./Geo170K/images/test/482.png", "question": "As shown in the figure, in the equilateral triangle ABC, BD bisects angle ABC and it intersects AC at point D, and cross D to draw DE perpendicular BC at point E, and CE = 1.5, then the length of AB is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/483.png", "question": "A ship departs from point A on the sea level and travels 40.0 nautical miles to the west by south direction 40.0 to point B, and then travels 40.0 nautical miles from point B to the west by north 20.0 direction to point C, then the distance between A and C is ( )", "ground_truth": "40\u6d77\u91cc"} +{"image_path": "./Geo170K/images/test/484.png", "question": "As shown in the figure, there is the \"herringbone\" steel frame, where the inclined beam AB = AC, the top angle angle BAC = 120.0, the span BC = 10.0, AD is the pillar (ie the center line of the bottom BC), two support frames DE perpendicular AB, DF perpendicular AC, then DE + DF is equal to ()", "ground_truth": "5m"} +{"image_path": "./Geo170K/images/test/485.png", "question": "Translate triangle ABC to the right along CB to get triangle DEF. If the area of \u200b\u200bthe quadrilateral ABED is equal to 32.0, the translation distance is equal to ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/486.png", "question": "As shown in the figure, in triangle ABC, angle ACB = 90.0, angle ABC = 60.0, BD bisects angle ABC, P point is the midpoint of BD, if BD = 6.0, the length of CP is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/487.png", "question": "As shown in the figure, the height of the floor of a truck compartment from the ground is frac {3.0}{2.0}. In order to facilitate the loading, a wooden board is often used to form an inclined plane. If the angle between the inclined plane and the horizontal ground is not greater than 30.0, the length of this wooden board is at least ( )", "ground_truth": "3\u7c73"} +{"image_path": "./Geo170K/images/test/488.png", "question": "As shown in the figure, in Rttriangle ABC, angle ACB = 90.0, AC = 6.0, BC = 8.0, AD is the bisector of angle BAC. 
If P and Q are the moving points on AD and AC respectively, then the minimum value of PC + PQ is ()", "ground_truth": "\\frac{24}{5}"} +{"image_path": "./Geo170K/images/test/489.png", "question": "As shown in the figure, in the quadrilateral ABCD, angle BAD = 130.0, angle B = angle D = 90.0, points E and F are the moving points on the line segments BC and DC, respectively. When the perimeter of triangle AEF is the smallest, then the degree of angle EAF is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/490.png", "question": "As shown in the figure, the perimeter of triangle ABC is 16.0. Point D is the midpoint of the AB, BD = 2.0, passing point D is the vertical line l of AB, and E is any point on l, then the minimum perimeter of triangle AEC is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/491.png", "question": "As shown in the figure, OA and OB are the perpendicular bisectors of the line segments MC and MD respectively, MD = 5.0, MC = 7.0, CD = 10.0, a small ant starts from point M and climbs to any point E on OA, and then climbs to any point F on OB , and then climbs back to point M, the shortest path the little ant crawls can be ()", "ground_truth": "10cm"} +{"image_path": "./Geo170K/images/test/492.png", "question": "As shown in the figure, in triangle ABC, BF bisects angle ABC, crossing point A to draw AF perpendicular BF, the foot of perpendicular is F and extend BC to point G, D is the midpoint of AB. Connect DF and extend to intersect AC at point E. If AB = 12.0, BC = 20.0, then the length of the line segment EF is ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/493.png", "question": "As shown in the figure, in triangle ABC, points D and E are the midpoints of AB and AC respectively. If DE = 1.5, the length of BC is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/494.png", "question": "As shown in the figure, in triangle ABC, BD and CE are angular bisectors, AM perpendicular BD at point M, AN perpendicular CE at point N. The perimeter of triangle ABC is 30.0, BC = 12.0. Then the length of MN is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/495.png", "question": "As shown in the figure, in triangle ABC, D and E are the midpoints of BC and AC respectively. BF bisects angle ABC and intersects DE at point F. If BC = 6.0, then the length of DF is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/496.png", "question": "The students have all played the game of seesaw. The picture is a schematic diagram of a seesaw. The column OC is perpendicular to the ground, OA = OB. When one end of the seesaw A touches the ground, angle AOA\u2032 = 50.0, then when the other end B of the seesaw touches the ground, angle COB\u2032 is equal to ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/497.png", "question": "As shown in the figure, in triangle ABC, AB = AC, angle A = 40.0, DE bisects AC perpendicularly, then the degree of angle BCD is equal to ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/498.png", "question": "As shown in the figure, PA and PB are two tangents of circle O with radius 1.0, points A and B are tangent points respectively, angle APB = 60.0, OP intersects chord AB at point C, and intersects circle O at point D. 
Then the area of \u200b\u200bthe shaded part in the figure is ()", "ground_truth": "\\frac{1}{6}\u03c0"} +{"image_path": "./Geo170K/images/test/499.png", "question": "As shown in the figure, in triangle ABC, angle A = 90.0, AB = AC = 3.0, now rotate triangle ABC anticlockwise around point B by a certain angle, point C\u2032 falls on the straight line where the height of side BC is located, then the area swept by BC during the rotation of edge BC is ()", "ground_truth": "3\u03c0"} +{"image_path": "./Geo170K/images/test/500.png", "question": "As shown in the figure, the sector OAB and the sector OCD whose central angles are all 90.0 are stacked together, OA = 3.0, OC = 1.0, respectively connect AC and BD, then the area of \u200b\u200bthe shaded part in the figure is ()", "ground_truth": "2\u03c0"} +{"image_path": "./Geo170K/images/test/501.png", "question": "As shown in the figure, in order to green the environment, four sector open spaces with a radius of 1.0 are drawn at the four corners of the rectangular open space for greening, then the total green area is ()", "ground_truth": "\u03c0"} +{"image_path": "./Geo170K/images/test/502.png", "question": "The lateral surface of a staircase is shown in the figure. The measured length of AB is 3.0, and the slope ratio of the stair slope BC is 1.0:2.0, then the length of the slope BC of the staircase is ()", "ground_truth": "3\u221a{5}\u7c73"} +{"image_path": "./Geo170K/images/test/503.png", "question": "At a certain moment, there is a passenger ship at sea point P, and lighthouse A is measured in the direction 30.0 north by east of P, and is 50.0 nautical miles away. The passenger ship sails at the speed of 60.0 nautical mile/hour in the direction of 60.0 from north by west for $frac {2.0}{3.0}$hours to reach point B, then tanangle BAP = ()", "ground_truth": "\\frac{4}{5}"} +{"image_path": "./Geo170K/images/test/504.png", "question": "As shown in the figure, it is known that there is a laser auxiliary signal within a certain range of the lighthouse M. A ship is sailing at a constant speed from south by north at a constant speed at sea. The ship measured at A and measured that the lighthouse M was in the direction 30.0 to the east by north, and it traveled 1.0. Arrived at point B after hours, and just entered the laser signal area of \u200b\u200blighthouse M at this time. It is measured that lighthouse M is in the direction of 45.0 east by north, then the time for the ship to pass the laser signal area of \u200b\u200blighthouse M is ()", "ground_truth": "(\u221a{3}+1)\u5c0f\u65f6"} +{"image_path": "./Geo170K/images/test/505.png", "question": "As shown in the figure, at 8.0 in the morning, a ship departs from point A and sails northward at a speed of 15.0 nautical miles/hour, and arrives at point B at 9.0 and 40.0 minutes. From point A, lighthouse C is measured in the direction 26.0 west by north. From point B, lighthouse C is measured in the 52.0 direction west of north, then the distance from point B to lighthouse C is ()", "ground_truth": "25\u6d77\u91cc"} +{"image_path": "./Geo170K/images/test/506.png", "question": "At 9.0 in the morning, a ship departs from point A and sails in the direction due east at a speed of 40.0 nautical miles per hour, and arrives at point B at 9.0 and 30.0 minutes. As shown in the figure, the island M is measured from A and B. 
In the direction of 45.0 north by east and 15.0 north by east, then the distance between B and island M is ()", "ground_truth": "20\u221a{2}\u6d77\u91cc"} +{"image_path": "./Geo170K/images/test/507.png", "question": "In order to measure the width of parallel river AB, angle ACB = 30.0, angle ADB = 60.0, CD = 60.0, then the width of the river AB is ()", "ground_truth": "30\u221a{3}m"} +{"image_path": "./Geo170K/images/test/508.png", "question": "As shown in the figure, it is known that a fisherman on a fishing boat sees lighthouse M in the direction 60.0 east by north at point A. This fishing boat sails eastward at a speed of 28.0 nautical miles/hour, and arrives at point B in half an hour, and sees it at point B The lighthouse M is in the 15.0 direction to the east by north. At this time, the distance between the lighthouse M and the fishing boat is ()", "ground_truth": "7\u221a{2}\u6d77\u91cc"} +{"image_path": "./Geo170K/images/test/509.png", "question": "As shown in the figure, it is known thatfrac {OA}{DO}=frac {BO}{CO}=frac {1.0}{2.0}, the area of \u200b\u200btriangle AOB is 100.0 ^ 2, then the area of \u200b\u200btriangle DOC is ()", "ground_truth": "400cm\u00b2"} +{"image_path": "./Geo170K/images/test/510.png", "question": "As shown in the figure, in triangle ABC, angle BAC = 90.0, AD perpendicular BC at D, if AB = 3.0, BC = 5.0, then the length of DC ()", "ground_truth": "\\frac{16}{5}"} +{"image_path": "./Geo170K/images/test/511.png", "question": "As shown in the figure, in the parallelogram ABCD, AE:EB=1.0:2.0,S~triangle AEF~=3.0, then S~triangle FCD~ is ()", "ground_truth": "27"} +{"image_path": "./Geo170K/images/test/512.png", "question": "As shown in the figure, in the parallelogram ABCD, AE = EB, AF = 2.0, then the value of FC is ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/513.png", "question": "As shown in the figure, in Rttriangle ABC, angle BAC = 90.0, AD perpendicular BC at D, DE perpendicular AB at E, AD = 3.0, DE = 2.0, then the length of CD is ()", "ground_truth": "\\frac{3\u221a{5}}{2}"} +{"image_path": "./Geo170K/images/test/514.png", "question": "As shown in the figure, the known point D is the midpoint of AB, AF parallel BC, CG:GA=3.0:1.0,BC=8.0, then AF is equal to ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/515.png", "question": "As shown in the figure, it is known that the radius of circle O is 6.0, M is a point outside circle O, and OM = 12.0, the line passing M and circle O intersect at A and B, the symmetrical points of points A and B with respect to OM are C, D, AD and BC intersect at point P, then the length of OP is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/516.png", "question": "As shown in the figure, in parallelogram ABCD, E is the midpoint of CD, AE intersects BD at point O, S~triangle DCE~ = 12.0, then S~triangle AOD~ is equal to ()", "ground_truth": "24"} +{"image_path": "./Geo170K/images/test/517.png", "question": "As shown in the figure, in triangle ABC, angle ACB = 90.0, D is the point on AB, connect CD, angle ACD = angle B, if BC = 13.0, CD = 5.0, then BD = ()", "ground_truth": "12cm"} +{"image_path": "./Geo170K/images/test/518.png", "question": "As shown in the figure, it is known that D and E are the points on AB and AC in triangle ABC, DE parallel BC and frac {AD}{AB}=frac {1.0}{3.0}, the perimeter of triangle ADE is 2.0, then the perimeter of triangle ABC is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/519.png", "question": "As shown in the figure, in triangle 
ABC, D is a point on AC, if angle DBC = angle A, BC = 3.0, AC = 6.0, then the length of CD is ()", "ground_truth": "\\frac{3}{2}"} +{"image_path": "./Geo170K/images/test/520.png", "question": "As shown in the figure, DE parallel BC, BD, CE intersect at O, frac {EO}{OC}=frac {1.0}{3.0}, AE = N_3, then EB = ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/521.png", "question": "As shown in the figure, a beam of light reflects from point A (-3.0, 3.0), through point C on the y axis, and then passes through point B (-1.0, 0.0), then the length of the path of the light from point A to point B is ()", "ground_truth": "5"} +{"image_path": "./Geo170K/images/test/522.png", "question": "As shown in the figure, in triangle ABC, if DE parallel BC, frac {AD}{AB}=frac {1.0}{3.0}, DE = 4.0, then the length of BC is ()", "ground_truth": "12cm"} +{"image_path": "./Geo170K/images/test/523.png", "question": "As shown in the figure, it is known that D, E, and F are points on the side BC, CA, and AB of isosceles triangle ABC respectively. If AB = AC, angle FDE = angle B, BD = 2.0, CD = 3.0, CE = 4.0, AE = 1.0, then the length of AF is ()", "ground_truth": "3.5"} +{"image_path": "./Geo170K/images/test/524.png", "question": "As shown in the figure, the cross section of a small reservoir dam is a right trapezoid, the width of crest BC is 6.0, the height of dam is 14.0, and the slope of the slope CD is i = 1.0:2.0, then the length of the dam bottom AD is ()", "ground_truth": "34m"} +{"image_path": "./Geo170K/images/test/525.png", "question": "As shown in the figure, the slope of the slope formed by the conveyor belt and the ground is 1.0:2.0, it sends the object from the ground point A to the point B higher than the ground 2.0, then the distance the object travels from A to B is ()", "ground_truth": "2\u221a{5}"} +{"image_path": "./Geo170K/images/test/526.png", "question": "As shown in the figure, in triangle ABC, AB = AC = 18.0, BC = 12.0, the vertices E and F of the square DEFG are in triangle ABC, the vertices D and G are on AB and AC respectively, AD = AG, DG = 6.0, then the distance from point F to BC is ()", "ground_truth": "6\u221a{2}-6"} +{"image_path": "./Geo170K/images/test/527.png", "question": "As shown in the figure, in the square ABCD with edge length 9.0, F is a point on AB. Connect CF. 
Pass point F to draw FE perpendicular CF which intersects AD at point E, if AF = 3.0, then AE is equal to ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/528.png", "question": "As shown in the figure, in Rttriangle ABC, angle BAC = 90.0, AB = 2.0, AC = 3.0, D is the midpoint of BC, and moving points E and F are on AB and AC respectively, passing points to draw EG parallel AD parallel FH, and they intersect BC at points G and H, if EF parallel BC, then the value of EF + EG + FH is ()", "ground_truth": "\u221a{13}"} +{"image_path": "./Geo170K/images/test/529.png", "question": "As shown in the figure, in triangle ABC, D and E are points on AB and AC respectively, and DE parallel BC, if AD = 5.0, DB = 3.0, DE = 4.0, then BC is equal to ()", "ground_truth": "\\frac{32}{5}"} +{"image_path": "./Geo170K/images/test/530.png", "question": "As shown in the figure, in the parallelogram ABCD, E is the midpoint of DC, the area of \u200b\u200btriangle DEF is 2.0, then the area of \u200b\u200btriangle ABF is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/531.png", "question": "As shown in the figure, AB parallel CD, frac {AO}{OD}=frac {2.0}{3.0}, then the ratio of the perimeter of triangle AOB to the perimeter of triangle DOC is ()", "ground_truth": "\\frac{2}{3}"} +{"image_path": "./Geo170K/images/test/532.png", "question": "As shown in the figure, AB parallel CD, AC, BD intersect at O, BO = 6.0, DO = 3.0, AC = 12.0, then the length of AO is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/533.png", "question": "As shown in the figure, in triangle ABC, E and F are the midpoints of AB and AC respectively. If the area of \u200b\u200btriangle AEF is 1.0, then the area of \u200b\u200bthe quadrilateral EBCF is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/534.png", "question": "As shown in the figure, in the trapezoidal ABCD, AD parallel BC, diagonal AC, BD intersect at point O, if S~triangle AOD~:S~triangle OCD~ = 1.0:2.0, then S~triangle AOD~:S~triangle BOC~ = ()", "ground_truth": "\\frac{1}{4}"} +{"image_path": "./Geo170K/images/test/535.png", "question": "As shown in the figure, planting trees on the hillside, it is known that angle A = 30.0, AC = 3.0, the distance of slope AB of two adjacent trees is equal to ()", "ground_truth": "2\u221a{3}m"} +{"image_path": "./Geo170K/images/test/536.png", "question": "As shown in the figure, the elevation angle of the top of a building is 30.0 when viewed from point A in the air by a hot air balloon, and the depression angle of this building is 60.0. The horizontal distance between the hot air balloon and the building is 120.0. The height of this building is ()", "ground_truth": "160\u221a{3}m"} +{"image_path": "./Geo170K/images/test/537.png", "question": "As shown in the figure, a teaching interest group wants to measure the height of a tree CD. 
They firstly measured the elevation angle of the tree top C at point A as 30.0, and then proceeded 10.0 along the direction of AD to point B, and the elevation angle of tree top C measured at B is 60.0 (the three points A, B, and D are on the same straight line), then the height of the tree CD is ()", "ground_truth": "5\u221a{3}m"} +{"image_path": "./Geo170K/images/test/538.png", "question": "As shown in the figure, in order to measure the height of the TV tower AB, use the goniometer CD with a height of 1.0 at D, and measure the elevation angle of the top A of the TV tower to be 30.0, and then walk 120.0 in the direction of the TV tower to F, and the elevation angle of the top A of the TV tower is 60.0, then the height of this TV tower AB (unit:) is ()", "ground_truth": "60\u221a{3}+1"} +{"image_path": "./Geo170K/images/test/539.png", "question": "As shown in the figure, in a mathematics extracurricular practice activity, Xiaowen measured the elevation angle of the top A of the tree at point C to be 37.0, BC = 20.0, then the height of the tree AB is () (reference data: sin37\u00b0 approximate 0.6, cos37\u00b0 approximate 0.8, tan37\u00b0 approximate 0.75)", "ground_truth": "15m"} +{"image_path": "./Geo170K/images/test/540.png", "question": "As shown in the figure, to build a highway in a certain place, a tunnel must be built from B to C (B and C are on the same level). In order to measure the distance between B and C, an engineer took a hot air balloon to start from C and rose vertically 100.0 to reach A. Observing the depression angle of B at A is 30.0, then the distance between B and C is ()", "ground_truth": "100\u221a{3}m"} +{"image_path": "./Geo170K/images/test/541.png", "question": "In the mathematics practice inquiry class, the teacher arranged for the students to measure the height of the school flagpole. As shown in the figure, Xiao Ming's study group is at a distance of 10.0 from the bottom of the flagpole. The elevation angle of the top of the flagpole is measured with a goniometer as 60.0, then the height of the flagpole is ().", "ground_truth": "10\u221a{3}"} +{"image_path": "./Geo170K/images/test/542.png", "question": "As shown in the figure, to measure the height AB of a tower that cannot be reached at the bottom, two students of A and B took measurements at C and D respectively. Given that the points B, C and D are on the same straight line, and AB perpendicular BD, CD = 12.0, angle ACB = 60.0, angle ADB = 30.0, the height of the tower AB is ()", "ground_truth": "6\u221a{3}\u7c73"} +{"image_path": "./Geo170K/images/test/543.png", "question": "As shown in Figure 1, the clock face of a clock is fixed perpendicularly on the horizontal desktop, and there is a point A on the minute hand, and when the clock face displays 3.0 o'clock 30.0 minutes, the minute hand is perpendicular to the desktop, and the height from point A to the desktop is 10.0 cm. 
As shown in Figure 2, if the clock face displays 3.0 o'clock and 45.0 minutes, and the height of point A from the desktop is 16.0 cm, then the clock face displays 3.0 o'clock and 50.0 minutes, how many centimeters is the height of point A from the desktop ()", "ground_truth": "19"} +{"image_path": "./Geo170K/images/test/544.png", "question": "As shown in the figure, PA and PB are the tangents of circle O, AC is the diameter of circle O, angle P = 50.0, then the degree of angle BOC is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/545.png", "question": "As shown in the figure, in Rttriangle ABC, AD perpendicular BC at D, DE perpendicular AB at E, if AD = 3.0, DE = 2.0, then AC = ()", "ground_truth": "\\frac{9}{2}"} +{"image_path": "./Geo170K/images/test/546.png", "question": "As shown in the figure, in triangle ABC, AB = BC = 2.0, circle O with AB as the diameter is tangent to BC at point B, then AC is equal to ()", "ground_truth": "2\u221a{2}"} +{"image_path": "./Geo170K/images/test/547.png", "question": "Definition: The minimum value of the distance between a fixed point A and any point on circle O is called the distance between point A and circle O. There is a rectangle ABCD (as shown in the figure), AB = 14.0, BC = 12.0, circle K and the edges AB, BC, and CD of the rectangle are respectively tangent to the points E, F, G, then the distance between point A and circle K is ()", "ground_truth": "4cm"} +{"image_path": "./Geo170K/images/test/548.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, angle BOC = 3.0 angle AOB, if angle ACB = 20.0, then the degree of angle BAC is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/549.png", "question": "As shown in the figure, AB is the diameter of circle O, C and D are two points on circle O, CD perpendicular AB, if angle DAB = 70.0, then angle BOC = ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/550.png", "question": "As shown in the figure, A, B, and C are all points on circle O, if angle ABC = 110.0, then the degree of angle AOC is ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/551.png", "question": "Point B is on circle O, point C is a point different from A and B on circle O, if angle AOB = 50.0, then the degree of angle ACB is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/552.png", "question": "As shown in the figure, given that points A, B, and C are on circle O, angle AOB = 100.0, then the degree of angle ACB is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/553.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is the chord of circle O, angle ABD = 59.0, then angle C is equal to ()", "ground_truth": "31\u00b0"} +{"image_path": "./Geo170K/images/test/554.png", "question": "As shown in the figure, in the square ABCD, AB = 8.0, Q is the midpoint of CD, set angle DAQ = \u03b1, take a point P on CD, make angle BAP = 2.0 \u03b1, then the length of CP is ()", "ground_truth": "2"} +{"image_path": "./Geo170K/images/test/555.png", "question": "As shown in the figure, the intersection of the two diagonals of the rectangle is 60.0, AC + BD = 20.0, then the length of AB is ()", "ground_truth": "5cm"} +{"image_path": "./Geo170K/images/test/556.png", "question": "As shown in the figure, in the diamond ABCD, angle BAD = 120.0, the length of the diagonal AC is 3.0, then the perimeter of the diamond ABCD is ()", "ground_truth": "12"} 
+{"image_path": "./Geo170K/images/test/557.png", "question": "As shown in the figure, angle MON = 90.0, moving points A and B are respectively located on the radials OM and ON, the edge AB of the rectangle ABCD = 6.0, BC = 4.0, then the maximum length of the line segment OC is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/558.png", "question": "As shown in the figure, in the diamond ABCD, angle BAD = 120.0, BC = 10.0, then the length of the diagonal AC is equal to ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/559.png", "question": "As shown in the figure, the perimeter of the diamond ABCD is 16.0, angle A = 60.0, then the length of the diagonal BD is ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/560.png", "question": "As shown in the figure, in the diamond ABCD, AB = 5.0, angle B = 60.0, then the diagonal AC is equal to ()", "ground_truth": "5"} +{"image_path": "./Geo170K/images/test/561.png", "question": "As shown in the figure, in the diamond ABCD, AB = 15.0, angle ADC = 120.0, then the distance between the two points B and D is ()", "ground_truth": "15"} +{"image_path": "./Geo170K/images/test/562.png", "question": "As shown in the figure, in the diamond ABCD, two diagonal lines AC = 12.0, BD = 16.0, then the edge length of this diamond is ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/563.png", "question": "As shown in the figure, in the diamond ABCD, angle BAD = 80.0, the perpendicular bisector of AB intersects the diagonal AC at point F, E is the foot of perpendicular. Connect DF, then angle CDF is equal to ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/564.png", "question": "As shown in the figure, in the diamond ABCD, angle B = 60.0, AB = 2.0, E and F are the midpoints of BC and CD respectively, connect AE, EF, and AF, then the perimeter of triangle AEF is ()", "ground_truth": "3\u221a{3}cm"} +{"image_path": "./Geo170K/images/test/565.png", "question": "As shown in the figure, in parallelogram ABCD, BC = BD, angle C = 65.0, then the degree of angle ADB is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/566.png", "question": "As shown in the figure, in parallelogram ABCD, AB = 6.0, BC = 8.0, the bisector of angle C intersects AD at E, and intersects the extended line of BA at F, then the value of AE + AF is equal to ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/567.png", "question": "As shown in the figure, in parallelogram ABCD, AE perpendicular BC is at E, AF perpendicular DC and it intersects the extended line of DC at point F, and angle EAF = 60.0, then angle B is equal to ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/568.png", "question": "As shown in the figure, in parallelogram ABCD, AE bisects angle BAD, if CE = 3.0, AB = 4.0, then the perimeter of parallelogram ABCD is ()", "ground_truth": "22cm"} +{"image_path": "./Geo170K/images/test/569.png", "question": "As shown in the figure, in parallelogram ABCD, AE bisects angle BAD, and it is known that angle AEB = 63.0, then the degree of angle D is ()", "ground_truth": "54\u00b0"} +{"image_path": "./Geo170K/images/test/570.png", "question": "As shown in the figure, in parallelogram ABCD, the diagonal AC and BD intersect at point O, AC = 10.0, BD = 6.0, AD = 4.0, then the area of \u200b\u200bparallelogram ABCD is ()", "ground_truth": "24"} +{"image_path": "./Geo170K/images/test/571.png", "question": "As shown in the figure, the diagonal of the parallelogram ABCD 
intersects at the point O, and AB = 6.0, the perimeter of triangle OCD is 19.0, then the sum of the two diagonals of parallelogram ABCD is ()", "ground_truth": "26"} +{"image_path": "./Geo170K/images/test/572.png", "question": "As shown in the figure, in the parallelogram ABCD, if angle B = 60.0, then angle D is equal to ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/573.png", "question": "As shown in the figure, in the parallelogram ABCD, it is known that angle AOB = 90.0, AC = 8.0, AD = 5.0, then the length of BD is ()", "ground_truth": "6cm"} +{"image_path": "./Geo170K/images/test/574.png", "question": "As shown in the figure, in parallelogram ABCD, CE perpendicular AB, the foot of perpendicular is E, if angle A = 115.0, then angle BCE is equal to ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/575.png", "question": "As shown in the figure, in the parallelogram ABCD, CE bisects angle BCD and it intersects the AD edge at point E, and DE = 3.0, then the length of AB is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/576.png", "question": "In parallelogram ABCD, the diagonal AC and BD intersect at the point O, angle DAC = 42.0, angle CBD = 23.0, then angle COD is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/577.png", "question": "As shown in the figure, the diagonals AC and BD of the parallelogram ABCD intersect at point O, point E is the midpoint of CD, and the perimeter of triangle ABD is 16.0, then the perimeter of triangle DOE is ()", "ground_truth": "8cm"} +{"image_path": "./Geo170K/images/test/578.png", "question": "As shown in the figure, in parallelogram ABCD, BM is the bisector of angle ABC and it intersects CD at point M, and MC = 2.0, the perimeter of parallelogram ABCD is 14.0, then DM is equal to ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/579.png", "question": "As shown in the figure, P is a point of parallelogram ABCD. Given that S~triangle ABP~ = 3.0, S~triangle PDC~ = 2.0, then the area of \u200b\u200bthe parallelogram ABCD is ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/580.png", "question": "As shown in the figure, in parallelogram ABCD, AE bisects angle BAD and it intersects BC at point E. If AD = 8.0, EC = 2.0, then the length of AB is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/581.png", "question": "As shown in the figure, in the parallelogram ABCD, the straight line CE perpendicular AB passing through the point C, the foot of perpendicular is E, if angle EAD = 54.0, then the degree of angle BCE is ()", "ground_truth": "36\u00b0"} +{"image_path": "./Geo170K/images/test/582.png", "question": "As shown in the figure, in parallelogram ABCD, BD = CD, angle C = 70.0, AE perpendicular BD at point E, then the degree of angle BAE is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/583.png", "question": "As shown in the figure, P is a point in the parallelogram ABCD, and cross point P to draw the parallel line of AB and AD to intersect the parallelogram at the four points of E, F, G, and H. 
If S~AHPE~ = 3.0, S~PFCG~ = 5.0 , Then S~triangle PBD~ is ()", "ground_truth": "1"} +{"image_path": "./Geo170K/images/test/584.png", "question": "As shown in the figure, in parallelogram ABCD, angle A = 120.0, then angle 1 = ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/585.png", "question": "As shown in the figure, in parallelogram ABCD, CE perpendicular AB, point E is the foot of perpendicular, if angle D = 55.0, then angle BCE = ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/586.png", "question": "As shown in the figure, in parallelogram ABCD, angle ABC = 60.0, AB = BC = 4.0, points M and N are on edges BC and CD respectively, and angle MAN = 60.0, then the area of \u200b\u200bthe quadrilateral AMCN is ()", "ground_truth": "4\u221a{3}cm\u00b2"} +{"image_path": "./Geo170K/images/test/587.png", "question": "As shown in the figure, in the parallelogram ABCD, AB = 4.0, BC = 6.0, and the perpendicular bisector of AC intersects AD at point E, then the perimeter of triangle CDE is ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/588.png", "question": "As shown in the figure, in parallelogram ABCD, AD = 3.0, DC = 5.0, and the perpendicular bisector of BD intersects BD at point E, then the perimeter of triangle BCE is ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/589.png", "question": "As shown in the figure, the perimeter of parallelogram ABCD is 10.0, AC and BD intersect at point O, and OE perpendicular AC and it intersects AD at E, then the perimeter of triangle DCE is ()", "ground_truth": "5cm"} +{"image_path": "./Geo170K/images/test/590.png", "question": "As shown in the figure, in the parallelogram ABCD, it is known that AB = 6.0, BC = 9.0, angle B = 30.0, then the area of \u200b\u200bthe parallelogram ABCD is ()", "ground_truth": "27"} +{"image_path": "./Geo170K/images/test/591.png", "question": "As shown in the figure, in parallelogram ABCD, angle AEB = 36.0, BE bisectes angle ABC, then angle C is equal to ()", "ground_truth": "108\u00b0"} +{"image_path": "./Geo170K/images/test/592.png", "question": "As shown in the figure, the diagonal of the parallelogram ABCD intersects at the point O, and AB = 5.0, the perimeter of triangle OCD is 23.0, then the sum of the two diagonals of the parallelogram ABCD is ()", "ground_truth": "36"} +{"image_path": "./Geo170K/images/test/593.png", "question": "As shown in the figure, in parallelogram ABCD, the diagonal AC and BD intersect at point O, if AC = 12.0, BD = 8.0, AB = 7.0, then the perimeter of triangle OAB is ()", "ground_truth": "17"} +{"image_path": "./Geo170K/images/test/594.png", "question": "The perimeter of the parallelogram ABCD is 28.0, AC and BD intersect at point O, the perimeter of triangle AOB is 4.0 larger than the perimeter of triangle OBC, then AB is equal to ()", "ground_truth": "9cm"} +{"image_path": "./Geo170K/images/test/595.png", "question": "As shown in the figure, in the parallelogram ABCD, the diagonal AC and BD intersect at point O, and cross O point to draw OE parallel BC and it intersects DC at point E. 
If OE = 2.5, then the length of AD is ()", "ground_truth": "5"} +{"image_path": "./Geo170K/images/test/596.png", "question": "As shown in the figure, the diagonal AC and BD of the parallelogram ABCD intersect at the point O, AB = 7.0, AC = 10.0, the perimeter of triangle ABO is 16.0, then the length of the diagonal BD is equal to ()", "ground_truth": "8"} +{"image_path": "./Geo170K/images/test/597.png", "question": "As shown in the figure, E is any point in parallelogram ABCD, if S~quadrilateral ABCD~ = 6.0, then the area of \u200b\u200bthe shaded part in the figure is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/598.png", "question": "As shown in the figure, in the parallelogram ABCD, AB = 4.0, the bisector of angle BAD and the extended line of BC intersect at point E, and DC at point F, and point F is the midpoint of DC, DG perpendicular AE, foot of perpendicular is G, if DG = 1.0, then the edge length of AE is ()", "ground_truth": "4\u221a{3}"} +{"image_path": "./Geo170K/images/test/599.png", "question": "As shown in the figure, AB is the chord of circle O, passing point A to draw the tangent AC of circle O. If angle BAC = 55.0, then angle AOB is equal to ()", "ground_truth": "110\u00b0"} +{"image_path": "./Geo170K/images/test/600.png", "question": "As shown in the figure, the line segment AB crosses the center O, intersects circle O at points A and C, angle B = 30.0, and the straight line BD and circle O tangent to point D, then the degree of angle ADB is ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/601.png", "question": "As shown in the figure, in the parallelogram ABCD, AC and BD are diagonals, BC = 6.0, and the height on BC is 4.0, then the area of \u200b\u200bthe shaded part in the figure is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/602.png", "question": "As shown in the figure, the parallelogram ABCD is divided into 4.0 parallelograms. It is known that the three areas are 8.0, 10.0, and 30.0, then the area of \u200b\u200bthe fourth parallelogram is ()", "ground_truth": "24"} +{"image_path": "./Geo170K/images/test/603.png", "question": "As shown in the figure, the diagonal AC and BD of parallelogram ABCD intersect at point O, if AC + BD = 10.0, BC = 4.0, then the perimeter of triangle BOC is ()", "ground_truth": "9"} +{"image_path": "./Geo170K/images/test/604.png", "question": "As shown in the figure, in parallelogram ABCD, AC and BD intersect at point O, points E and F are on edges AD and BC respectively, and EF passes through point O. If AB = 3.0, BC = 5.0, EF = AB, then the perimeter of the quadrilateral CDEF is ()", "ground_truth": "11"} +{"image_path": "./Geo170K/images/test/605.png", "question": "As shown in the figure, the perpendicular bisector of the diagonal AC of the parallelogram ABCD and the edges BC and DA intersect at E and F, respectively, and connect CF. 
If the perimeter of the parallelogram ABCD is equal to 18.0, then the perimeter of triangle CDF is equal to ()", "ground_truth": "9cm"} +{"image_path": "./Geo170K/images/test/606.png", "question": "As shown in the figure, in triangle ABC, AB = AC, angle A = 40.0, draw an arc with C as the center and the length of CB as the radius, intersect AB at point D, connect CD, then angle ACD is equal to ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/607.png", "question": "As shown in the figure, in circle O, it is known that angle AOB = 110.0, C is a point on the circle, then angle ACB is ()", "ground_truth": "125\u00b0"} +{"image_path": "./Geo170K/images/test/608.png", "question": "As shown in the figure, CD is the diameter of circle O, chord DE parallel OA, if the degree of angle D is 50.0, then the degree of angle C is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/609.png", "question": "As shown in the figure, BD is the diameter of circle O, points A and C are on circle O, and BD perpendicular AC, if the degree of arc AB is 60.0, then the degree of angle BDC is ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/610.png", "question": "As shown in the figure, it is known that AB and AD are the chords of circle O, angle B = 20.0, point C is on chord AB, connect CO and extend CO to intersect circle O at point D, angle D = 15.0, then the degree of angle BAD is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/611.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, and it is known that angle C = 60.0, then the degree of angle BAO is ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/612.png", "question": "As shown in the figure, AB is the diameter of the semicircle, angle ABC = 50.0, point D is the midpoint of arc AC, then angle DAB is equal to ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/613.png", "question": "As shown in the figure, AB is the diameter of circle O, the chord CD and AB intersect, and angle ABC = 32.0, then the degree of angle CDB is ()", "ground_truth": "58\u00b0"} +{"image_path": "./Geo170K/images/test/614.png", "question": "As shown in the figure, arc AB is a semicircle. Connect AB, point O is the midpoint of AB, points C and D are on arc AB, connecting AD, CO, BC, BD, OD. If angle COD = 62.0 and AD parallel OC, then the size of angle ABD is ()", "ground_truth": "28\u00b0"} +{"image_path": "./Geo170K/images/test/615.png", "question": "As shown in the figure, AB is the diameter of circle O, point C and point D are on circle O. 
Connect AC, BC, AD, CD, if angle BAC = 50.0, then the degree of angle ADC is equal to ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/616.png", "question": "As shown in the figure, AB is the diameter of circle O, angle AOC = 140.0, then angle D is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/617.png", "question": "As shown in the figure, in circle O, the diameter AB perpendicular chord CD at point H, E is the point on circle O, if angle BEC = 25.0, then the degree of angle BAD is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/618.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is the chord of circle O, angle ABD = 53.0, then angle BCD is ()", "ground_truth": "37\u00b0"} +{"image_path": "./Geo170K/images/test/619.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, angle AOB = 60.0, then the degree of angle C is ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/620.png", "question": "As shown in the figure, points A, B, and C are three points on circle O, angle AOC = 110.0, then angle ABC is equal to ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/621.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is the chord, and AB perpendicular CD, the foot of perpendicular is the point E, it is known that angle COB = 60.0, then the degree of angle DAB is ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/622.png", "question": "As shown in the figure, it is known that AB is the diameter of circle O, if the degree of angle BOC is 50.0, then the degree of angle A is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/623.png", "question": "As shown in the figure, points A, B, and C are on circle O, angle AOB = 140.0, and the degree of angle ACB is ()", "ground_truth": "110\u00b0"} +{"image_path": "./Geo170K/images/test/624.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is a chord of circle O, and CD perpendicular AB at E, respectively connect AD and BC, it is known that angle D = 65.0, then angle OCD = ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/625.png", "question": "As shown in the figure, AB is the diameter of circle O, CD is the chord of circle O, angle ACD = 42.0, then angle BAD = ().", "ground_truth": "48"} +{"image_path": "./Geo170K/images/test/626.png", "question": "As shown in the figure, AB is the diameter of circle O, angle AOC = 110.0, then angle D = ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/627.png", "question": "As shown in the figure, AB is the diameter of circle O, point C is on circle O, if angle ABC = 30.0, then angle CAB is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/628.png", "question": "As shown in the figure, P is a point outside circle O, PA and PB intersect circle O at two points C and D respectively. 
It is known that the central angles of arc AB and arc CD are 90.0 and 50.0 respectively, then angle P = ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/629.png", "question": "As shown in the figure, given that the three points A, B and C are on circle O, AC perpendicular BO at D, angle B = 50.0, then the degree of angle BOC is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/630.png", "question": "As shown in the figure, in circle O, the length of chord AB is 2.0, OC perpendicular AB at C, OC = 1.0, if two tangents of circle O are drawn from a point P outside circle O, the tangent points are A and B respectively, then angle APB The degree is ()", "ground_truth": "90\u00b0"} +{"image_path": "./Geo170K/images/test/631.png", "question": "As shown in the figure, in triangle ABC, AB = 10.0, AC = 8.0, BC = 6.0, the moving circle passing through point C and tangent to edge AB intersects CA and CB at points P and Q respectively, then the minimum value of the length of the line segment PQ is ()", "ground_truth": "4.8"} +{"image_path": "./Geo170K/images/test/632.png", "question": "As shown in the figure, in triangle ABC, AB = 2.0, AC = 1.0, the circle with AB as the diameter is tangent to AC and intersects the edge BC at point D, then the length of AD is ()", "ground_truth": "\\frac{2}{5}\u221a{5}"} +{"image_path": "./Geo170K/images/test/633.png", "question": "As shown in the figure, AB is the diameter of circle O, point C is on the extended line of AB, CD is tangent to circle O, and the tangent point is D. If angle A = 35.0, then angle C is equal to ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/634.png", "question": "As shown in the figure, in Rttriangle ABC, angle ACB = 90.0, AC = 4.0, BC = 3.0, the circle with AC as the diameter intersects AB at D, then the length of AD is ()", "ground_truth": "\\frac{16}{5}"} +{"image_path": "./Geo170K/images/test/635.png", "question": "As shown in the figure, it is known that AD and BC intersect at point O, AB parallel CD, if angle B = 40.0, angle D = 30.0, then the size of angle AOC is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/636.png", "question": "As shown in the figure, in triangle ABC, AB = AC, D is the midpoint of BC, angle B = 40.0, then angle BAD = ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/637.png", "question": "As shown in the figure, angle A = 70.0, angle 2 = 130.0, then angle 1 = ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/638.png", "question": "As shown in the figure, in the quadrilateral ABCD, angle BAD = 120.0, angle B = angle D = 90.0, if you find a point M on BC and CD respectively, so that the perimeter of triangle AMN is the smallest, then the degree of angle AMN + angle ANM is ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/639.png", "question": "As shown in the figure, fold the rectangle ABCD along the line segment OG to the position of OB'C'G, angle OGC' is equal to 100.0, then the degree of angle DGC' is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/640.png", "question": "As shown in the figure, AB is the diameter of circle O, if angle BDC = 40.0, then the degree of angle BOC is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/641.png", "question": "As shown in the figure, fold triangle ABC so that point A coincides with point D at BC, and the crease is MN. 
If AB = 9.0, BC = 6.0, then the perimeter of triangle DNB is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/642.png", "question": "As shown in the figure, the perimeter of parallelogram ABCD is 36.0, the diagonal AC and BD intersect at point O, point E is the midpoint of CD, BD = 12.0, then the perimeter of triangle DOE is ()", "ground_truth": "15"} +{"image_path": "./Geo170K/images/test/643.png", "question": "As shown in the figure, points A, B, C, D are on circle O, angle AOC = 140.0, point B is the midpoint of arc AC, then the degree of angle D is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/644.png", "question": "As shown in the figure, AB is the diameter of circle O, and points C and D are on circle O. If angle ABD = 50.0, then the degree of angle BCD is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/645.png", "question": "As shown in the figure, in triangle ABC, angle CAB = 30.0, rotate triangle ABC anticlockwise in the plane around point A to the position of triangle AB'C', and CC' parallel AB, then the degree of rotation angle is ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/646.png", "question": "As shown in the figure, O is a point on the straight line AB, angle 1 = 40.0, OD bisects angle BOC, then the degree of angle 2 is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/647.png", "question": "As shown in the picture, it is a beautiful Pythagorean tree, in which all quadrilaterals are squares, and all triangles are right triangles. The areas of square A, B, C, and D are 2.0, 5.0, 1.0, 2.0, respectively. Then the area of \u200b\u200bthe largest square E is ().", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/648.png", "question": "As shown in the figure, the straight lines AB and CD are cut by BC. If AB parallel CD, angle 1 = 45.0, angle 2 = 35.0, then angle 3 = ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/649.png", "question": "As shown in the figure, in Rttriangle ABC, angle BAC = 90.0, rotate triangle ABC clockwise around point A by 90.0 to obtain triangle AB\u2032C\u2032 (the corresponding point of point B is point B\u2032, and the corresponding point of point C is point C \u2032), connect CC\u2032. 
If angle CC\u2032B\u2032 = 32.0, then the size of angle AC\u2032B\u2032 is ()", "ground_truth": "13\u00b0"} +{"image_path": "./Geo170K/images/test/650.png", "question": "Known: As shown in the figure, AB parallel CD, BC bisects angle ABD, and angle C = 40.0, then the degree of angle D is ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/651.png", "question": "Given that the straight line a parallel b, angle 1 and angle 2 are mutually complementary, angle 3 = 121.0, then angle 4 is equal to ()", "ground_truth": "149\u00b0"} +{"image_path": "./Geo170K/images/test/652.png", "question": "In triangle ABC, AB = AC, D and E are respectively on BC and AC, AD = AE, angle CDE = 20.0, then the degree of angle BAD is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/653.png", "question": "As shown in the figure, the vertex A of the line parallel n, Rttriangle ABC is on the line n, angle C = 90.0, AB, CB intersect the line at point D and point E respectively, and DB = DE, if angle B = 25.0, then the degree of angle 1 is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/654.png", "question": "As shown in the figure, the straight line a parallel b, the straight line c and the straight lines a, b intersect at the points A, B, AM perpendicular b, and the foot of perpendicular is the point M. If angle 1 = 58.0, then angle 2 = ()", "ground_truth": "32\u00b0"} +{"image_path": "./Geo170K/images/test/655.png", "question": "As shown in the figure, put the right-angled vertex of a right triangle board on one side of the ruler, if angle 1 = 30.0, then angle 2 is ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/656.png", "question": "As shown in the figure, AB perpendicular CD at D, DE perpendicular DF, if angle BDE = 60.0, then angle CDF is equal to ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/657.png", "question": "As shown in the figure, the measured BD = 120.0, DC = 60.0, EC = 50.0, then the width of the river AB is ()", "ground_truth": "100m"} +{"image_path": "./Geo170K/images/test/658.png", "question": "As shown in the figure, in triangle ABE, the perpendicular bisector of AE MN intersects BE at point C, angle E = 30.0, and AB = CE, then the degree of angle BAE is ()", "ground_truth": "90\u00b0"} +{"image_path": "./Geo170K/images/test/659.png", "question": "As shown in the figure, in triangle ABC, D and E are points on edges AB and AC respectively, DE parallel BC, angle ADE = 35.0, angle C = 120.0, then angle A is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/660.png", "question": "As shown in the figure, points A, B, and C are on circle O, angle AOB = 72.0, then angle ACB is equal to ()", "ground_truth": "36\u00b0"} +{"image_path": "./Geo170K/images/test/661.png", "question": "Fold a rectangular piece of paper as shown in the picture, and then unfold it. If angle 1 = 56.0, then angle 2 is equal to ()", "ground_truth": "68\u00b0"} +{"image_path": "./Geo170K/images/test/662.png", "question": "As shown in the figure, the straight line a parallel b, angle 1 = 72.0, then the degree of angle 2 is ()", "ground_truth": "108\u00b0"} +{"image_path": "./Geo170K/images/test/663.png", "question": "As shown in the figure, the two vertices of a right triangle with 30.0 angle are placed on the opposite side of a rectangle. 
If angle 1 = 25.0, then the degree of angle 2 is ()", "ground_truth": "115\u00b0"} +{"image_path": "./Geo170K/images/test/664.png", "question": "As shown in the figure, it is known that AB and AD are the chords of circle O, angle ABO = 30.0, angle ADO = 20.0, then angle BAD = ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/665.png", "question": "As shown in the figure, C and D are two points on the line segment AB, if CB = 4.0, DB = 7.0, and D is the midpoint of AC, then the length of AB is equal to ()", "ground_truth": "10cm"} +{"image_path": "./Geo170K/images/test/666.png", "question": "As shown in the figure, C and D are two points on the line segment AB, if AC = 3.0, C is the midpoint of AD and AB = 10.0, then DB = ()", "ground_truth": "4cm"} +{"image_path": "./Geo170K/images/test/667.png", "question": "As shown in the figure, in circle O, AC is the diameter, MA and MB are tangent to circle O at points A, B, angle BAC = 25.0, then the size of angle AMB is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/668.png", "question": "Given: AB parallel CD, angle ABE = 120.0, angle C = 25.0, then the degree of angle \u03b1 is ()", "ground_truth": "85\u00b0"} +{"image_path": "./Geo170K/images/test/669.png", "question": "As shown in the figure, the straight lines AB and CD intersect at point O, OD bisects angle AOE, angle BOC = 50.0, then angle EOB = ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/670.png", "question": "As shown in the figure, a big tree breaks at B whose height is 9.0 from the ground, and the top A of the tree falls at 12.0 from the bottom C of the tree. The height before the break is ()", "ground_truth": "24\u7c73"} +{"image_path": "./Geo170K/images/test/671.png", "question": "As shown in the figure, cross point A on circle O to draw a tangent of circle O, and it intersects the extended line of diameter BC at point D, connect AB, if angle B = 25.0, then the degree of angle D is ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/672.png", "question": "As shown in the figure, place the right-angled vertex of the triangular plate (angle A = 30.0) with 30.0 angle on one of the two parallel lines. 
If angle 1 = 38.0, then the degree of angle 2 ()", "ground_truth": "22\u00b0"} +{"image_path": "./Geo170K/images/test/673.png", "question": "As shown in the figure, Xiaoming walks from point A in the direction of 80.0 to the north by east to point B, and then from point B to the direction of 25.0 to the south by west to point C, then the degree of angle ABC is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/674.png", "question": "As shown in the figure, given that the straight lines AB and CD intersect at point O, OE perpendicular AB, angle EOC = 30.0, then the degree of angle BOD is ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/675.png", "question": "The three views of a geometry are shown in the figure, where the front view and the left view are both equilateral triangles with edge length 2.0, then the surface area of \u200b\u200bthe geometry is ()", "ground_truth": "3\u03c0"} +{"image_path": "./Geo170K/images/test/676.png", "question": "As shown in the figure, in the right triangle ABC, angle C = 90.0, AB = 5.0, AC = 4.0, then the value of sinangle B is ()", "ground_truth": "\\frac{4}{5}"} +{"image_path": "./Geo170K/images/test/677.png", "question": "As shown in the figure, in Rttriangle ABC, angle C is a right angle, CD perpendicular AB at D, it is known that AC = 3.0, AB = 5.0, then tanangle BCD is equal to ()", "ground_truth": "\\frac{4}{3}"} +{"image_path": "./Geo170K/images/test/678.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, if AD = 3.0, DB = 6.0, DE = 2.5, then the length of BC is ()", "ground_truth": "7.5"} +{"image_path": "./Geo170K/images/test/679.png", "question": "As shown in the figure, in Rttriangle ABC, angle BAC = 90.0, AB = 3.0, AC = 4.0, point P is any point on BC, connect PA, take PA and PC as adjacent edges to make parallelogram PAQC, connect PQ, then the minimum value of PQ is ()", "ground_truth": "\\frac{12}{5}"} +{"image_path": "./Geo170K/images/test/680.png", "question": "As shown in the figure, AB parallel CD, AB = 6.0, CD = 9.0, AD = 10.0, then the length of OD is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/681.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, intersect AB and AC at points D and E respectively. 
If AD = 2.0, DB = 3.0, BC = 6.0, then the length of DE is ()", "ground_truth": "\\frac{12}{5}"} +{"image_path": "./Geo170K/images/test/682.png", "question": "As shown in the figure, AB is the diameter of circle O, C is the point on circle O, chord AD bisects angle BAC, intersects BC at point E, AB = 6.0, AD = 5.0, then the length of DE is ()", "ground_truth": "2.2"} +{"image_path": "./Geo170K/images/test/683.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, frac {AD}{DB}=frac {1.0}{2.0},DE=4.0, then the length of BC is ()", "ground_truth": "12"} +{"image_path": "./Geo170K/images/test/684.png", "question": "As shown in the figure, AD\u2022AB = AE\u2022AC, angle ADE = 80.0, angle A = 60.0, then angle B = ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/685.png", "question": "As shown in the figure, DC parallel EF parallel AB, iffrac {EG}{AB}=frac {1.0}{2.0},DC=6.0, then the length of GF is ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/686.png", "question": "As shown on the right, in triangle ABC, DE parallel BC, frac {AD}{AB}=frac {2.0}{5.0},DE=3.0,then the length of BC is ()", "ground_truth": "7.5"} +{"image_path": "./Geo170K/images/test/687.png", "question": "As shown in the figure, in the parallelogram ABCD, E is the midpoint of BC, and AE and BD intersect at point F. If the area of \u200b\u200btriangle BFE is 3.0, then the area of \u200b\u200btriangle ABF is ()", "ground_truth": "6"} +{"image_path": "./Geo170K/images/test/688.png", "question": "As shown in the figure, in triangle ABC, D and E are the midpoints of AB and AC respectively. It is known that the area of \u200b\u200btriangle ADE is 1.0, then the area of \u200b\u200btriangle ABC is ()", "ground_truth": "4"} +{"image_path": "./Geo170K/images/test/689.png", "question": "As shown in the figure, it is known that AB, CD, and EF are parallel to each other, and AB = 1.0, CD = 4.0, then the length of EF is ()", "ground_truth": "\\frac{4}{5}"} +{"image_path": "./Geo170K/images/test/690.png", "question": "As shown in the figure, in triangle ABC, DE parallel BC, intersect AB and AC at points D and E respectively. If AE = 3.0, EC = 6.0, then the value of frac DEBC is ()", "ground_truth": "\\frac{1}{3}"} +{"image_path": "./Geo170K/images/test/691.png", "question": "As shown in the figure, BE and CF are the two heights of triangle ABC. If AB = 6.0, BC = 5.0, EF = 3.0, then the length of AE is ()", "ground_truth": "\\frac{18}{5}"} +{"image_path": "./Geo170K/images/test/692.png", "question": "As shown in the figure, the inclination angle angle ABD of the stairs AB with the length 4.0 is 60.0. 
In order to improve the safety performance of the stairs, the stairs are prepared to be rebuilt so that the inclination angle angle ACD is 45.0, then the length of the adjusted stairs AC is ()", "ground_truth": "2\u221a{6}m"} +{"image_path": "./Geo170K/images/test/693.png", "question": "As shown in the figure, the cross section of the dam, the horizontal width of the slope AB is 12.0, and the slope of the slope is 1.0:2.0, then the length of the slope AB is ()", "ground_truth": "6\u221a{5}m"} +{"image_path": "./Geo170K/images/test/694.png", "question": "As shown in the figure, the cable is fixed at the height of 5.0 from the ground to fix the pole, the cable and the ground form an angle 60.0, then the length of the cable AC is ()", "ground_truth": "\\frac{10\u221a{3}}{3}m"} +{"image_path": "./Geo170K/images/test/695.png", "question": "As shown in the figure, in triangle ABC, points D and E are the midpoints of AB and AC respectively. If the area of \u200b\u200btriangle ADE is 4.0, then the area of \u200b\u200btriangle ABC is ()", "ground_truth": "16"} +{"image_path": "./Geo170K/images/test/696.png", "question": "As shown in the figure, in the rectangle ABCD, AB = 4.0, BC = 2.0, point M is on BC, connect AM to make angle AMN = angle AMB, point N is on the straight line AD, MN intersects CD at point E, then the maximum value of BM\u2022AN is ()", "ground_truth": "10"} +{"image_path": "./Geo170K/images/test/697.png", "question": "As shown in the figure, in triangle ABC, angle ACB = 90.0, CD perpendicular AB at D, CD = 4.0, BC = 5.0, then AC = ()", "ground_truth": "\\frac{20}{3}"} +{"image_path": "./Geo170K/images/test/698.png", "question": "As shown in the figure, there is a square DEFG in triangle ABC, where D is on AC, E and F are on AB, and the straight line AG intersects DE and BC at M and N points respectively. 
If angle B = 90.0, AB = 8.0, BC = 6.0, EF = 2.0, then the length of BN is ()", "ground_truth": "\\frac{24}{7}"} +{"image_path": "./Geo170K/images/test/699.png", "question": "As shown in the figure, in triangle ABC, points D and E are points on edges AB and AC respectively, and DE parallel BC, if AD = 5.0, BD = 10.0, DE = 3.0, then the length of BC is ()", "ground_truth": "9"} +{"image_path": "./Geo170K/images/test/700.png", "question": "As shown in the figure, triangle ABC is inscribed in circle O, angle BAC = 120.0, AB = AC, BD is the diameter of circle O, AB = 3.0, then the value of AD is ()", "ground_truth": "3\u221a{3}"} +{"image_path": "./Geo170K/images/test/701.png", "question": "As shown in the figure, in circle O, angle ABC = 130.0, then angle AOC is equal to ()", "ground_truth": "100\u00b0"} +{"image_path": "./Geo170K/images/test/702.png", "question": "As shown in the figure, AC and BC are the diameters of two semicircles, angle ACP = 30.0, if AB = 20.0, the value of PQ is ()", "ground_truth": "10\u221a{3}cm"} +{"image_path": "./Geo170K/images/test/703.png", "question": "As shown in the figure, in the quadrilateral ABCD, AB = AC = AD, angle CBD = 23.0, then angle CAD is ()", "ground_truth": "46\u00b0"} +{"image_path": "./Geo170K/images/test/704.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, angle BCO = 40.0, then the degree of angle A is equal to ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/705.png", "question": "As shown in the figure, A, B, and C are three points on circle O, angle ABC = 25.0, then the degree of angle AOC is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/706.png", "question": "As shown in the figure, A, B, and C are points on circle O, angle ACB = 32.0, then angle AOB is equal to ()", "ground_truth": "64\u00b0"} +{"image_path": "./Geo170K/images/test/707.png", "question": "As shown in the figure, points A, B, and C are on circle O, if angle ABC = 35.0, then the degree of angle AOC is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/708.png", "question": "As shown in the figure, points A, B, and C are on circle O and connect AB and AC. 
If angle BOC = 100.0, then the degree of angle B + angle C is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/709.png", "question": "As shown in the figure, place the vertex of the right triangle 45.0 angle on the center O, the hypotenuse and the leg intersect circle O at two points A and B respectively, and C is any point on the major arc AB (not coincident with A and B) , Then the degree of angle ACB is ()", "ground_truth": "22.5\u00b0"} +{"image_path": "./Geo170K/images/test/710.png", "question": "As shown in the figure, the line segment AB is the diameter of circle O, the chord CD \u4e04 AB, angle CAB = 20.0, then angle AOD is equal to ()", "ground_truth": "140\u00b0"} +{"image_path": "./Geo170K/images/test/711.png", "question": "As shown in the figure, the vertices A, B, and D of parallelogram ABCD are on circle O, and the vertex C is on the diameter BE of circle O, connect AE, angle E = 36.0, then the degree of angle ADC is ()", "ground_truth": "54\u00b0"} +{"image_path": "./Geo170K/images/test/712.png", "question": "The diameter of the protractor coincides with the hypotenuse AB of the right triangle ABC, where the endpoint N of the scale line of the protractor O coincides with point A, the radial CP starts from CA and rotates clockwise at a speed of 3.0 degrees per second, and CP and the semicircular arc of the protractor intersect at point E, when the 20.0 second, the corresponding reading of point E on the protractor is ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/713.png", "question": "As shown in the figure, the three points A, B, and C are on circle O, and angle ABO = 50.0, then angle ACB is equal to ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/714.png", "question": "AB is the diameter of circle O, point C is on circle O, if angle C = 15.0, then angle BOC = ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/715.png", "question": "As shown in the figure, the two chords AB and CD in the circle intersect at E, angle D = 35.0, angle AEC = 105.0, then angle C = ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/716.png", "question": "As shown in the figure, AB and CD are the two chords of circle O, connect AD and BC. 
If angle BCD = 70.0, then the degree of angle BAD is ()", "ground_truth": "70\u00b0"} +{"image_path": "./Geo170K/images/test/717.png", "question": "As shown in the figure, A, B, and C are on circle O, if angle BAC = 24.0, then the degree of angle BOC is ()", "ground_truth": "48\u00b0"} +{"image_path": "./Geo170K/images/test/718.png", "question": "As shown in the figure, points A, B, and C are all on circle O, if angle C = 34.0, then angle AOB is ()", "ground_truth": "68\u00b0"} +{"image_path": "./Geo170K/images/test/719.png", "question": "As shown in the figure, circle A with a diameter of 10.0 passes through point C(0.0,5.0) O(0.0,0.0), B is a point on the circle A major arc on the right side of the y-axis, then the degree of angle OBC is ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/720.png", "question": "As shown in the figure, triangle ABC is inscribed in circle O, angle A = 15.0, connect OB, then angle OBC is equal to ()", "ground_truth": "75\u00b0"} +{"image_path": "./Geo170K/images/test/721.png", "question": "As shown in the figure, in circle O, angle ABC = 40.0, then angle AOC = () degrees.", "ground_truth": "80"} +{"image_path": "./Geo170K/images/test/722.png", "question": "As shown in the figure, A, B, C are the three points on circle O, and angle CAO = 25.0, angle BCO = 35.0, then the degree of angle AOB is ()", "ground_truth": "120\u00b0"} +{"image_path": "./Geo170K/images/test/723.png", "question": "As shown in the figure, AB is the diameter of circle O, chord CD perpendicular AB, E is a point of arc BC, if angle CEA = 28.0, then the degree of angle ABD is ()", "ground_truth": "28\u00b0"} +{"image_path": "./Geo170K/images/test/724.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, and it is known that angle B = 70.0, then the degree of angle CAO is ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/725.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, angle OCB = 30.0, then the degree of angle A is equal to ()", "ground_truth": "60\u00b0"} +{"image_path": "./Geo170K/images/test/726.png", "question": "As shown in the figure, circle O is the circumscribed circle of triangle ABC, AB is the diameter, if angle BOC = 70.0, then angle A is equal to ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/727.png", "question": "As shown in the figure, in circle O, diameter AB = 5.0, chord AC = 4.0, then the distance from point O to line AC is ()", "ground_truth": "1.5cm"} +{"image_path": "./Geo170K/images/test/728.png", "question": "As shown in the figure, AB is the diameter of circle O, if angle BAC = 35.0, then angle ADC = ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/729.png", "question": "A pair of right triangle plates are placed as shown (angle ACB = angle ADB = 90.0 ), angle CAB = 30.0, angle BAD = 45.0, AB intersects CD at E, then the degree of angle CEB is ()", "ground_truth": "75\u00b0"} +{"image_path": "./Geo170K/images/test/730.png", "question": "As shown in the figure below, point C is on the semicircle O with AB as the diameter, angle BAC = 20.0, then angle BOC is equal to ()", "ground_truth": "40\u00b0"} +{"image_path": "./Geo170K/images/test/731.png", "question": "As shown in the figure, AB is the diameter of circle O, angle ABC = 25.0, then the degree of angle D is ()", "ground_truth": "65\u00b0"} +{"image_path": "./Geo170K/images/test/732.png", "question": "As shown in the figure, triangle 
ABC is the inscribed triangle of circle O, BD is the diameter, if angle DBC = 18.0, then the degree of angle A is ()", "ground_truth": "72\u00b0"} +{"image_path": "./Geo170K/images/test/733.png", "question": "As shown in the figure, AB and CD are the chords of circle O, and AB parallel CD, if angle BAD = 36.0, then angle AOC is equal to ()", "ground_truth": "72\u00b0"} +{"image_path": "./Geo170K/images/test/734.png", "question": "As shown in the figure, A, B, and C are the three points on circle O, if angle C = 35.0, then the degree of angle OAB is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/735.png", "question": "As shown in the figure, the vertices A, B, and D of parallelogram ABCD are on circle O, and the vertex C is on the diameter BE of circle O, angle ADC = 54.0, connect AE, then the degree of angle AEB is ()", "ground_truth": "36\u00b0"} +{"image_path": "./Geo170K/images/test/736.png", "question": "As shown in the figure, CD is the diameter of circle O, chord DE parallel OA, if the degree of angle D is 50.0, then the degree of angle A is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/737.png", "question": "As shown in the figure, it is known that circle O is the circumscribed circle of triangle ABC, angle AOB = 110.0, then the degree of angle C is ()", "ground_truth": "55\u00b0"} +{"image_path": "./Geo170K/images/test/738.png", "question": "As shown in the figure, AB is the diameter of circle O, and point C is on circle O. If angle A = 40.0, then the degree of angle B is ()", "ground_truth": "50\u00b0"} +{"image_path": "./Geo170K/images/test/739.png", "question": "As shown in the figure, if AB is the diameter of circle O, CD is the chord of circle O, angle ABD = 55.0, then the degree of angle BCD is ()", "ground_truth": "35\u00b0"} +{"image_path": "./Geo170K/images/test/740.png", "question": "As shown in the figure, points A, B, and C are three points on circle O, angle BAC = 40.0, then the degree of angle BOC is ()", "ground_truth": "80\u00b0"} +{"image_path": "./Geo170K/images/test/741.png", "question": "As shown in the figure, in the circle O with a radius of 5.0, if the length of the chord AB is 8.0, then its distance from the chord OC to the centre is equal to ()", "ground_truth": "3"} +{"image_path": "./Geo170K/images/test/742.png", "question": "As shown in the figure, point O is the center of circle O, points A, B, and C are on circle O, AO parallel BC, angle AOB = 40.0, then the degree of angle OAC is equal to ()", "ground_truth": "20\u00b0"} +{"image_path": "./Geo170K/images/test/743.png", "question": "It is known that: as shown in the figure, the diameter AB of circle O is perpendicular to the chord CD, and the foot of perpendicular is E. If AB = 10.0, CD = 6.0, then the length of BE is ()", "ground_truth": "1"} +{"image_path": "./Geo170K/images/test/744.png", "question": "As shown in the figure, the radius of circle O is 10.0, AB is the chord, OC perpendicular AB, and the foot of perpendicular is E. 
If CE = 4.0, then the length of AB is ()", "ground_truth": "16"} +{"image_path": "./Geo170K/images/test/745.png", "question": "As shown in the figure, the edge length of the square ABCD is 3.0, and the equilateral triangle PCD and equilateral triangle QCD are made on both sides of CD with CD as one edge, then the length of PQ is ()", "ground_truth": "3\u221a{3}"} +{"image_path": "./Geo170K/images/test/746.png", "question": "As shown in the figure, in the square ABCD, E is a point on DC, F is a point on the extended line of BC, angle BEC = 70.0, and triangle BCE congruent triangle DCF. Connect EF, then the degree of angle EFD is ()", "ground_truth": "25\u00b0"} +{"image_path": "./Geo170K/images/test/747.png", "question": "As shown in the figure, it is known that the radius of circle O is 5.0 and the chord AB = 8.0, then the distance from the center O to AB is ()", "ground_truth": "3mm"} +{"image_path": "./Geo170K/images/test/748.png", "question": "As shown in the figure, MN is tangent to circle O at point A, angle AOB = 60.0, then angle BAM is equal to ()", "ground_truth": "30\u00b0"} +{"image_path": "./Geo170K/images/test/749.png", "question": "As shown in the figure, the tangents PA and PB of a circle drawn from a point P outside circle O, the tangent points are A and B respectively, if angle APB = 70.0, then the degree of the minor arc AB sandwiched by these two tangents is ()", "ground_truth": "110\u00b0"} +{"image_path": "./Geo170K/images/test/750.png", "question": "As shown in the figure, PB is tangent to circle O at point B, PO intersects circle O at point E, extends PO and intersects circle O at point A, connects AB, the radius of circle O OD perpendicular AB at point C, BP \u200b\u200b= 6.0, angle P = 30.0 , then the length of CD is ()", "ground_truth": "\u221a{3}"} +{"image_path": "./Geo170K/images/test/751.png", "question": "As shown in the figure, PA and PB are tangent to circle O to A and B respectively. Point C and point D are the moving points on line segments PA and PB, and CD always remains tangent to circle O. If PA = 8.0, then perimeter of triangle PCD is ()", "ground_truth": "16"} +{"image_path": "./Geo170K/images/test/752.png", "question": "As shown in the figure, in the two concentric circles, the chord AB of the great circle is tangent to the small circle at point C. 
If AB = 6.0, the area of \u200b\u200bthe ring is ()", "ground_truth": "9\u03c0"} +{"image_path": "./Geo170K/images/test/753.png", "question": "As shown in the figure, the squares P and Q are sandwiched in the ABCD frame, the angle between the lower edge of the square P and AB is 15.0, and the angle between the two adjacent edges of the square P and Q is 150.0, then angle 1 is ()", "ground_truth": "15\u00b0"} diff --git a/previous_version/Video-R1-main-previous/src/eval/prompts/superclevr_test200_counting_problems.jsonl b/previous_version/Video-R1-main-previous/src/eval/prompts/superclevr_test200_counting_problems.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..78445e8980bc81a48f533b526e4403748ac44d60 --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/eval/prompts/superclevr_test200_counting_problems.jsonl @@ -0,0 +1,200 @@ +{"image_path": "./images/superCLEVR_new_025000.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025001.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025002.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025003.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025004.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025005.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025006.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025007.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025008.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025009.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025010.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025011.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025012.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025013.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025014.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025015.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025016.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025017.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025018.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025019.png", "question": "How 
many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025020.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025021.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025022.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025023.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025024.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025025.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025026.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025027.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025028.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025029.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025030.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025031.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025032.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025033.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025034.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025035.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025036.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025037.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025038.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025039.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025040.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025041.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025042.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025043.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025044.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025045.png", 
"question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025046.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025047.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025048.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025049.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025050.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025051.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025052.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025053.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025054.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025055.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025056.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025057.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025058.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025059.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025060.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025061.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025062.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025063.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025064.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025065.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025066.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025067.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025068.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025069.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025070.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": 
"./images/superCLEVR_new_025071.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025072.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025073.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025074.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025075.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025076.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025077.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025078.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025079.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025080.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025081.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025082.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025083.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025084.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025085.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025086.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025087.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025088.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025089.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025090.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025091.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025092.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025093.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025094.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025095.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025096.png", "question": "How many different items are there in the image?", "ground_truth": 8} 
+{"image_path": "./images/superCLEVR_new_025097.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025098.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025099.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025100.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025101.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025102.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025103.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025104.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025105.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025106.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025107.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025108.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025109.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025110.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025111.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025112.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025113.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025114.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025115.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025116.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025117.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025118.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025119.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025120.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025121.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025122.png", "question": "How many different items are there in the image?", 
"ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025123.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025124.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025125.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025126.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025127.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025128.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025129.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025130.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025131.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025132.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025133.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025134.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025135.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025136.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025137.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025138.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025139.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025140.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025141.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025142.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025143.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025144.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025145.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025146.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025147.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025148.png", "question": "How many different items are there in 
the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025149.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025150.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025151.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025152.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025153.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025154.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025155.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025156.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025157.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025158.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025159.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025160.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025161.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025162.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025163.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025164.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025165.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025166.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025167.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025168.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025169.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025170.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025171.png", "question": "How many different items are there in the image?", "ground_truth": 7} +{"image_path": "./images/superCLEVR_new_025172.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025173.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025174.png", "question": "How many different items 
are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025175.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025176.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025177.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025178.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025179.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025180.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025181.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025182.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025183.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025184.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025185.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025186.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025187.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025188.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025189.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025190.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025191.png", "question": "How many different items are there in the image?", "ground_truth": 8} +{"image_path": "./images/superCLEVR_new_025192.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025193.png", "question": "How many different items are there in the image?", "ground_truth": 9} +{"image_path": "./images/superCLEVR_new_025194.png", "question": "How many different items are there in the image?", "ground_truth": 10} +{"image_path": "./images/superCLEVR_new_025195.png", "question": "How many different items are there in the image?", "ground_truth": 5} +{"image_path": "./images/superCLEVR_new_025196.png", "question": "How many different items are there in the image?", "ground_truth": 6} +{"image_path": "./images/superCLEVR_new_025197.png", "question": "How many different items are there in the image?", "ground_truth": 3} +{"image_path": "./images/superCLEVR_new_025198.png", "question": "How many different items are there in the image?", "ground_truth": 4} +{"image_path": "./images/superCLEVR_new_025199.png", "question": "How many different items are there in the image?", "ground_truth": 3} diff --git 
a/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_counting_superclevr.py b/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_counting_superclevr.py
new file mode 100644
index 0000000000000000000000000000000000000000..e14957313f0a6d09ff0c7b34e65fbdf803aa78d7
--- /dev/null
+++ b/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_counting_superclevr.py
@@ -0,0 +1,136 @@
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+from qwen_vl_utils import process_vision_info
+import torch
+import json
+from tqdm import tqdm
+import re
+
+
+
+MODEL_PATH="Qwen2-VL-2B-GRPO-CLEVR-70k/checkpoint-100" # Qwen2vl-2b-Instruct for original scores
+BSZ=64 # reduce it if GPU OOM
+OUTPUT_PATH="./logs/counting_results_superclevr_200_qwen2vl_2b_instruct_grpo_100.json"
+PROMPT_PATH="./prompts/superclevr_test200_counting_problems.jsonl"
+
+# We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    MODEL_PATH,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+    device_map="auto",
+)
+
+# default processor
+processor = AutoProcessor.from_pretrained(MODEL_PATH)
+
+data = []
+with open(PROMPT_PATH, "r") as f:
+    for line in f:
+        data.append(json.loads(line))
+
+
+QUESTION_TEMPLATE = "{Question} First output the thinking process in <think> </think> and final answer (number) in <answer> </answer> tags."
+
+messages = []
+
+for i in data:
+    message = [{
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "image": f"file://{i['image_path']}"
+            },
+            {
+                "type": "text",
+                "text": QUESTION_TEMPLATE.format(Question=i['question'])
+            }
+        ]
+    }]
+    messages.append(message)
+
+
+
+
+all_outputs = []  # List to store all answers
+
+# Process data in batches
+for i in tqdm(range(0, len(messages), BSZ)):
+    batch_messages = messages[i:i + BSZ]
+
+    # Preparation for inference
+    text = [processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True) for msg in batch_messages]
+
+    image_inputs, video_inputs = process_vision_info(batch_messages)
+    inputs = processor(
+        text=text,
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    )
+    inputs = inputs.to("cuda")
+
+    # Inference: Generation of the output
+    generated_ids = model.generate(**inputs, use_cache=True, max_new_tokens=256, do_sample=False)
+
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    batch_output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+
+    all_outputs.extend(batch_output_text)
+    print(f"Processed batch {i//BSZ + 1}/{(len(messages) + BSZ - 1)//BSZ}")
+
+
+def extract_number_answer(output_str):
+    # Try to find the number within <answer> </answer> tags; if it cannot be found, return None
+    answer_pattern = r'<answer>\s*(\d+)\s*</answer>'
+    match = re.search(answer_pattern, output_str)
+
+    if match:
+        return int(match.group(1))
+    return None
+
+
+final_output = []
+correct_number = 0
+
+for input_example, model_output in zip(data, all_outputs):
+    original_output = model_output
+    ground_truth = input_example['ground_truth']
+    model_answer = extract_number_answer(original_output)
+
+    # Create a result dictionary for this example
+    result = {
+        'question': input_example,
+        'ground_truth': ground_truth,
+        'model_output': original_output,
+        'extracted_answer': model_answer
+    }
+    final_output.append(result)
+
+    # Count correct answers
+    if model_answer is not None and model_answer == ground_truth:
+        correct_number += 1
+
+# Calculate and print accuracy
+accuracy = correct_number / len(data) * 100
+print(f"\nAccuracy: {accuracy:.2f}%")
+
+# Save results to a JSON file
+output_path = OUTPUT_PATH
+with open(output_path, "w") as f:
+    json.dump({
+        'accuracy': accuracy,
+        'results': final_output
+    }, f, indent=2)
+
+print(f"Results saved to {output_path}")
+
+
+
+
+
diff --git a/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_geoqa.py b/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_geoqa.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f335f1ad9b0c90c13fbce53986fd723e9a51ed2
--- /dev/null
+++ b/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_geoqa.py
@@ -0,0 +1,149 @@
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+from qwen_vl_utils import process_vision_info
+import torch
+import json
+from tqdm import tqdm
+import re
+from math_verify import parse, verify
+
+
+MODEL_PATH="" # qwen2vl model or GRPO-trained model on geoqa train
+BSZ=50 # reduce it if GPU OOM
+OUTPUT_PATH=""
+PROMPT_PATH="./prompts/geoqa_test_prompts.jsonl"
+
+# We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    MODEL_PATH,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+    device_map="auto",
+)
+
+# default processor
+processor = AutoProcessor.from_pretrained(MODEL_PATH)
+
+data = []
+with open(PROMPT_PATH, "r") as f:
+    for line in f:
+        data.append(json.loads(line))
+
+
+QUESTION_TEMPLATE = "{Question} Output the thinking process in <think> </think> and final answer (number) in <answer> </answer> tags."
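+# Illustrative example of the expected completion format (hypothetical model output, not a real result):
+#   "<think> DE is parallel to BC, so triangle ADE is similar to triangle ABC ... </think> <answer> 12 </answer>"
+# Below, parse() from math_verify (imported above) pulls the final expression out of the completion
+# and verify() checks it against the parsed ground truth, so mathematically equivalent forms
+# (e.g. "12" and "12.0") can both be counted as correct.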
+ +messages = [] + +data = data + +for i in data: + message = [{ + "role": "user", + "content": [ + { + "type": "image", + "image": f"file://{i['image_path']}" + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=i['question']) + } + ] + }] + messages.append(message) + + + + +all_outputs = [] # List to store all answers + +# Process data in batches +for i in tqdm(range(0, len(messages), BSZ)): + batch_messages = messages[i:i + BSZ] + + # Preparation for inference + text = [processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True) for msg in batch_messages] + + image_inputs, video_inputs = process_vision_info(batch_messages) + inputs = processor( + text=text, + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to("cuda") + + # Inference: Generation of the output + generated_ids = model.generate(**inputs, use_cache=True, max_new_tokens=1024, do_sample=False) + + generated_ids_trimmed = [ + out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + ] + batch_output_text = processor.batch_decode( + generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False + ) + + all_outputs.extend(batch_output_text) + print(f"Processed batch {i//BSZ + 1}/{(len(messages) + BSZ - 1)//BSZ}") + + + + + +final_output = [] +correct_number = 0 + +for input_example, model_output in zip(data,all_outputs): + original_output = model_output + ground_truth = input_example['ground_truth'] + model_answer = parse(original_output) + + # Count correct answers + if model_answer is not None and float(verify(model_answer,parse(ground_truth)))>0: + correct_number += 1 + is_correct = True + else: + is_correct = False + + try: + result = { + 'question': input_example, + 'ground_truth': ground_truth, + 'model_output': original_output, + 'extracted_answer':str(model_answer[0]) if model_answer is not None else None, + 'is_correct':is_correct + } + + except Exception as e: + print("no answer parsed",e,model_answer) + result = { + 'question': input_example, + 'ground_truth': ground_truth, + 'model_output': original_output, + 'extracted_answer':None, + 'is_correct':is_correct + } + + + + final_output.append(result) + + +# Calculate and print accuracy +accuracy = correct_number / len(data) * 100 +print(f"\nAccuracy: {accuracy:.2f}%") + +# Save results to a JSON file +output_path = OUTPUT_PATH +with open(output_path, "w") as f: + json.dump({ + 'accuracy': accuracy, + 'results': final_output + }, f, indent=2, ensure_ascii=False) + +print(f"Results saved to {output_path}") + + + + + diff --git a/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_geoqa_multigpu.py b/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_geoqa_multigpu.py new file mode 100644 index 0000000000000000000000000000000000000000..66e0f99149e8b697979feccca85c857b335ddf67 --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_geoqa_multigpu.py @@ -0,0 +1,205 @@ +from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor +from qwen_vl_utils import process_vision_info +import torch +import json +import tqdm +from math_verify import parse, verify +import argparse +import pandas as pd +from torch.multiprocessing import Process, set_start_method, Manager +from transformers.utils.logging import disable_progress_bar +disable_progress_bar() + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +# >>>>> 1. 
get evaluation configuration <<<<<
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+def get_eval_config():
+    parser = argparse.ArgumentParser(description="Inference script for GeoQA evaluation.")
+    parser.add_argument("--model_path", required=True, type=str, help="Path to the model checkpoint (e.g., qwen2vl model or a fine-tuned model).")
+    parser.add_argument("--batch_size", default=4, type=int, help="Batch size for inference. Reduce if GPU OOM (default: 4).")
+    parser.add_argument("--output_path", required=True, type=str, help="Path to save inference result (e.g., JSON file).")
+    parser.add_argument("--prompt_path", required=True, type=str, help="Path to the prompts JSONL file for GeoQA evaluation.")
+    all_gpu = ",".join(map(str, range(torch.cuda.device_count())))
+    parser.add_argument("--gpu_ids", default=all_gpu, help="comma-separated list of GPU IDs to use")
+    args = parser.parse_args()
+    return args
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+# >>>>>>>>>> 2. load testset <<<<<<<<<<<<<
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+def prepare_test_messages(testset_path):
+    testset_data = pd.read_json(testset_path, lines=True).to_dict(orient="records")
+    QUESTION_TEMPLATE = "{Question} Output the thinking process in <think> </think> and final answer (number) in <answer> </answer> tags."
+    tested_messages = []
+    for i in testset_data:
+        message = [{
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "image": f"file://{i['image_path']}"
+                },
+                {
+                    "type": "text",
+                    "text": QUESTION_TEMPLATE.format(Question=i['question'])
+                }
+            ]
+        }]
+        tested_messages.append(message)
+    return testset_data, tested_messages
+
+
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+# >>>>> 3. use several GPUs to accelerate inference on the testset <<<<<
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+
+def init_model(model_path, gpu_id):
+    """Initialize a model (args.model_path) on a specific GPU."""
+    # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
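+    # Note: unlike the single-GPU eval scripts above (which use device_map="auto"), the model here
+    # is pinned to one card via device_map=f"cuda:{gpu_id}", so each spawned worker process owns
+    # exactly one GPU; the batched inputs are later moved to model.device before generation.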
+ model = Qwen2VLForConditionalGeneration.from_pretrained( + model_path, + torch_dtype=torch.bfloat16, + attn_implementation="flash_attention_2", + device_map=f"cuda:{gpu_id}", + ) + + # default processer + processor = AutoProcessor.from_pretrained(model_path, use_fast=True) + return model, processor + +def answer_a_batch_question_qwen(batch_messages, model, processor): + """ let qwen answer a batch of questions """ + text = [processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True) for msg in batch_messages] + image_inputs, video_inputs = process_vision_info(batch_messages) + inputs = processor( + text=text, + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to(model.device) + + generated_ids = model.generate(**inputs, use_cache=True, max_new_tokens=1024) # do_sample=False + generated_ids_trimmed = [ + out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + ] + batch_output_text = processor.batch_decode( + generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False + ) + return batch_output_text + +def infer_on_single_gpu(model_path, device_id, chunk_of_tested_messages, batch_size, results=None): + """init model on this single gpu and let it answer asign chunk of questions""" + model, processor = init_model(model_path, device_id) + + ### split batch + responses = [] + batch_messages_list = [chunk_of_tested_messages[start: start + batch_size] + for start in range(0, len(chunk_of_tested_messages), batch_size)] + + for batch_messages in tqdm.auto.tqdm(batch_messages_list, desc=f"GPU {device_id} progress", position=device_id, leave=False): + batch_output_text = answer_a_batch_question_qwen(batch_messages, model, processor) + + responses.extend(batch_output_text) + + results[device_id] = responses + return + + +def multi_gpu_inference(prompts, gpu_ids, model_path, batch_size): + """ let each gpu (along with a model) answer a chunk of questions """ + set_start_method("spawn", force=True) + manager = Manager() + gpu_id2result = manager.dict() + + gpu_ids = [int(gpu_id.strip()) for gpu_id in gpu_ids.split(',')] + num_gpus = len(gpu_ids) + + chunk_size = len(prompts) // num_gpus + processes = [] + for i, gpu_id in enumerate(gpu_ids): + start_idx = i * chunk_size + end_idx = (i + 1) * chunk_size if i != num_gpus - 1 else len(prompts) + chunk = prompts[start_idx: end_idx] + process = Process(target=infer_on_single_gpu, args=(model_path, gpu_id, chunk, batch_size, gpu_id2result)) + process.start() + processes.append(process) + + # for process in tqdm.auto.tqdm(processes, desc="Inference progress", position=num_gpus, leave=True): + for process in processes: + process.join() + + all_predicts = [] + for gpu_id in gpu_ids: + all_predicts.extend(gpu_id2result[gpu_id]) + + return all_predicts + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +# >>>>>>>>>> 4. 
compute metrics <<<<<<<<<<<
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+
+def compute_metrics(testset_data, all_predicts):
+    final_output = []
+    correct_number = 0
+
+    for input_example, model_output in zip(testset_data, all_predicts):
+        original_output = model_output
+        ground_truth = input_example['ground_truth']
+        model_answer = parse(original_output)
+
+        # Count correct answers
+        if model_answer is not None and float(verify(model_answer, parse(ground_truth))) > 0:
+            correct_number += 1
+            is_correct = True
+        else:
+            is_correct = False
+
+        try:
+            result = {
+                'question': input_example,
+                'ground_truth': ground_truth,
+                'model_output': original_output,
+                'extracted_answer': str(model_answer[0]) if model_answer is not None else None,
+                'is_correct': is_correct
+            }
+        except Exception as e:
+            print("no answer parsed", e, model_answer)
+            result = {
+                'question': input_example,
+                'ground_truth': ground_truth,
+                'model_output': original_output,
+                'extracted_answer': None,
+                'is_correct': is_correct
+            }
+
+        final_output.append(result)
+
+    # Calculate and print accuracy
+    accuracy = correct_number / len(testset_data) * 100
+    print(f"\nAccuracy: {accuracy:.2f}%")
+
+    # Save results to a JSON file
+    with open(args.output_path, "w") as f:
+        json.dump({
+            'accuracy': accuracy,
+            'results': final_output
+        }, f, indent=2, ensure_ascii=False)
+
+    print(f"Results saved to {args.output_path}")
+
+
+
+if __name__ == "__main__":
+    args = get_eval_config()
+    testset_data, tested_messages = prepare_test_messages(testset_path=args.prompt_path)
+    all_predicts = multi_gpu_inference(tested_messages, args.gpu_ids, args.model_path, args.batch_size)
+    compute_metrics(testset_data, all_predicts)
+
diff --git a/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_video_counting.py b/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_video_counting.py
new file mode 100644
index 0000000000000000000000000000000000000000..81698283a2f6e56b4470509cb0c83719b832f266
--- /dev/null
+++ b/previous_version/Video-R1-main-previous/src/eval/test_qwen2vl_video_counting.py
@@ -0,0 +1,141 @@
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+from qwen_vl_utils import process_vision_info
+import torch
+import json
+from tqdm import tqdm
+import re
+import os
+
+
+
+MODEL_PATH="YOUR_PATH" # Qwen2vl-2b-Instruct for original scores
+BSZ=64 # reduce it if GPU OOM
+OUTPUT_PATH="YOUR_PATH/test.json"
+PROMPT_PATH="YOUR_PATH/test_dvd.jsonl"
+
+# We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    MODEL_PATH,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+    device_map="auto",
+)
+
+# default processor
+processor = AutoProcessor.from_pretrained(MODEL_PATH)
+
+data = []
+with open(PROMPT_PATH, "r") as f:
+    for line in f:
+        data.append(json.loads(line))
+
+# detailed step-by-step
+QUESTION_TEMPLATE = "{Question} First output the thinking process in <think> </think> and final answer (number) in <answer> </answer> tags."
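+# Illustrative example of a well-formed completion for this prompt (hypothetical output):
+#   "<think> the clip shows a cube, a cone and two spheres ... </think> <answer> 4 </answer>"
+# extract_number_answer() below pulls out the integer with the regex r'<answer>\s*(\d+)\s*</answer>';
+# completions without a parsable <answer> block are treated as incorrect.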
+ +messages = [] + + +for x in data: + message = [{ + "role": "user", + "content": [ + { + "type": "video", + "video": os.getcwd() + "/src/r1-v/data" + x['video_filename'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=x['problem']) + } + ] + }] + messages.append(message) + + + + +all_outputs = [] # List to store all answers + +# Process data in batches +for i in tqdm(range(0, len(messages), BSZ)): + batch_messages = messages[i:i + BSZ] + + # Preparation for inference + text = [processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True) for msg in batch_messages] + + + image_inputs, video_inputs = process_vision_info(batch_messages) + inputs = processor( + text=text, + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to("cuda") + + # Inference: Generation of the output + generated_ids = model.generate(**inputs, use_cache=True, max_new_tokens=256, do_sample=False) + + generated_ids_trimmed = [ + out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + ] + batch_output_text = processor.batch_decode( + generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False + ) + + + all_outputs.extend(batch_output_text) + print(f"Processed batch {i//BSZ + 1}/{(len(messages) + BSZ - 1)//BSZ}") + + +def extract_number_answer(output_str): + # Try to find the number within tags, if can not find, return None + answer_pattern = r'\s*(\d+)\s*' + match = re.search(answer_pattern, output_str) + + if match: + return int(match.group(1)) + return None + + +final_output = [] +correct_number = 0 + +for input_example, model_output in zip(data,all_outputs): + original_output = model_output + ground_truth = extract_number_answer(input_example['solution']) + model_answer = extract_number_answer(original_output) + + + # Create a result dictionary for this example + result = { + 'question': input_example, + 'ground_truth': ground_truth, + 'model_output': original_output, + 'extracted_answer': model_answer + } + final_output.append(result) + + # Count correct answers + if model_answer is not None and model_answer == ground_truth: + correct_number += 1 + +# Calculate and print accuracy +accuracy = correct_number / len(data) * 100 +print(f"\nAccuracy: {accuracy:.2f}%") + +# Save results to a JSON file +output_path = OUTPUT_PATH +with open(output_path, "w") as f: + json.dump({ + 'accuracy': accuracy, + 'results': final_output + }, f, indent=2) + +print(f"Results saved to {output_path}") + + + + + diff --git a/previous_version/Video-R1-main-previous/src/qwen-vl-utils/.python-version b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/.python-version new file mode 100644 index 0000000000000000000000000000000000000000..143c2f5d0b57eae26fc9dec0697e64d7e051ab6c --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/.python-version @@ -0,0 +1 @@ +3.8.19 diff --git a/previous_version/Video-R1-main-previous/src/qwen-vl-utils/README.md b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0e4c88d7d71be1d33fbc559165b95e229547301c --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/README.md @@ -0,0 +1,94 @@ +# qwen-vl-utils + +Qwen-VL Utils contains a set of helper functions for processing and integrating visual language information with Qwen-VL Series Model. 
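At a glance, you hand the helpers a chat-style `messages` list containing image or video entries, and they return the visual inputs ready to pass to the processor. A minimal sketch (the path is a placeholder; see the Usage section below for complete, end-to-end examples):

```python
from qwen_vl_utils import process_vision_info

messages = [{
    "role": "user",
    "content": [
        {"type": "image", "image": "file:///path/to/your/image.jpg"},
        {"type": "text", "text": "Describe this image."},
    ],
}]

# Splits the conversation into image inputs and video inputs for the processor.
images, videos = process_vision_info(messages)
```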
+ +## Install + +```bash +pip install qwen-vl-utils +``` + +## Usage + +### Qwen2VL + +```python +from transformers import Qwen2VLForConditionalGeneration, AutoProcessor +from qwen_vl_utils import process_vision_info + + +# You can directly insert a local file path, a URL, or a base64-encoded image into the position where you want in the text. +messages = [ + # Image + ## Local file path + [{"role": "user", "content": [{"type": "image", "image": "file:///path/to/your/image.jpg"}, {"type": "text", "text": "Describe this image."}]}], + ## Image URL + [{"role": "user", "content": [{"type": "image", "image": "http://path/to/your/image.jpg"}, {"type": "text", "text": "Describe this image."}]}], + ## Base64 encoded image + [{"role": "user", "content": [{"type": "image", "image": "data:image;base64,/9j/..."}, {"type": "text", "text": "Describe this image."}]}], + ## PIL.Image.Image + [{"role": "user", "content": [{"type": "image", "image": pil_image}, {"type": "text", "text": "Describe this image."}]}], + ## Model dynamically adjusts image size, specify dimensions if required. + [{"role": "user", "content": [{"type": "image", "image": "file:///path/to/your/image.jpg", "resized_height": 280, "resized_width": 420}, {"type": "text", "text": "Describe this image."}]}], + # Video + ## Local video path + [{"role": "user", "content": [{"type": "video", "video": "file:///path/to/video1.mp4"}, {"type": "text", "text": "Describe this video."}]}], + ## Local video frames + [{"role": "user", "content": [{"type": "video", "video": ["file:///path/to/extracted_frame1.jpg", "file:///path/to/extracted_frame2.jpg", "file:///path/to/extracted_frame3.jpg"],}, {"type": "text", "text": "Describe this video."},],}], + ## Model dynamically adjusts video nframes, video height and width. specify args if required. + [{"role": "user", "content": [{"type": "video", "video": "file:///path/to/video1.mp4", "fps": 2.0, "resized_height": 280, "resized_width": 280}, {"type": "text", "text": "Describe this video."}]}], +] + +processor = AutoProcessor.from_pretrained(model_path) +model = Qwen2VLForConditionalGeneration.from_pretrained(model_path, torch_dtype="auto", device_map="auto") +text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) +images, videos = process_vision_info(messages) +inputs = processor(text=text, images=images, videos=videos, padding=True, return_tensors="pt") +print(inputs) +generated_ids = model.generate(**inputs) +print(generated_ids) +``` + +### Qwen2.5VL + +```python +from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor +from qwen_vl_utils import process_vision_info + + +# You can set the maximum tokens for a video through the environment variable VIDEO_MAX_PIXELS +# based on the maximum tokens that the model can accept. +# export VIDEO_MAX_PIXELS = 32000 * 28 * 28 * 0.9 + + +# You can directly insert a local file path, a URL, or a base64-encoded image into the position where you want in the text. 
+messages = [ + # Image + ## Local file path + [{"role": "user", "content": [{"type": "image", "image": "file:///path/to/your/image.jpg"}, {"type": "text", "text": "Describe this image."}]}], + ## Image URL + [{"role": "user", "content": [{"type": "image", "image": "http://path/to/your/image.jpg"}, {"type": "text", "text": "Describe this image."}]}], + ## Base64 encoded image + [{"role": "user", "content": [{"type": "image", "image": "data:image;base64,/9j/..."}, {"type": "text", "text": "Describe this image."}]}], + ## PIL.Image.Image + [{"role": "user", "content": [{"type": "image", "image": pil_image}, {"type": "text", "text": "Describe this image."}]}], + ## Model dynamically adjusts image size, specify dimensions if required. + [{"role": "user", "content": [{"type": "image", "image": "file:///path/to/your/image.jpg", "resized_height": 280, "resized_width": 420}, {"type": "text", "text": "Describe this image."}]}], + # Video + ## Local video path + [{"role": "user", "content": [{"type": "video", "video": "file:///path/to/video1.mp4"}, {"type": "text", "text": "Describe this video."}]}], + ## Local video frames + [{"role": "user", "content": [{"type": "video", "video": ["file:///path/to/extracted_frame1.jpg", "file:///path/to/extracted_frame2.jpg", "file:///path/to/extracted_frame3.jpg"],}, {"type": "text", "text": "Describe this video."},],}], + ## Model dynamically adjusts video nframes, video height and width. specify args if required. + [{"role": "user", "content": [{"type": "video", "video": "file:///path/to/video1.mp4", "fps": 2.0, "resized_height": 280, "resized_width": 280}, {"type": "text", "text": "Describe this video."}]}], +] + +processor = AutoProcessor.from_pretrained(model_path) +model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_path, torch_dtype="auto", device_map="auto") +text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) +images, videos, video_kwargs = process_vision_info(messages, return_video_kwargs=True) +inputs = processor(text=text, images=images, videos=videos, padding=True, return_tensors="pt", **video_kwargs) +print(inputs) +generated_ids = model.generate(**inputs) +print(generated_ids) +``` \ No newline at end of file diff --git a/previous_version/Video-R1-main-previous/src/qwen-vl-utils/pyproject.toml b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..64bd8a19954fe5376d4b92aa139215e0e392908c --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/pyproject.toml @@ -0,0 +1,75 @@ +[project] +name = "qwen-vl-utils" +version = "0.0.10" +description = "Qwen Vision Language Model Utils - PyTorch" +authors = [ + { name = "Qwen Team", email = "chenkeqin.ckq@alibaba-inc.com" }, +] +dependencies = [ + "requests", + "pillow", + "av", + "packaging", +] +readme = "README.md" +requires-python = ">= 3.8" +license = {text = "Apache-2.0"} +keywords = [ + 'large language model', + 'vision language model', + 'qwen-vl', + 'pytorch', +] +classifiers = [ + 'Development Status :: 4 - Beta', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: Apache Software License', +] + +[project.urls] +Homepage = "https://github.com/QwenLM/Qwen2-VL/tree/main/qwen-vl-utils" +Repository = "https://github.com/QwenLM/Qwen2-VL.git" +Issues = "https://github.com/QwenLM/Qwen2-VL/issues" + +[project.optional-dependencies] +decord = [ + "decord", +] + 
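`decord` is deliberately only an optional extra: `vision_process.py` (added later in this diff) probes for it at import time and falls back to torchvision when it is missing, and the choice can be pinned with the `FORCE_QWENVL_VIDEO_READER` environment variable. A minimal sketch of that selection logic, mirroring `get_video_reader_backend()`:

```python
import importlib.util
import os

def pick_video_backend() -> str:
    # An explicit override wins; otherwise prefer decord when it is importable,
    # else fall back to the torchvision reader.
    forced = os.getenv("FORCE_QWENVL_VIDEO_READER")
    if forced is not None:
        return forced
    return "decord" if importlib.util.find_spec("decord") is not None else "torchvision"

print(pick_video_backend())
```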
+[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.rye] +managed = true +dev-dependencies = [ + "torch", + "torchvision", +] + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.wheel] +packages = ["src/qwen_vl_utils"] + +[tool.ruff] +line-length = 119 + +[tool.ruff.lint] +ignore = ["C408", "C901", "E501", "E731", "E741", "W605"] +select = ["C", "E", "F", "I", "W"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401", "F403", "F811"] + +[tool.ruff.lint.isort] +lines-after-imports = 2 +known-first-party = ["qwen_vl_utils"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" diff --git a/previous_version/Video-R1-main-previous/src/qwen-vl-utils/requirements-dev.lock b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/requirements-dev.lock new file mode 100644 index 0000000000000000000000000000000000000000..b6441fe5e0e112a59a2ff472528950bae3877698 --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/requirements-dev.lock @@ -0,0 +1,84 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: ["decord"] +# all-features: false +# with-sources: false +# generate-hashes: false +# universal: false + +-e file:. +av==12.3.0 + # via qwen-vl-utils +certifi==2022.12.7 + # via requests +charset-normalizer==2.1.1 + # via requests +decord==0.6.0 + # via qwen-vl-utils +filelock==3.13.1 + # via torch + # via triton +fsspec==2024.2.0 + # via torch +idna==3.4 + # via requests +jinja2==3.1.3 + # via torch +markupsafe==2.1.5 + # via jinja2 +mpmath==1.3.0 + # via sympy +networkx==3.1 + # via torch +numpy==1.24.1 + # via decord + # via torchvision +nvidia-cublas-cu12==12.1.3.1 + # via nvidia-cudnn-cu12 + # via nvidia-cusolver-cu12 + # via torch +nvidia-cuda-cupti-cu12==12.1.105 + # via torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via torch +nvidia-cuda-runtime-cu12==12.1.105 + # via torch +nvidia-cudnn-cu12==9.1.0.70 + # via torch +nvidia-cufft-cu12==11.0.2.54 + # via torch +nvidia-curand-cu12==10.3.2.106 + # via torch +nvidia-cusolver-cu12==11.4.5.107 + # via torch +nvidia-cusparse-cu12==12.1.0.106 + # via nvidia-cusolver-cu12 + # via torch +nvidia-nccl-cu12==2.20.5 + # via torch +nvidia-nvjitlink-cu12==12.6.68 + # via nvidia-cusolver-cu12 + # via nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via torch +packaging==24.1 + # via qwen-vl-utils +pillow==10.2.0 + # via qwen-vl-utils + # via torchvision +requests==2.28.1 + # via qwen-vl-utils +sympy==1.12 + # via torch +torch==2.4.0 + # via torchvision +torchvision==0.19.0 +triton==3.0.0 + # via torch +typing-extensions==4.9.0 + # via torch +urllib3==1.26.13 + # via requests diff --git a/previous_version/Video-R1-main-previous/src/qwen-vl-utils/requirements.lock b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/requirements.lock new file mode 100644 index 0000000000000000000000000000000000000000..6f9f6037aabc5fcddcef89add96150a76c51dd8a --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/requirements.lock @@ -0,0 +1,32 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: ["decord"] +# all-features: false +# with-sources: false +# generate-hashes: false +# universal: false + +-e file:. 
+av==12.3.0 + # via qwen-vl-utils +certifi==2022.12.7 + # via requests +charset-normalizer==2.1.1 + # via requests +decord==0.6.0 + # via qwen-vl-utils +idna==3.4 + # via requests +numpy==1.24.4 + # via decord +packaging==24.1 + # via qwen-vl-utils +pillow==10.2.0 + # via qwen-vl-utils +requests==2.28.1 + # via qwen-vl-utils +urllib3==1.26.13 + # via requests diff --git a/previous_version/Video-R1-main-previous/src/qwen-vl-utils/src/qwen_vl_utils/__init__.py b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/src/qwen_vl_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..daa8708442e93d5ec3a02e863ad7ae833952d199 --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/src/qwen_vl_utils/__init__.py @@ -0,0 +1,7 @@ +from .vision_process import ( + extract_vision_info, + fetch_image, + fetch_video, + process_vision_info, + smart_resize, +) diff --git a/previous_version/Video-R1-main-previous/src/qwen-vl-utils/src/qwen_vl_utils/vision_process.py b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/src/qwen_vl_utils/vision_process.py new file mode 100644 index 0000000000000000000000000000000000000000..5bc1aad912bcdf067124d8a641ee0f5a1c95b8b6 --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/qwen-vl-utils/src/qwen_vl_utils/vision_process.py @@ -0,0 +1,379 @@ +from __future__ import annotations + +import base64 +import logging +import math +import os +import sys +import time +import warnings +from functools import lru_cache +from io import BytesIO + +import requests +import torch +import torchvision +from packaging import version +from PIL import Image +from torchvision import io, transforms +from torchvision.transforms import InterpolationMode +from typing import Optional + + +logger = logging.getLogger(__name__) + +IMAGE_FACTOR = 28 +MIN_PIXELS = 4 * 28 * 28 +MAX_PIXELS = 16384 * 28 * 28 +MAX_RATIO = 200 + +# VIDEO_MIN_PIXELS = 128 * 28 * 28 +# VIDEO_MAX_PIXELS = 768 * 28 * 28 +VIDEO_MIN_PIXELS = 128 * 28 * 28 +VIDEO_MAX_PIXELS = 128 * 28 * 28 +FRAME_FACTOR = 2 +FPS = 2.0 +FPS_MIN_FRAMES = 4 +FPS_MAX_FRAMES = 16 + +# Set the maximum number of video token inputs. +# Here, 128K represents the maximum number of input tokens for the VLLM model. +# Remember to adjust it according to your own configuration. +VIDEO_TOTAL_PIXELS = int(float(os.environ.get('VIDEO_MAX_PIXELS', 128000 * 28 * 28 * 0.9))) +logger.info(f"set VIDEO_TOTAL_PIXELS: {VIDEO_TOTAL_PIXELS}") + + +def round_by_factor(number: int, factor: int) -> int: + """Returns the closest integer to 'number' that is divisible by 'factor'.""" + return round(number / factor) * factor + + +def ceil_by_factor(number: int, factor: int) -> int: + """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'.""" + return math.ceil(number / factor) * factor + + +def floor_by_factor(number: int, factor: int) -> int: + """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'.""" + return math.floor(number / factor) * factor + + +def smart_resize( + height: int, width: int, factor: int = IMAGE_FACTOR, min_pixels: int = MIN_PIXELS, max_pixels: int = MAX_PIXELS +) -> tuple[int, int]: + """ + Rescales the image so that the following conditions are met: + + 1. Both dimensions (height and width) are divisible by 'factor'. + + 2. The total number of pixels is within the range ['min_pixels', 'max_pixels']. + + 3. The aspect ratio of the image is maintained as closely as possible. 
+ """ + if max(height, width) / min(height, width) > MAX_RATIO: + raise ValueError( + f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)}" + ) + h_bar = max(factor, round_by_factor(height, factor)) + w_bar = max(factor, round_by_factor(width, factor)) + if h_bar * w_bar > max_pixels: + beta = math.sqrt((height * width) / max_pixels) + h_bar = floor_by_factor(height / beta, factor) + w_bar = floor_by_factor(width / beta, factor) + elif h_bar * w_bar < min_pixels: + beta = math.sqrt(min_pixels / (height * width)) + h_bar = ceil_by_factor(height * beta, factor) + w_bar = ceil_by_factor(width * beta, factor) + return h_bar, w_bar + + +def to_rgb(pil_image: Image.Image) -> Image.Image: + if pil_image.mode == 'RGBA': + white_background = Image.new("RGB", pil_image.size, (255, 255, 255)) + white_background.paste(pil_image, mask=pil_image.split()[3]) # Use alpha channel as mask + return white_background + else: + return pil_image.convert("RGB") + + +def fetch_image(ele: dict[str, str | Image.Image], size_factor: int = IMAGE_FACTOR) -> Image.Image: + if "image" in ele: + image = ele["image"] + else: + image = ele["image_url"] + image_obj = None + if isinstance(image, Image.Image): + image_obj = image + elif image.startswith("http://") or image.startswith("https://"): + response = requests.get(image, stream=True) + image_obj = Image.open(BytesIO(response.content)) + elif image.startswith("file://"): + image_obj = Image.open(image[7:]) + elif image.startswith("data:image"): + if "base64," in image: + _, base64_data = image.split("base64,", 1) + data = base64.b64decode(base64_data) + image_obj = Image.open(BytesIO(data)) + else: + image_obj = Image.open(image) + if image_obj is None: + raise ValueError(f"Unrecognized image input, support local path, http url, base64 and PIL.Image, got {image}") + image = to_rgb(image_obj) + ## resize + if "resized_height" in ele and "resized_width" in ele: + resized_height, resized_width = smart_resize( + ele["resized_height"], + ele["resized_width"], + factor=size_factor, + ) + else: + width, height = image.size + min_pixels = ele.get("min_pixels", MIN_PIXELS) + max_pixels = ele.get("max_pixels", MAX_PIXELS) + resized_height, resized_width = smart_resize( + height, + width, + factor=size_factor, + min_pixels=min_pixels, + max_pixels=max_pixels, + ) + image = image.resize((resized_width, resized_height)) + + return image + + +def smart_nframes( + ele: dict, + total_frames: int, + video_fps: int | float, +) -> int: + """calculate the number of frames for video used for model inputs. + + Args: + ele (dict): a dict contains the configuration of video. + support either `fps` or `nframes`: + - nframes: the number of frames to extract for model inputs. + - fps: the fps to extract frames for model inputs. + - min_frames: the minimum number of frames of the video, only used when fps is provided. + - max_frames: the maximum number of frames of the video, only used when fps is provided. + total_frames (int): the original total number of frames of the video. + video_fps (int | float): the original fps of the video. + + Raises: + ValueError: nframes should in interval [FRAME_FACTOR, total_frames]. + + Returns: + int: the number of frames for video used for model inputs. 
+ """ + assert not ("fps" in ele and "nframes" in ele), "Only accept either `fps` or `nframes`" + if "nframes" in ele: + nframes = round_by_factor(ele["nframes"], FRAME_FACTOR) + else: + fps = ele.get("fps", FPS) + min_frames = ceil_by_factor(ele.get("min_frames", FPS_MIN_FRAMES), FRAME_FACTOR) + max_frames = floor_by_factor(ele.get("max_frames", min(FPS_MAX_FRAMES, total_frames)), FRAME_FACTOR) + nframes = total_frames / video_fps * fps + if nframes > total_frames: + logger.warning(f"smart_nframes: nframes[{nframes}] > total_frames[{total_frames}]") + nframes = min(min(max(nframes, min_frames), max_frames), total_frames) + nframes = floor_by_factor(nframes, FRAME_FACTOR) + if not (FRAME_FACTOR <= nframes and nframes <= total_frames): + raise ValueError(f"nframes should in interval [{FRAME_FACTOR}, {total_frames}], but got {nframes}.") + return nframes + + +def _read_video_torchvision( + ele: dict, +) -> (torch.Tensor, float): + """read video using torchvision.io.read_video + + Args: + ele (dict): a dict contains the configuration of video. + support keys: + - video: the path of video. support "file://", "http://", "https://" and local path. + - video_start: the start time of video. + - video_end: the end time of video. + Returns: + torch.Tensor: the video tensor with shape (T, C, H, W). + """ + video_path = ele["video"] + if version.parse(torchvision.__version__) < version.parse("0.19.0"): + if "http://" in video_path or "https://" in video_path: + warnings.warn("torchvision < 0.19.0 does not support http/https video path, please upgrade to 0.19.0.") + if "file://" in video_path: + video_path = video_path[7:] + st = time.time() + video, audio, info = io.read_video( + video_path, + start_pts=ele.get("video_start", 0.0), + end_pts=ele.get("video_end", None), + pts_unit="sec", + output_format="TCHW", + ) + total_frames, video_fps = video.size(0), info["video_fps"] + logger.info(f"torchvision: {video_path=}, {total_frames=}, {video_fps=}, time={time.time() - st:.3f}s") + nframes = smart_nframes(ele, total_frames=total_frames, video_fps=video_fps) + idx = torch.linspace(0, total_frames - 1, nframes).round().long() + sample_fps = nframes / max(total_frames, 1e-6) * video_fps + video = video[idx] + return video, sample_fps + + +def is_decord_available() -> bool: + import importlib.util + + return importlib.util.find_spec("decord") is not None + + +def _read_video_decord( + ele: dict, +) -> (torch.Tensor, float): + """read video using decord.VideoReader + + Args: + ele (dict): a dict contains the configuration of video. + support keys: + - video: the path of video. support "file://", "http://", "https://" and local path. + - video_start: the start time of video. + - video_end: the end time of video. + Returns: + torch.Tensor: the video tensor with shape (T, C, H, W). 
+ """ + import decord + video_path = ele["video"] + st = time.time() + vr = decord.VideoReader(video_path) + # TODO: support start_pts and end_pts + if 'video_start' in ele or 'video_end' in ele: + raise NotImplementedError("not support start_pts and end_pts in decord for now.") + total_frames, video_fps = len(vr), vr.get_avg_fps() + logger.info(f"decord: {video_path=}, {total_frames=}, {video_fps=}, time={time.time() - st:.3f}s") + nframes = smart_nframes(ele, total_frames=total_frames, video_fps=video_fps) + idx = torch.linspace(0, total_frames - 1, nframes).round().long().tolist() + video = vr.get_batch(idx).asnumpy() + video = torch.tensor(video).permute(0, 3, 1, 2) # Convert to TCHW format + sample_fps = nframes / max(total_frames, 1e-6) * video_fps + return video, sample_fps + + +VIDEO_READER_BACKENDS = { + "decord": _read_video_decord, + "torchvision": _read_video_torchvision, +} + +FORCE_QWENVL_VIDEO_READER = os.getenv("FORCE_QWENVL_VIDEO_READER", None) + + +@lru_cache(maxsize=1) +def get_video_reader_backend() -> str: + if FORCE_QWENVL_VIDEO_READER is not None: + video_reader_backend = FORCE_QWENVL_VIDEO_READER + elif is_decord_available(): + video_reader_backend = "decord" + else: + video_reader_backend = "torchvision" + print(f"qwen-vl-utils using {video_reader_backend} to read video.", file=sys.stderr) + return video_reader_backend + + +def fetch_video(ele: dict, image_factor: int = IMAGE_FACTOR, return_video_sample_fps: bool = False) -> torch.Tensor | list[Image.Image]: + if isinstance(ele["video"], str): + video_reader_backend = get_video_reader_backend() + try: + video, sample_fps = VIDEO_READER_BACKENDS[video_reader_backend](ele) + except Exception as e: + logger.warning(f"video_reader_backend {video_reader_backend} error, use torchvision as default, msg: {e}") + video, sample_fps = VIDEO_READER_BACKENDS["torchvision"](ele) + + nframes, _, height, width = video.shape + min_pixels = ele.get("min_pixels", VIDEO_MIN_PIXELS) + total_pixels = ele.get("total_pixels", VIDEO_TOTAL_PIXELS) + max_pixels = max(min(VIDEO_MAX_PIXELS, total_pixels / nframes * FRAME_FACTOR), int(min_pixels * 1.05)) + max_pixels_supposed = ele.get("max_pixels", max_pixels) + if max_pixels_supposed > max_pixels: + logger.warning(f"The given max_pixels[{max_pixels_supposed}] exceeds limit[{max_pixels}].") + max_pixels = min(max_pixels_supposed, max_pixels) + if "resized_height" in ele and "resized_width" in ele: + resized_height, resized_width = smart_resize( + ele["resized_height"], + ele["resized_width"], + factor=image_factor, + ) + else: + resized_height, resized_width = smart_resize( + height, + width, + factor=image_factor, + min_pixels=min_pixels, + max_pixels=max_pixels, + ) + video = transforms.functional.resize( + video, + [resized_height, resized_width], + interpolation=InterpolationMode.BICUBIC, + antialias=True, + ).float() + if return_video_sample_fps: + return video, sample_fps + return video + else: + assert isinstance(ele["video"], (list, tuple)) + process_info = ele.copy() + process_info.pop("type", None) + process_info.pop("video", None) + images = [ + fetch_image({"image": video_element, **process_info}, size_factor=image_factor) + for video_element in ele["video"] + ] + nframes = ceil_by_factor(len(images), FRAME_FACTOR) + if len(images) < nframes: + images.extend([images[-1]] * (nframes - len(images))) + if return_video_sample_fps: + return images, process_info.pop("fps", 2.0) + return images + + +def extract_vision_info(conversations: list[dict] | list[list[dict]]) -> list[dict]: + 
vision_infos = [] + if isinstance(conversations[0], dict): + conversations = [conversations] + for conversation in conversations: + for message in conversation: + if isinstance(message["content"], list): + for ele in message["content"]: + if ( + "image" in ele + or "image_url" in ele + or "video" in ele + or ele["type"] in ("image", "image_url", "video") + ): + vision_infos.append(ele) + return vision_infos + + +def process_vision_info( + conversations: list[dict] | list[list[dict]], + return_video_kwargs: bool = False, +) -> tuple[list[Image.Image] | None, list[torch.Tensor | list[Image.Image]] | None, Optional[dict]]: + + vision_infos = extract_vision_info(conversations) + ## Read images or videos + image_inputs = [] + video_inputs = [] + video_sample_fps_list = [] + for vision_info in vision_infos: + if "image" in vision_info or "image_url" in vision_info: + image_inputs.append(fetch_image(vision_info)) + elif "video" in vision_info: + video_input, video_sample_fps = fetch_video(vision_info, return_video_sample_fps=True) + video_sample_fps_list.append(video_sample_fps) + video_inputs.append(video_input) + else: + raise ValueError("image, image_url or video should in content.") + if len(image_inputs) == 0: + image_inputs = None + if len(video_inputs) == 0: + video_inputs = None + if return_video_kwargs: + return image_inputs, video_inputs, {'fps': video_sample_fps_list} + return image_inputs, video_inputs diff --git a/previous_version/Video-R1-main-previous/src/r1-v/temp_image.png b/previous_version/Video-R1-main-previous/src/r1-v/temp_image.png new file mode 100644 index 0000000000000000000000000000000000000000..4d297bbb99f1cf1321ebf2da2bf369069539b86a --- /dev/null +++ b/previous_version/Video-R1-main-previous/src/r1-v/temp_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d32d2be631fcae3fcf15b31fb57096fdba3c4c6e5417f8cab84f5c16e7ce18f +size 147401 diff --git a/src/r1-v/.gitignore b/src/r1-v/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5c28ec81a869f992b0db859a957215c1608bfc2a --- /dev/null +++ b/src/r1-v/.gitignore @@ -0,0 +1,178 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# PyPI configuration file +.pypirc + +# Temp folders +data/ +wandb/ +scripts/ +checkpoints/ +.vscode/ \ No newline at end of file diff --git a/src/r1-v/LICENSE b/src/r1-v/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/src/r1-v/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/r1-v/Makefile b/src/r1-v/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..47999e65c24d98abb5fee6f072a43aa9d6b0c101 --- /dev/null +++ b/src/r1-v/Makefile @@ -0,0 +1,20 @@ +.PHONY: style quality + +# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!) 
+export PYTHONPATH = src + +check_dirs := src + +style: + black --line-length 119 --target-version py310 $(check_dirs) setup.py + isort $(check_dirs) setup.py + +quality: + black --check --line-length 119 --target-version py310 $(check_dirs) setup.py + isort --check-only $(check_dirs) setup.py + flake8 --max-line-length 119 $(check_dirs) setup.py + + +# Evaluation + +evaluate: diff --git a/src/r1-v/setup.cfg b/src/r1-v/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..5fa1d655611f7509de9130ac8dd482fc4b4f2dae --- /dev/null +++ b/src/r1-v/setup.cfg @@ -0,0 +1,41 @@ +[isort] +default_section = FIRSTPARTY +ensure_newline_before_comments = True +force_grid_wrap = 0 +include_trailing_comma = True +known_first_party = open_r1 +known_third_party = + transformers + datasets + fugashi + git + h5py + matplotlib + nltk + numpy + packaging + pandas + psutil + pytest + rouge_score + sacrebleu + seqeval + sklearn + streamlit + torch + tqdm + +line_length = 119 +lines_after_imports = 2 +multi_line_output = 3 +use_parentheses = True + +[flake8] +ignore = E203, E501, E741, W503, W605 +max-line-length = 119 +per-file-ignores = + # imported but unused + __init__.py: F401 + +[tool:pytest] +doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS \ No newline at end of file diff --git a/src/r1-v/setup.py b/src/r1-v/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..3e9b0c3bf6af97ef448614eda7159c58ab807230 --- /dev/null +++ b/src/r1-v/setup.py @@ -0,0 +1,132 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Adapted from huggingface/transformers: https://github.com/huggingface/transformers/blob/21a2d900eceeded7be9edc445b56877b95eda4ca/setup.py + + +import re +import shutil +from pathlib import Path + +from setuptools import find_packages, setup + + +# Remove stale open_r1.egg-info directory to avoid https://github.com/pypa/pip/issues/5466 +stale_egg_info = Path(__file__).parent / "open_r1.egg-info" +if stale_egg_info.exists(): + print( + ( + "Warning: {} exists.\n\n" + "If you recently updated open_r1, this is expected,\n" + "but it may prevent open_r1 from installing in editable mode.\n\n" + "This directory is automatically generated by Python's packaging tools.\n" + "I will remove it now.\n\n" + "See https://github.com/pypa/pip/issues/5466 for details.\n" + ).format(stale_egg_info) + ) + shutil.rmtree(stale_egg_info) + + +# IMPORTANT: all dependencies should be listed here with their version requirements, if any. +# * If a dependency is fast-moving (e.g. 
transformers), pin to the exact version +_deps = [ + "accelerate>=1.2.1", + "bitsandbytes>=0.43.0", + "black>=24.4.2", + "datasets>=3.2.0", + "deepspeed==0.15.4", + "distilabel[vllm,ray,openai]>=1.5.2", + "einops>=0.8.0", + "flake8>=6.0.0", + "hf_transfer>=0.1.4", + "huggingface-hub[cli]>=0.19.2,<1.0", + "isort>=5.12.0", + "liger_kernel==0.5.2", + "lighteval @ git+https://github.com/huggingface/lighteval.git@4f381b352c0e467b5870a97d41cb66b487a2c503#egg=lighteval[math]", + "math-verify", # Used for math verification in grpo + "packaging>=23.0", + "parameterized>=0.9.0", + "pytest", + "safetensors>=0.3.3", + "sentencepiece>=0.1.99", + "torch>=2.5.1", + # "transformers @ git+https://github.com/huggingface/transformers.git@336dc69d63d56f232a183a3e7f52790429b871ef", + "trl==0.16.0", + "vllm==0.7.2", + "wandb>=0.19.1", + "pillow", +] + +# this is a lookup table with items like: +# +# tokenizers: "tokenizers==0.9.4" +# packaging: "packaging" +# +# some of the values are versioned whereas others aren't. +deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ \[\]]+)(?:\[[^\]]+\])?(?:[!=<>~ ].*)?$)", x)[0] for x in _deps)} + + +def deps_list(*pkgs): + return [deps[pkg] for pkg in pkgs] + + +extras = {} +extras["tests"] = deps_list("pytest", "parameterized") +extras["torch"] = deps_list("torch") +extras["quality"] = deps_list("black", "isort", "flake8") +extras["eval"] = deps_list("lighteval", "math-verify") +extras["dev"] = extras["quality"] + extras["tests"] + extras["eval"] + +# core dependencies shared across the whole project - keep this to a bare minimum :) +install_requires = [ + deps["accelerate"], + deps["bitsandbytes"], + deps["einops"], + deps["datasets"], + deps["deepspeed"], + deps["hf_transfer"], + deps["huggingface-hub"], + deps["liger_kernel"], + deps["packaging"], # utilities from PyPA to e.g., compare versions + deps["safetensors"], + deps["sentencepiece"], + # deps["transformers"], + deps["trl"], +] + +setup( + name="r1-v", + version="0.1.0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) + author="The r1-v team and the Hugging Face team (past and future)", + description="R1-V", + license="Apache", + url="https://github.com/Deep-Agent/R1-V", + package_dir={"": "src"}, + packages=find_packages("src"), + zip_safe=False, + extras_require=extras, + python_requires=">=3.10.9", + install_requires=install_requires, + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + ], +) diff --git a/src/r1-v/src/open_r1/__init__.py b/src/r1-v/src/open_r1/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/r1-v/src/open_r1/evaluate.py b/src/r1-v/src/open_r1/evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..ef3089fff4ecc4753b10b585fe172a2c93af4d9d --- /dev/null +++ b/src/r1-v/src/open_r1/evaluate.py @@ -0,0 +1,85 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Custom evaluation tasks for LightEval.""" + +from lighteval.metrics.dynamic_metrics import ( + ExprExtractionConfig, + LatexExtractionConfig, + multilingual_extractive_match_metric, +) +from lighteval.tasks.lighteval_task import LightevalTaskConfig +from lighteval.tasks.requests import Doc +from lighteval.utils.language import Language + + +metric = multilingual_extractive_match_metric( + language=Language.ENGLISH, + fallback_mode="first_match", + precision=5, + gold_extraction_target=(LatexExtractionConfig(),), + pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), + aggregation_function=max, +) + + +def prompt_fn(line, task_name: str = None): + """Assumes the model is either prompted to emit \\boxed{answer} or does so automatically""" + return Doc( + task_name=task_name, + query=line["problem"], + choices=[line["solution"]], + gold_index=0, + ) + + +# Define tasks +aime24 = LightevalTaskConfig( + name="aime24", + suite=["custom"], + prompt_function=prompt_fn, + hf_repo="HuggingFaceH4/aime_2024", + hf_subset="default", + hf_avail_splits=["train"], + evaluation_splits=["train"], + few_shots_split=None, + few_shots_select=None, + generation_size=32768, + metric=[metric], + version=1, +) +math_500 = LightevalTaskConfig( + name="math_500", + suite=["custom"], + prompt_function=prompt_fn, + hf_repo="HuggingFaceH4/MATH-500", + hf_subset="default", + hf_avail_splits=["test"], + evaluation_splits=["test"], + few_shots_split=None, + few_shots_select=None, + generation_size=32768, + metric=[metric], + version=1, +) + +# Add tasks to the table +TASKS_TABLE = [] +TASKS_TABLE.append(aime24) +TASKS_TABLE.append(math_500) + +# MODULE LOGIC +if __name__ == "__main__": + print([t["name"] for t in TASKS_TABLE]) + print(len(TASKS_TABLE)) diff --git a/src/r1-v/src/open_r1/generate.py b/src/r1-v/src/open_r1/generate.py new file mode 100644 index 0000000000000000000000000000000000000000..740621018693a72edfc738ef44291a9d39c18132 --- /dev/null +++ b/src/r1-v/src/open_r1/generate.py @@ -0,0 +1,156 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Optional + +from distilabel.llms import OpenAILLM +from distilabel.pipeline import Pipeline +from distilabel.steps.tasks import TextGeneration + + +def build_distilabel_pipeline( + model: str, + base_url: str = "http://localhost:8000/v1", + prompt_column: Optional[str] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + max_new_tokens: int = 8192, + num_generations: int = 1, +) -> Pipeline: + generation_kwargs = {"max_new_tokens": max_new_tokens} + + if temperature is not None: + generation_kwargs["temperature"] = temperature + + if top_p is not None: + generation_kwargs["top_p"] = top_p + + with Pipeline().ray() as pipeline: + TextGeneration( + llm=OpenAILLM( + base_url=base_url, + api_key="something", + model=model, + # thinking can take some time... + timeout=10 * 60, + generation_kwargs=generation_kwargs, + ), + input_mappings={"instruction": prompt_column} if prompt_column is not None else {}, + input_batch_size=64, # on 4 nodes bs ~60+ leads to preemption due to KV cache exhaustion + num_generations=num_generations, + ) + + return pipeline + + +if __name__ == "__main__": + import argparse + + from datasets import load_dataset + + parser = argparse.ArgumentParser(description="Run distilabel pipeline for generating responses with DeepSeek R1") + parser.add_argument( + "--hf-dataset", + type=str, + required=True, + help="HuggingFace dataset to load", + ) + parser.add_argument( + "--hf-dataset-config", + type=str, + required=False, + help="Dataset config to use", + ) + parser.add_argument( + "--hf-dataset-split", + type=str, + default="train", + help="Dataset split to use", + ) + parser.add_argument("--prompt-column", type=str, default="prompt") + parser.add_argument( + "--model", + type=str, + required=True, + help="Model name to use for generation", + ) + parser.add_argument( + "--vllm-server-url", + type=str, + default="http://localhost:8000/v1", + help="URL of the vLLM server", + ) + parser.add_argument( + "--temperature", + type=float, + help="Temperature for generation", + ) + parser.add_argument( + "--top-p", + type=float, + help="Top-p value for generation", + ) + parser.add_argument( + "--max-new-tokens", + type=int, + default=8192, + help="Maximum number of new tokens to generate", + ) + parser.add_argument( + "--num-generations", + type=int, + default=1, + help="Number of generations per problem", + ) + parser.add_argument( + "--hf-output-dataset", + type=str, + required=False, + help="HuggingFace repo to push results to", + ) + parser.add_argument( + "--private", + action="store_true", + help="Whether to make the output dataset private when pushing to HF Hub", + ) + + args = parser.parse_args() + + print("\nRunning with arguments:") + for arg, value in vars(args).items(): + print(f" {arg}: {value}") + print() + + print(f"Loading '{args.hf_dataset}' (config: {args.hf_dataset_config}, split: {args.hf_dataset_split}) dataset...") + dataset = load_dataset(args.hf_dataset, split=args.hf_dataset_split) + print("Dataset loaded!") + + pipeline = build_distilabel_pipeline( + model=args.model, + base_url=args.vllm_server_url, + prompt_column=args.prompt_column, + temperature=args.temperature, + top_p=args.top_p, + max_new_tokens=args.max_new_tokens, + num_generations=args.num_generations, + ) + + print("Running generation pipeline...") + distiset = pipeline.run(dataset=dataset, use_cache=False) + print("Generation pipeline finished!") + + if args.hf_output_dataset: + print(f"Pushing resulting dataset to '{args.hf_output_dataset}'...") 
+ distiset.push_to_hub(args.hf_output_dataset, private=args.private) + print("Dataset pushed!") diff --git a/src/r1-v/src/open_r1/grpo-cot-72BEval.py b/src/r1-v/src/open_r1/grpo-cot-72BEval.py new file mode 100644 index 0000000000000000000000000000000000000000..64a1b945433343d3a96882e79067eb668c64d331 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-cot-72BEval.py @@ -0,0 +1,489 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from datetime import datetime +from dataclasses import dataclass, field + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModifiedOrig +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from typing import Dict, List, Optional +from mathruler.grader import extract_boxed_content, grade_answer + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +from openai import OpenAI +from concurrent.futures import ThreadPoolExecutor, as_completed +import time +# from utils.math_cot import * +# from qa_metrics.pedant import PEDANT +# from qa_metrics.answerBERT import AnswerBertActor + +# pedant = PEDANT() +# answerBERT = AnswerBertActor(device='cuda:7') +client = OpenAI( + base_url="http://29.81.228.243:8081 /v1", # your vLLM server + api_key="ANYKEY", # if you set --api-key when launching +) + +def validate_description(description, question): + input_message = "You are provided a text description of a problem and a question. Determine the answer to the question based on the text description. First provide a step-by-step reasoning within tags, then provide your answer as a single final answer, single letter choice, or a short phrase ENCLOSED with tags. \nText description: {Description}\nQuestion: {Question}\nPlease only return the final single letter choice within the tags for multiple choice questions; Please provide the numerical value (e.g., 42 or 3.14) within the tags for numerical questions.".format(Description=description, Question=question) + response = client.chat.completions.create( + model="Qwen2.5-72B-Instruct", # **must match** the returned id + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": input_message} + ] + ) + + # print('*'*10) + # print('Input Prompt: ', input_message) + # print('-'*10) + # print('Output Message: ', response.choices[0].message.content) + # print('-'*10) + # time.sleep(40) + + return response.choices[0].message.content + + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. 
+ """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + + # reward_funcs: list[str] = field( + # default_factory=lambda: ["accuracy"], + # metadata={"help": "List of reward functions. Possible values: 'accuracy'"}, + # ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + +def accuracy_reward(completions, solution, **kwargs): + def extract_answer(text: str) -> str: + """ + 1) Try the full block. + 2) If that is missing, grab whatever follows the opening tag. + 3) Otherwise return the original text. + """ + # ① normal case + m = re.search(r'\s*(.*?)\s*', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ② fallback + m = re.search(r'\s*(.*)$', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ③ nothing found + return text.strip() + + def extract_description(predict: str) -> Optional[str]: + """ + Extracts the content of the block from `predict`. + Returns the inner text (with leading/trailing whitespace stripped), + or None if no tag is found. + """ + match = re.search(r"([\s\S]*?)", predict, re.DOTALL) + if not match: + return predict + return match.group(1).strip() + + def single_accuracy_reward(predict: str, ground_truth: str) -> float: + answer = predict + return 1.0 if grade_answer(answer, ground_truth) else 0.0 + + def compute_math_score_single(predict: str, ground_truth: str, format_weight: float = 0.0) -> Dict[str, float]: + predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) + # format_score = format_reward(predict) + accuracy_score = single_accuracy_reward(predict, ground_truth) + + # return (1 - format_weight) * accuracy_score + format_weight * format_score + return accuracy_score + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + + question_type = kwargs['problem_type'][0] + questions = kwargs['problem'] + # questions = kwargs['prompt'] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + + extracted_content_descriptions = 
[extract_description(str(ele)) for ele in contents] + description_answer_outputs = [] + + + with ThreadPoolExecutor(max_workers=8) as executor: + futures = [ + executor.submit(validate_description, desc, q) + for desc, q in zip(extracted_content_descriptions, questions) + ] + for future in as_completed(futures): + try: + description_answer_outputs.append(future.result()) + except Exception as e: + # handle/log e + # description_answer_outputs.append(None) + print('Description output error: ', e) + description_answer_outputs.append(0) + + + contents = [str(ele) for ele in contents] + description_answer_outputs = [str(ele) for ele in description_answer_outputs] + + gt_answers = [extract_answer(str(sol)) for sol in solution] + extracted_description_outputs = [extract_answer(str(description_answer_outputs[index_description])) for index_description in range(len(description_answer_outputs))] + + + # print('GT answers: ', gt_answers) + # print('Description answers: ', description_answer_outputs[0]) + # print('-'*10) + # import time + # time.sleep(10) + + description_rewards = [compute_math_score_single(extracted_description_outputs[count_idx], gt_answers[count_idx]) for count_idx in range(len(description_answer_outputs))] + + # print('()'*10) + # print("Question: ", questions[0]) + # print(gt_answers) + # print('Description outputs', description_answer_outputs[0]) + # print(description_rewards) + # print('-'*10) + # time.sleep(30) + + + # for content, sol, description_reward in zip(contents, solution, description_rewards): + for content, gt_ans, description_reward in zip(contents, gt_answers, description_rewards): + try: + output_ans = extract_answer(str(content)) + # gt_ans = extract_answer(sol) + + if question_type == "OCR": + # description_extraction = extract_answer(str(second_content)) + # description_error_rate = wer(gt_ans, description_extraction) + description_pendat_reward = pedant.get_score(gt_ans, description_extraction, question) + # error_rate = wer(gt_ans, output_ans) + answer_pedant_reward = pedant.get_score(gt_ans, output_ans, question) + # reward = (1 - error_rate) + (1- description_error_rate) + # reward = max(0.0, min(2.0, reward)) + # print('Extracted description: ', description_extraction) + # print('Generated answer: ', output_ans) + # print('Sol: ', gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + # print('-' * 10) + reward = description_pendat_reward + answer_pedant_reward + # elif question_type == "free-form": + # score = compute_rouge_score(gt_ans, output_ans) + # reward = max(0.0, min(1.0, score)) + elif question_type == "regression": + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + rel_diff = min(1.0, max(0.0, rel_diff)) + reward = 1 - rel_diff + elif question_type == 'math' or question_type == 'unify' or question_type == "multiple choice" or question_type == "numerical": + # description_reward = compute_math_score_single(description_extraction, gt_ans) + answer_reward = compute_math_score_single(output_ans, gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + # print('-' * 10) + reward = description_reward + answer_reward + # reward = answer_reward + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + 
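One caveat about the threaded fan-out above: as_completed yields results in completion order rather than submission order, so description_answer_outputs may not stay index-aligned with questions and contents when they are zipped back together with the ground-truth answers. A minimal order-preserving sketch of the same call pattern (same worker count, same validate_description helper) is:

from concurrent.futures import ThreadPoolExecutor

def validate_descriptions_in_order(descriptions, questions, max_workers=8):
    # executor.map preserves input order, so result[i] is the judge model's answer
    # for descriptions[i] / questions[i]; exceptions propagate instead of being
    # replaced by a 0 placeholder as in the loop above.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(validate_description, descriptions, questions))

Relatedly, simple_format_reward further down awards 0.1 only when the tagged description, reasoning, and answer blocks appear in order. Spelled out with explicit tag names (the names <description>, <think>, and <answer> are an assumption here, inferred from similar R1-style scripts rather than quoted from this file), that check is roughly:

import re

_FORMAT_PATTERN = (
    r"<description>.*?</description>\s*"  # assumed tag names, for illustration only
    r"<think>.*?</think>\s*"
    r"<answer>.*?</answer>"
)

def format_reward_sketch(completion_text: str) -> float:
    # 0.1 when the whole completion matches the tagged layout, 0.0 otherwise.
    return 0.1 if re.fullmatch(_FORMAT_PATTERN, completion_text, re.DOTALL) else 0.0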
reward = 0.0 + + rewards.append(reward) + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {content}\n") + f.write(f"Solution: {gt_ans}\n") + + # print("rewards: ", rewards) + return rewards + + +def simple_format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + # pattern = r".*?\s*.*?" + pattern = r".*?\s*.*?\s*.*?" + completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [0.1 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": simple_format_reward, +} + +# SYSTEM_PROMPT = ( +# "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant " +# "first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning " +# "process and answer are enclosed within and tags, respectively, i.e., " +# " reasoning process here answer here " +# ) + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. After the user asks a question about an image, write a rich, self-contained description of that image—detailed enough that someone could answer the question from the description alone, without ever seeing the image. Enclose the entire description in tags." + "Next, the assistant should think deeply about the reasoning process, engaging in an internal dialogue and self-reflection, " + "and provide this step-by-step reasoning within tags. " + "Finally, the assistant provides a single word, single letter choice, or phrase answer within tags." + "The output format should be: image description here reasoning process here FINAL ANSWER here . Please only return the final single letter choice within the tags for multiple choice questions; Please provide the numerical value (e.g., 42 or 3.14) within the tags for numerical questions." +) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + # QUESTION_TEMPLATE = ( + # "{Question}\n" + # "Please think about this question as if you were a human pondering deeply. " + # "Engage in an internal dialogue using expressions such as 'let me think', 'wait', 'Hmm', 'oh, I see', 'let's break it down', etc, or other natural language thought expressions " + # "It's encouraged to include self-reflection or verification in the reasoning process. " + # "Provide your detailed reasoning between the tags, and then give your final answer between the tags." + # ) + + QUESTION_TEMPLATE = ( + "{Question}\n" + "You are tasked with analyzing an image to generate an exhaustive and detailed description to answer a question. 
" + "Analyze the image and produce a thorough, self-contained description—detailed enough for someone to answer the question using the description alone. Wrap the entire description in tags.\n" + "Next, engage in an internal dialogue as if you were a human pondering deeply—use expressions such as 'let me think', 'wait', 'hmm', 'oh, I see', 'let's break it down', etc., and include self-reflection or verification in your reasoning process. " + "Provide your detailed, step-by-step reasoning based on the image description, and enclose this part within tags.\n" + "Finally, provide a single word or phrase answer to the question, enclosed within tags.\n" + "The output format should be: image description here reasoning process here FINAL ANSWER here . Please only return the final single letter choice within the tags for multiple choice questions; Please provide the numerical value (e.g., 42 or 3.14) within the tags for numerical questions." + ) + + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "math": " Please provide the final exact answer (single option letter for multiple choice) within the tags.", + } + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + # msg ={ + # "prompt": + # [{ + # "role": "user", + # "content": [ + # { + # "type": example['data_type'], + # # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + # }, + # { + # "type": "text", + # "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + # } + # ] + # }] + # } + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + } + ] + }] + } + + # return msg + return { + "prompt": msg["prompt"], + "problem": question, + } + + + dataset = dataset.map(make_conversation_image_and_video) + + + # print('Example problem') + # print(dataset['train']['problem'][10]) + # time.sleep(30) + + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModifiedOrig + print("using: ", trainer_cls) + + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + 
eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo-cot-LLMEval.py b/src/r1-v/src/open_r1/grpo-cot-LLMEval.py new file mode 100644 index 0000000000000000000000000000000000000000..5f2b5ee1e707371d011d331da18db13e07709b71 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-cot-LLMEval.py @@ -0,0 +1,552 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from datetime import datetime +from dataclasses import dataclass, field + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModifiedOrig +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from typing import Dict, List, Optional +from mathruler.grader import extract_boxed_content, grade_answer + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +# from utils.gpt_eval import infer +# from utils.math_cot import * +# from qa_metrics.pedant import PEDANT +# from qa_metrics.answerBERT import AnswerBertActor + +# pedant = PEDANT() +# answerBERT = AnswerBertActor(device='cuda:7') + +alpha = 1.0 + +TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "math": " Please provide the final exact answer (single option letter for multiple choice) within the tags.", + } + +''' +gpt infer +''' +import os +from openai import AzureOpenAI +import time + +import base64 +from mimetypes import guess_type + + +def azure_gpt4(messages, model): + outputs = [] + for message in messages: + input_prompt = [ + { "role": "system", "content": "You are a helpful assistant." 
}, + { "role": "user", "content": [ + { + "type": "text", + "text": message["instruction"] + }, + # { + # "type": "image_url", + # "image_url": { + # "url": message["image"] + # } + # } + ]} + ] + ## try N times if API exceed limit ... + for i in range(10): + try: + output = client.chat.completions.create( + model=model, messages=input_prompt, max_tokens=2000 + ) + + output_text = output.choices[0].message.content + break ## exit if successful + + except Exception as e: + print(f'Index {i} got error message: {e}') + output_text = '' + time.sleep(3) + + outputs.append(output_text) + + return outputs + + +client = AzureOpenAI( + api_key = "83f30a2a22324395b854bd343db38d85", + api_version = "2024-08-01-preview", + azure_endpoint = "https://francecentral.api.cognitive.microsoft.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview" + ) + +model = "gpt-4o" +prompt_template = '''You are provided a text description of a problem and a question. Determine the answer to the question based on the text description. Provide your answer as a single final answer or a short phrase enclosed with . \nText description: {text}\nQuestion: {question}''' + + +def infer(prompt): + # prompt_question = prompt_question.replace('', '') + # prompt = prompt_template.replace('{text}', text).replace('{question}', prompt_question) + + messages = [ + {"instruction": prompt}, + ] + prompt_success = False + prompt_time = 0 + outputs = [' None '] + while prompt_success == False and prompt_time <= 2: + try: + outputs = azure_gpt4(messages, model) + prompt_success = True + except: + prompt_time += 1 + time.sleep(5) + + return outputs[0] + +''' +end of gpt infer +''' + + +from concurrent.futures import ThreadPoolExecutor, as_completed + +def _call_infer(desc): + return infer(desc) + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + + # reward_funcs: list[str] = field( + # default_factory=lambda: ["accuracy"], + # metadata={"help": "List of reward functions. Possible values: 'accuracy'"}, + # ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + + +def accuracy_reward(completions, solution, **kwargs): + def extract_answer(text: str) -> str: + """ + 1) Try the full block. + 2) If that is missing, grab whatever follows the opening tag. + 3) Otherwise return the original text. + """ + # ① normal case + m = re.search(r'\s*(.*?)\s*', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ② fallback + m = re.search(r'\s*(.*)$', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ③ nothing found + return text.strip() + + def extract_description(predict: str) -> Optional[str]: + """ + Extracts the content of the block from `predict`. 
+ Returns the inner text (with leading/trailing whitespace stripped), + or None if no tag is found. + """ + match = re.search(r"([\s\S]*?)", predict, re.DOTALL) + if not match: + return predict + return match.group(1).strip() + + def single_accuracy_reward(predict: str, ground_truth: str) -> float: + answer = predict + return 1.0 if grade_answer(answer, ground_truth) else 0.0 + + def compute_math_score_single(predict: str, ground_truth: str, format_weight: float = 0.0) -> Dict[str, float]: + predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) + # format_score = format_reward(predict) + accuracy_score = single_accuracy_reward(predict, ground_truth) + + # return (1 - format_weight) * accuracy_score + format_weight * format_score + return accuracy_score + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + # print('Computing rewards now...') + # second_prompts = kwargs.get("second_prompts") # ← list[str] or None + # second_completions = kwargs.get("second_completions") + # second_contents = [comp[0]["content"] for comp in second_completions] + # print('second prompts', second_prompts) + # print('-'*10) + # print('second completions', second_completions) + # print('-'*10) + + # import time + # time.sleep(30) + question_type = kwargs['problem_type'][0] + questions = kwargs['problem'] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + extracted_content_descriptions = [extract_description(ele) for ele in contents] + + description_query_inputs = [] + + for index in range(len(extracted_content_descriptions)): + prompt_question = questions[index] + des_text = extracted_content_descriptions[index] + prompt_question = prompt_question.replace('', '') + prompt_input = prompt_template.replace('{text}', des_text).replace('{question}', prompt_question) + TYPE_TEMPLATE[question_type] + description_query_inputs.append(prompt_input) + + + description_score_outputs = [] + with ThreadPoolExecutor(max_workers=8) as executor: + # kick off all the futures + # futures = [ + # executor.submit(_call_infer, desc, ques) + # for desc, ques in zip(extracted_content_descriptions, questions) + # ] + futures = [ + executor.submit(_call_infer, desc) + for desc in description_query_inputs + ] + # collect as they finish (optional—keeps order of completion) + for fut in as_completed(futures): + description_score_outputs.append(extract_answer(fut.result())) + + gt_answers = [extract_answer(sol) for sol in solution] + description_rewards = 
[compute_math_score_single(description_score_outputs[count_idx], gt_answers[count_idx]) for count_idx in range(len(description_score_outputs))] + + # print(gt_answers) + # print(description_score_outputs) + # print(description_rewards) + # print('-'*10) + + + for content, gt_ans, description_reward in zip(contents, gt_answers, description_rewards): + # for content, sol, question in zip(contents, solution, questions): + # for content, sol, second_content in zip(contents, solution, second_completions): + try: + output_ans = extract_answer(content) + # gt_ans = extract_answer(sol) + # description_extraction = extract_answer(second_content) + # if question_type == "multiple choice": + # reward = 1.0 if output_ans.strip() == gt_ans.strip() else 0.0 + # elif question_type == "numerical": + # gt_has_decimal = ("." in gt_ans) or ("," in gt_ans) + # out_has_decimal = ("." in output_ans) or ("," in output_ans) + # if gt_has_decimal != out_has_decimal: + # reward = 0.0 + # else: + # gt_number = normalize_number(gt_ans) + # out_number = normalize_number(output_ans) + # if gt_number is None or out_number is None: + # reward = 0.0 + # else: + # reward = 1.0 if round(gt_number, 2) == round(out_number, 2) else 0.0 + if question_type == "OCR": + # description_extraction = extract_answer(second_content) + # description_error_rate = wer(gt_ans, description_extraction) + # description_pendat_reward = pedant.get_score(gt_ans, description_extraction, question) + # error_rate = wer(gt_ans, output_ans) + answer_pedant_reward = pedant.get_score(gt_ans, output_ans, questions[0]) + # reward = (1 - error_rate) + (1- description_error_rate) + # reward = max(0.0, min(2.0, reward)) + # print('Extracted description: ', description_extraction) + # print('Generated answer: ', output_ans) + # print('Sol: ', gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + # print('-' * 10) + # reward = description_pendat_reward + answer_pedant_reward + reward = answer_pedant_reward + # elif question_type == "free-form": + # score = compute_rouge_score(gt_ans, output_ans) + # reward = max(0.0, min(1.0, score)) + elif question_type == "regression": + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + rel_diff = min(1.0, max(0.0, rel_diff)) + reward = 1 - rel_diff + elif question_type == 'math' or question_type == 'unify' or question_type == "multiple choice" or question_type == "numerical": + answer_reward = compute_math_score_single(output_ans, gt_ans) + + + # print(f"Extracted description: {description_extraction} | Generated answer: {output_ans} | Sol: {gt_ans}") + # print(f'Description reward: {description_reward} | answer reward: {answer_reward} | final reward: {reward}') + # print('-' * 10) + + if description_reward == 0 and answer_reward == 1: + reward = alpha + else: + reward = description_reward + answer_reward + # reward = answer_reward + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: 
{content}\n") + f.write(f"Solution: {gt_ans}\n") + + return rewards + + +def simple_format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + # pattern = r".*?\s*.*?" + pattern = r".*?\s*.*?\s*.*?" + completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [0.1 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": simple_format_reward, +} + +# SYSTEM_PROMPT = ( +# "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant " +# "first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning " +# "process and answer are enclosed within and tags, respectively, i.e., " +# " reasoning process here answer here " +# ) + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. After the user asks a question about an image, write a rich, self-contained description of that image—detailed enough that someone could answer the question from the description alone, without ever seeing the image. Enclose the entire description in tags." + "Next, the assistant should think deeply about the reasoning process, engaging in an internal dialogue and self-reflection, " + "and provide this step-by-step reasoning within tags. " + "Finally, the assistant provides a single word, single letter choice, or phrase answer within tags." + "The output format should be: image description here reasoning process here FINAL ANSWER here ." +) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + # QUESTION_TEMPLATE = ( + # "{Question}\n" + # "Please think about this question as if you were a human pondering deeply. " + # "Engage in an internal dialogue using expressions such as 'let me think', 'wait', 'Hmm', 'oh, I see', 'let's break it down', etc, or other natural language thought expressions " + # "It's encouraged to include self-reflection or verification in the reasoning process. " + # "Provide your detailed reasoning between the tags, and then give your final answer between the tags." + # ) + + QUESTION_TEMPLATE = ( + "{Question}\n" + "You are tasked with analyzing an image to generate an exhaustive and detailed description to answer a question. " + "Analyze the image and produce a thorough, self-contained description—detailed enough for someone to answer the question using the description alone. Wrap the entire description in tags.\n" + "Next, engage in an internal dialogue as if you were a human pondering deeply—use expressions such as 'let me think', 'wait', 'hmm', 'oh, I see', 'let's break it down', etc., and include self-reflection or verification in your reasoning process. 
" + "Provide your detailed, step-by-step reasoning based on the image description, and enclose this part within tags.\n" + "Finally, provide a single word or phrase answer to the question, enclosed within tags.\n" + "The output format should be: image description here reasoning process here FINAL ANSWER here " + ) + + + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + } + ] + }] + } + + return msg + + + dataset = dataset.map(make_conversation_image_and_video) + + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModifiedOrig + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo-cot-answerBERT-eval.py b/src/r1-v/src/open_r1/grpo-cot-answerBERT-eval.py new file mode 100644 index 0000000000000000000000000000000000000000..7591f8cb884051ef4ea422f462798f8cce332c16 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-cot-answerBERT-eval.py @@ -0,0 +1,429 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from datetime import datetime +from dataclasses import dataclass, field + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModifiedOrig +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from typing import Dict, List, Optional +from mathruler.grader import extract_boxed_content, grade_answer + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +# from utils.math_cot import * +# from qa_metrics.pedant import PEDANT +from qa_metrics.answerBERT import AnswerBertActor + +# pedant = PEDANT() +answerBERT = AnswerBertActor(device='cuda:0') + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + + # reward_funcs: list[str] = field( + # default_factory=lambda: ["accuracy"], + # metadata={"help": "List of reward functions. Possible values: 'accuracy'"}, + # ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + + +def accuracy_reward(completions, solution, **kwargs): + def extract_answer(text: str) -> str: + """ + 1) Try the full block. + 2) If that is missing, grab whatever follows the opening tag. + 3) Otherwise return the original text. + """ + # ① normal case + m = re.search(r'\s*(.*?)\s*', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ② fallback + m = re.search(r'\s*(.*)$', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ③ nothing found + return text.strip() + + def extract_description(predict: str) -> Optional[str]: + """ + Extracts the content of the block from `predict`. + Returns the inner text (with leading/trailing whitespace stripped), + or None if no tag is found. 
+ """ + match = re.search(r"([\s\S]*?)", predict, re.DOTALL) + if not match: + return predict + return match.group(1).strip() + + def single_accuracy_reward(predict: str, ground_truth: str) -> float: + answer = predict + return 1.0 if grade_answer(answer, ground_truth) else 0.0 + + def compute_math_score_single(predict: str, ground_truth: str, format_weight: float = 0.0) -> Dict[str, float]: + predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) + # format_score = format_reward(predict) + accuracy_score = single_accuracy_reward(predict, ground_truth) + + # return (1 - format_weight) * accuracy_score + format_weight * format_score + return accuracy_score + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + # print('Computing rewards now...') + # second_prompts = kwargs.get("second_prompts") # ← list[str] or None + # second_completions = kwargs.get("second_completions") + # second_contents = [comp[0]["content"] for comp in second_completions] + # print('second prompts', second_prompts) + # print('-'*10) + # print('second completions', second_completions) + # print('-'*10) + + # import time + # time.sleep(30) + question_type = kwargs['problem_type'][0] + questions = kwargs['problem'] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + extracted_content_descriptions = [extract_description(ele) for ele in contents] + # extracted_content_answers = [extract_answer(ele) for ele in contents] + # model = kwargs.get("model") # may be None if called elsewhere + # tokenizer = kwargs.get("tokenizer") + # # (optional) example use: let the model score the generated answer + # if model is not None and tokenizer is not None: + # model.eval() + description_inputs = [questions[index_count] + ' [SEP] ' + extracted_content_descriptions[index_count] for index_count in range(len(extracted_content_descriptions))] + description_rewards = answerBERT.batch_predict(description_inputs, batch_size = 32) + + for content, sol, description_reward in zip(contents, solution, description_rewards): + # for content, sol, question in zip(contents, solution, questions): + # for content, sol, second_content in zip(contents, solution, second_completions): + try: + output_ans = extract_answer(content) + gt_ans = extract_answer(sol) + # description_extraction = extract_answer(second_content) + # if question_type == "multiple choice": + # reward = 1.0 if output_ans.strip() == gt_ans.strip() else 0.0 + # elif question_type == "numerical": + # gt_has_decimal = ("." 
in gt_ans) or ("," in gt_ans) + # out_has_decimal = ("." in output_ans) or ("," in output_ans) + # if gt_has_decimal != out_has_decimal: + # reward = 0.0 + # else: + # gt_number = normalize_number(gt_ans) + # out_number = normalize_number(output_ans) + # if gt_number is None or out_number is None: + # reward = 0.0 + # else: + # reward = 1.0 if round(gt_number, 2) == round(out_number, 2) else 0.0 + if question_type == "OCR": + # description_extraction = extract_answer(second_content) + # description_error_rate = wer(gt_ans, description_extraction) + description_pendat_reward = pedant.get_score(gt_ans, description_extraction, question) + # error_rate = wer(gt_ans, output_ans) + answer_pedant_reward = pedant.get_score(gt_ans, output_ans, question) + # reward = (1 - error_rate) + (1- description_error_rate) + # reward = max(0.0, min(2.0, reward)) + # print('Extracted description: ', description_extraction) + # print('Generated answer: ', output_ans) + # print('Sol: ', gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + # print('-' * 10) + reward = description_pendat_reward + answer_pedant_reward + # elif question_type == "free-form": + # score = compute_rouge_score(gt_ans, output_ans) + # reward = max(0.0, min(1.0, score)) + # elif question_type == "regression": + # gt_number = normalize_number(gt_ans) + # out_number = normalize_number(output_ans) + # if gt_number is None or out_number is None: + # reward = 0.0 + # rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + # rel_diff = min(1.0, max(0.0, rel_diff)) + # reward = 1 - rel_diff + elif question_type == 'math' or question_type == 'unify' or question_type == "multiple choice" or question_type == "numerical" or question_type == "regression": + # print('Extracted description: ', description_extraction) + # print('Generated answer: ', output_ans) + # print('Sol: ', gt_ans) + + # description_reward = compute_math_score_single(description_extraction, gt_ans) + answer_reward = compute_math_score_single(output_ans, gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + # print('-' * 10) + reward = description_reward + answer_reward + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {content}\n") + f.write(f"Solution: {sol}\n") + + return rewards + + +def simple_format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + # pattern = r".*?\s*.*?" + pattern = r".*?\s*.*?\s*.*?" + completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [0.1 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": simple_format_reward, +} + +# SYSTEM_PROMPT = ( +# "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant " +# "first thinks about the reasoning process in the mind and then provides the user with the answer. 
The reasoning " +# "process and answer are enclosed within and tags, respectively, i.e., " +# " reasoning process here answer here " +# ) + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. After the user asks a question about an image, write a rich, self-contained description of that image—detailed enough that someone could answer the question from the description alone, without ever seeing the image. Enclose the entire description in tags." + "Next, the assistant should think deeply about the reasoning process, engaging in an internal dialogue and self-reflection, " + "and provide this step-by-step reasoning within tags. " + "Finally, the assistant provides a single word, single letter choice, or phrase answer within tags." + "The output format should be: image description here reasoning process here FINAL ANSWER here ." +) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + # QUESTION_TEMPLATE = ( + # "{Question}\n" + # "Please think about this question as if you were a human pondering deeply. " + # "Engage in an internal dialogue using expressions such as 'let me think', 'wait', 'Hmm', 'oh, I see', 'let's break it down', etc, or other natural language thought expressions " + # "It's encouraged to include self-reflection or verification in the reasoning process. " + # "Provide your detailed reasoning between the tags, and then give your final answer between the tags." + # ) + + QUESTION_TEMPLATE = ( + "{Question}\n" + "You are tasked with analyzing an image to generate an exhaustive and detailed description to answer a question. " + "Analyze the image and produce a thorough, self-contained description—detailed enough for someone to answer the question using the description alone. Wrap the entire description in tags.\n" + "Next, engage in an internal dialogue as if you were a human pondering deeply—use expressions such as 'let me think', 'wait', 'hmm', 'oh, I see', 'let's break it down', etc., and include self-reflection or verification in your reasoning process. " + "Provide your detailed, step-by-step reasoning based on the image description, and enclose this part within tags.\n" + "Finally, provide a single word or phrase answer to the question, enclosed within tags.\n" + "The output format should be: image description here reasoning process here FINAL ANSWER here " + ) + + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) 
within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "math": " Please provide the final exact answer (single option letter for multiple choice) within the tags.", + } + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + } + ] + }] + } + + return msg + + + dataset = dataset.map(make_conversation_image_and_video) + + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModifiedOrig + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo-cot-noDesEval.py b/src/r1-v/src/open_r1/grpo-cot-noDesEval.py new file mode 100644 index 0000000000000000000000000000000000000000..1df1dea20c6d805701297b17810a8ccf5a725329 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-cot-noDesEval.py @@ -0,0 +1,446 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
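The grpo-cot-noDesEval.py file added next appears to be the ablation that drops the description term and rewards only the final answer. For the math / multiple-choice / numerical branch this reduces to the grade_answer check sketched below (grade_answer is the mathruler helper the file already imports; the wrapper name is illustrative):

from mathruler.grader import grade_answer

def answer_only_reward(extracted_answer: str, ground_truth: str) -> float:
    # Sketch: 1.0 when mathruler judges the extracted answer equivalent to the
    # ground truth, 0.0 otherwise -- no separate description reward is added.
    return 1.0 if grade_answer(extracted_answer, ground_truth) else 0.0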
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from datetime import datetime +from dataclasses import dataclass, field + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModifiedOrig +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from typing import Dict, List, Optional +from mathruler.grader import extract_boxed_content, grade_answer + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +# from utils.math_cot import * +# from qa_metrics.pedant import PEDANT +# from qa_metrics.answerBERT import AnswerBertActor + +# pedant = PEDANT() +# answerBERT = AnswerBertActor(device='cuda:7') + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + + # reward_funcs: list[str] = field( + # default_factory=lambda: ["accuracy"], + # metadata={"help": "List of reward functions. Possible values: 'accuracy'"}, + # ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + +def accuracy_reward(completions, solution, **kwargs): + def extract_answer(text: str) -> str: + """ + 1) Try the full block. + 2) If that is missing, grab whatever follows the opening tag. + 3) Otherwise return the original text. + """ + # ① normal case + m = re.search(r'\s*(.*?)\s*', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ② fallback + m = re.search(r'\s*(.*)$', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ③ nothing found + return text.strip() + + def extract_description(predict: str) -> Optional[str]: + """ + Extracts the content of the block from `predict`. + Returns the inner text (with leading/trailing whitespace stripped), + or None if no tag is found. 
+ """ + match = re.search(r"([\s\S]*?)", predict, re.DOTALL) + if not match: + return predict + return match.group(1).strip() + + def single_accuracy_reward(predict: str, ground_truth: str) -> float: + answer = predict + return 1.0 if grade_answer(answer, ground_truth) else 0.0 + + def compute_math_score_single(predict: str, ground_truth: str, format_weight: float = 0.0) -> Dict[str, float]: + predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) + # format_score = format_reward(predict) + accuracy_score = single_accuracy_reward(predict, ground_truth) + + # return (1 - format_weight) * accuracy_score + format_weight * format_score + return accuracy_score + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + # print('Computing rewards now...') + # second_prompts = kwargs.get("second_prompts") # ← list[str] or None + # second_completions = kwargs.get("second_completions") + # second_contents = [comp[0]["content"] for comp in second_completions] + # print('second prompts', second_prompts) + # print('-'*10) + # print('second completions', second_completions) + # print('-'*10) + + # import time + # time.sleep(30) + question_type = kwargs['problem_type'][0] + questions = kwargs['problem'] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + # extracted_content_descriptions = [extract_description(ele) for ele in contents] + # extracted_content_answers = [extract_answer(ele) for ele in contents] + # model = kwargs.get("model") # may be None if called elsewhere + # tokenizer = kwargs.get("tokenizer") + # # (optional) example use: let the model score the generated answer + # if model is not None and tokenizer is not None: + # model.eval() + # description_inputs = [questions[index_count] + ' [SEP] ' + extracted_content_descriptions[index_count] for index_count in range(len(extracted_content_descriptions))] + # description_rewards = answerBERT.batch_predict(description_inputs, batch_size = 64) + + for content, sol in zip(contents, solution): + # for content, sol, question in zip(contents, solution, questions): + # for content, sol, second_content in zip(contents, solution, second_completions): + try: + output_ans = extract_answer(content) + gt_ans = extract_answer(sol) + # description_extraction = extract_answer(second_content) + # if question_type == "multiple choice": + # reward = 1.0 if output_ans.strip() == gt_ans.strip() else 0.0 + # elif question_type == "numerical": + # gt_has_decimal = ("." in gt_ans) or ("," in gt_ans) + # out_has_decimal = ("." 
in output_ans) or ("," in output_ans) + # if gt_has_decimal != out_has_decimal: + # reward = 0.0 + # else: + # gt_number = normalize_number(gt_ans) + # out_number = normalize_number(output_ans) + # if gt_number is None or out_number is None: + # reward = 0.0 + # else: + # reward = 1.0 if round(gt_number, 2) == round(out_number, 2) else 0.0 + if question_type == "OCR": + # description_extraction = extract_answer(second_content) + # description_error_rate = wer(gt_ans, description_extraction) + description_pendat_reward = pedant.get_score(gt_ans, description_extraction, question) + # error_rate = wer(gt_ans, output_ans) + answer_pedant_reward = pedant.get_score(gt_ans, output_ans, question) + # reward = (1 - error_rate) + (1- description_error_rate) + # reward = max(0.0, min(2.0, reward)) + # print('Extracted description: ', description_extraction) + # print('Generated answer: ', output_ans) + # print('Sol: ', gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + # print('-' * 10) + reward = description_pendat_reward + answer_pedant_reward + # elif question_type == "free-form": + # score = compute_rouge_score(gt_ans, output_ans) + # reward = max(0.0, min(1.0, score)) + elif question_type == "regression": + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + rel_diff = min(1.0, max(0.0, rel_diff)) + reward = 1 - rel_diff + elif question_type == 'math' or question_type == 'unify' or question_type == "multiple choice" or question_type == "numerical": + # print('Extracted description: ', description_extraction) + # print('Generated answer: ', output_ans) + # print('Sol: ', gt_ans) + + # description_reward = compute_math_score_single(description_extraction, gt_ans) + answer_reward = compute_math_score_single(output_ans, gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + # print('-' * 10) + # reward = description_reward + answer_reward + reward = answer_reward + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {content}\n") + f.write(f"Solution: {sol}\n") + + return rewards + + +def simple_format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + # pattern = r".*?\s*.*?" + pattern = r".*?\s*.*?\s*.*?" + completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [0.1 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": simple_format_reward, +} + +# SYSTEM_PROMPT = ( +# "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant " +# "first thinks about the reasoning process in the mind and then provides the user with the answer. 
The reasoning " +# "process and answer are enclosed within and tags, respectively, i.e., " +# " reasoning process here answer here " +# ) + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. After the user asks a question about an image, write a rich, self-contained description of that image—detailed enough that someone could answer the question from the description alone, without ever seeing the image. Enclose the entire description in tags." + "Next, the assistant should think deeply about the reasoning process, engaging in an internal dialogue and self-reflection, " + "and provide this step-by-step reasoning within tags. " + "Finally, the assistant provides a single word, single letter choice, or phrase answer within tags." + "The output format should be: image description here reasoning process here FINAL ANSWER here . Please only return the final single letter choice within the tags for multiple choice questions; Please provide the numerical value (e.g., 42 or 3.14) within the tags for numerical questions." +) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + # QUESTION_TEMPLATE = ( + # "{Question}\n" + # "Please think about this question as if you were a human pondering deeply. " + # "Engage in an internal dialogue using expressions such as 'let me think', 'wait', 'Hmm', 'oh, I see', 'let's break it down', etc, or other natural language thought expressions " + # "It's encouraged to include self-reflection or verification in the reasoning process. " + # "Provide your detailed reasoning between the tags, and then give your final answer between the tags." + # ) + + QUESTION_TEMPLATE = ( + "{Question}\n" + "You are tasked with analyzing an image to generate an exhaustive and detailed description to answer a question. " + "Analyze the image and produce a thorough, self-contained description—detailed enough for someone to answer the question using the description alone. Wrap the entire description in tags.\n" + "Next, engage in an internal dialogue as if you were a human pondering deeply—use expressions such as 'let me think', 'wait', 'hmm', 'oh, I see', 'let's break it down', etc., and include self-reflection or verification in your reasoning process. " + "Provide your detailed, step-by-step reasoning based on the image description, and enclose this part within tags.\n" + "Finally, provide a single word or phrase answer to the question, enclosed within tags.\n" + "The output format should be: image description here reasoning process here FINAL ANSWER here . Please only return the final single letter choice within the tags for multiple choice questions; Please provide the numerical value (e.g., 42 or 3.14) within the tags for numerical questions." + ) + + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) 
within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "math": " Please provide the final exact answer (single option letter for multiple choice) within the tags.", + } + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + # msg ={ + # "prompt": + # [{ + # "role": "user", + # "content": [ + # { + # "type": example['data_type'], + # # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + # }, + # { + # "type": "text", + # "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + # } + # ] + # }] + # } + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + } + ] + }] + } + + return msg + + + dataset = dataset.map(make_conversation_image_and_video) + + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModifiedOrig + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo-cot-noInfo.py b/src/r1-v/src/open_r1/grpo-cot-noInfo.py new file mode 100644 index 0000000000000000000000000000000000000000..d5a29122f421f8fcebeae2819c7fdca944fe5e74 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-cot-noInfo.py @@ -0,0 +1,346 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from datetime import datetime +from dataclasses import dataclass, field +from typing import Optional + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModified +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +from utils.math_cot_noInfo import * + + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + + +def accuracy_reward(completions, solution, **kwargs): + + def extract_answer(text): + pattern = r'\s*(.*?)\s*' + match = re.search(pattern, text, re.DOTALL) + if match: + return match.group(1).strip() + return "" + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + + question_type = kwargs['problem_type'][0] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + for content, sol in zip(contents, solution): + + try: + output_ans = extract_answer(content) + gt_ans = 
extract_answer(sol) + if question_type == "multiple choice": + reward = 1.0 if output_ans.strip() == gt_ans.strip() else 0.0 + elif question_type == "numerical": + gt_has_decimal = ("." in gt_ans) or ("," in gt_ans) + out_has_decimal = ("." in output_ans) or ("," in output_ans) + if gt_has_decimal != out_has_decimal: + reward = 0.0 + else: + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + else: + reward = 1.0 if round(gt_number, 2) == round(out_number, 2) else 0.0 + elif question_type == "OCR": + error_rate = wer(gt_ans, output_ans) + reward = 1 - error_rate + reward = max(0.0, min(1.0, reward)) + elif question_type == "free-form": + score = compute_rouge_score(gt_ans, output_ans) + reward = max(0.0, min(1.0, score)) + elif question_type == "regression": + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + rel_diff = min(1.0, max(0.0, rel_diff)) + reward = 1 - rel_diff + elif question_type == 'math': + reward = compute_math_score_single(content, gt_ans) + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {content}\n") + f.write(f"Solution: {sol}\n") + + return rewards + + +def format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + pattern = r".*?\s*.*?" + completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [1.0 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + # "format": format_reward, +} + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant " + "first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning " + "process and answer are enclosed within and tags, respectively, i.e., " + " reasoning process here answer here " +) + +# SYSTEM_PROMPT = ( +# "A conversation between User and Assistant. The user provides a question about an image, " +# "and the Assistant is tasked with generating an exhaustive and detailed description of the image. " +# "The assistant should extract and describe all possible information from the image—including objects, numbers, text, and their relationships—" +# "and enclose this description within tags. " +# "Next, the assistant should think deeply about the reasoning process, engaging in an internal dialogue and self-reflection, " +# "and provide this step-by-step reasoning within tags. " +# "Finally, the assistant provides a single word or phrase answer within tags. " +# "The output format should be: image description here reasoning process here FINAL ANSWER here ." 
+# ) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + QUESTION_TEMPLATE = ( + "{Question}\n" + "Please think about this question as if you were a human pondering deeply. " + "Engage in an internal dialogue using expressions such as 'let me think', 'wait', 'Hmm', 'oh, I see', 'let's break it down', etc, or other natural language thought expressions " + "It's encouraged to include self-reflection or verification in the reasoning process. " + "Provide your detailed reasoning between the tags, and then give your final answer between the tags." + ) + + # QUESTION_TEMPLATE = ( + # "{Question}\n" + # "You are tasked with analyzing an image to generate an exhaustive and detailed description. " + # "Your goal is to extract and describe all possible information from the image, including but not limited to objects, numbers, text, and the relationships between these elements. " + # "The description should be as fine and detailed as possible, capturing every nuance, and should be enclosed within tags.\n" + # "Next, engage in an internal dialogue as if you were a human pondering deeply—use expressions such as 'let me think', 'wait', 'hmm', 'oh, I see', 'let's break it down', etc., and include self-reflection or verification in your reasoning process. " + # "Provide your detailed, step-by-step reasoning based on the image description, and enclose this part within tags.\n" + # "Finally, provide a single word or phrase answer to the question, enclosed within tags.\n" + # "The output format should be: image description here reasoning process here FINAL ANSWER here " + # ) + + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) 
within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "math": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + } + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + } + ] + }] + } + + return msg + + + dataset = dataset.map(make_conversation_image_and_video) + + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModified + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo-cot-qwenEval.py b/src/r1-v/src/open_r1/grpo-cot-qwenEval.py new file mode 100644 index 0000000000000000000000000000000000000000..f85da91eabec846422161cec55e96118bafe3096 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-cot-qwenEval.py @@ -0,0 +1,523 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +import ray +from datetime import datetime +from dataclasses import dataclass, field + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModifiedOrig +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from typing import Dict, List, Optional +from mathruler.grader import extract_boxed_content, grade_answer + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +import torch +# from utils.gpt_eval import infer +# from utils.math_cot import * +from qa_metrics.pedant import PEDANT +from concurrent.futures import ProcessPoolExecutor +import os, subprocess, sys +# from qa_metrics.answerBERT import AnswerBertActor +# from utils.self_eval import * +from vllm import LLM, SamplingParams + +pedant = None +# answerBERT = AnswerBertActor(device='cuda:7') + +# curr_actor = VllmActor.options(num_gpus=1).remote("Qwen/Qwen2.5-3B-Instruct") + +from typing import List +import os +import ray, os, subprocess, torch.distributed as dist + +MODEL_ID = "Qwen/Qwen2.5-7B-Instruct" +MAX_LEN = 32_768 +RAY_NS = "grpo_qwen_vllm" +RAY_TMP = "/tmp/ray" + +# ------------------------------------------------------------ +# 1. Define the Ray actor class *before* we ever create it +# (Ray just needs to see the decorator; it doesn’t need an +# active cluster at definition time) +# ------------------------------------------------------------ +@ray.remote(num_gpus=1,resources={"gpu_7": 1}) +class VllmActor: + def __init__(self, model_id): + self.engine = LLM( + model_id, + tensor_parallel_size=1, + gpu_memory_utilization=0.80, + max_model_len=MAX_LEN, + trust_remote_code=True, + dtype="bfloat16", + ) + self.default = SamplingParams(top_p=0.9, temperature=0.7, max_tokens=128) + + def generate_batch(self, prompts, sampling=None): + outs = self.engine.generate(prompts, sampling_params=sampling or self.default) + return [o.outputs[0].text for o in outs] + +# ------------------------------------------------------------ +# 2. Torch-DDP initialisation +# ------------------------------------------------------------ +dist.init_process_group("nccl") +rank = dist.get_rank() + +# ------------------------------------------------------------ +# 3. Rank-0 starts the Ray head, others wait +# ------------------------------------------------------------ +if rank == 1: + ray.init( + _temp_dir=RAY_TMP, + object_store_memory=1 * 1024**3, + namespace=RAY_NS, + include_dashboard=False, + resources={"gpu_7": 1} + ) + # optional: confirm the head is up + # from ray._private.internal_api import wait_for_gcs + # wait_for_gcs() +dist.barrier() # ---- head definitely running here ---- + +# ------------------------------------------------------------ +# 4. 
Non-zero ranks attach to the head +# ------------------------------------------------------------ +if rank != 0: + ray.init(address="auto", _temp_dir=RAY_TMP, namespace=RAY_NS) + +dist.barrier() # ---- every rank now in the cluster ---- + +# ------------------------------------------------------------ +# 5. Create / look-up the VllmActor +# ------------------------------------------------------------ +if rank == 1: + vllm_actor = ( + VllmActor.options(name="vllm", namespace=RAY_NS, lifetime="detached") + .remote(MODEL_ID) + ) + # block until the model finishes loading so other ranks don’t race + ray.get(vllm_actor.generate_batch.remote(["ping"])) +dist.barrier() # ---- actor fully alive everywhere ---- + +if rank != 0: + vllm_actor = ray.get_actor("vllm", namespace=RAY_NS) + + +eval_prompt_template = '''You are provided a text description of a problem and a question. Determine the answer to the question based on the text description. Provide your answer as a single final answer or a short phrase enclosed with . If the question is a multiple choice, the final answer should be a single letter choice. \nText description: {}\nQuestion: {}''' + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + + # reward_funcs: list[str] = field( + # default_factory=lambda: ["accuracy"], + # metadata={"help": "List of reward functions. Possible values: 'accuracy'"}, + # ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + +def accuracy_reward(completions, solution, **kwargs): + def extract_answer(text: str) -> str: + """ + 1) Try the full block. + 2) If that is missing, grab whatever follows the opening tag. + 3) Otherwise return the original text. + """ + # ① normal case + m = re.search(r'\s*(.*?)\s*', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ② fallback + m = re.search(r'\s*(.*)$', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ③ nothing found + return text.strip() + + def extract_description(predict: str) -> Optional[str]: + """ + Extracts the content of the block from `predict`. + Returns the inner text (with leading/trailing whitespace stripped), + or None if no tag is found. 
+ """ + match = re.search(r"([\s\S]*?)", predict, re.DOTALL) + if not match: + return predict + return match.group(1).strip() + + def single_accuracy_reward(predict: str, ground_truth: str) -> float: + answer = predict + return 1.0 if grade_answer(answer, ground_truth) else 0.0 + + def compute_math_score_single(predict: str, ground_truth: str, format_weight: float = 0.0) -> Dict[str, float]: + predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) + # format_score = format_reward(predict) + accuracy_score = single_accuracy_reward(predict, ground_truth) + + # return (1 - format_weight) * accuracy_score + format_weight * format_score + return accuracy_score + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + # print('Computing rewards now...') + # second_prompts = kwargs.get("second_prompts") # ← list[str] or None + # second_completions = kwargs.get("second_completions") + # second_contents = [comp[0]["content"] for comp in second_completions] + # print('second prompts', second_prompts) + # print('-'*10) + # print('second completions', second_completions) + # print('-'*10) + + # import time + # time.sleep(30) + question_type = kwargs['problem_type'][0] + questions = kwargs['problem'] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + extracted_content_descriptions = [extract_description(ele) for ele in contents] + description_eval_inputs = [eval_prompt_template.format(extracted_content_descriptions[count_index], questions[count_index]) for count_index in range(len(extracted_content_descriptions))] + # extracted_content_answers = [extract_answer(ele) for ele in contents] + # model = kwargs.get("model") # may be None if called elsewhere + # tokenizer = kwargs.get("tokenizer") + # # (optional) example use: let the model score the generated answer + # if model is not None and tokenizer is not None: + # model.eval() + # description_inputs = [questions[index_count] + ' [SEP] ' + extracted_content_descriptions[index_count] for index_count in range(len(extracted_content_descriptions))] + # description_rewards = answerBERT.batch_predict(description_inputs, batch_size = 64) + # description_rewards = [infer(extracted_content_descriptions[index_count], questions[index_count]) for index_count in range(len(extracted_content_descriptions))] + # description_outputs = generate_batch(description_eval_inputs) + print(len(description_eval_inputs)) + print('Computing rewards...') + print('-'*10) + # description_outputs = 
ray.get(vllm_actor.generate.remote(description_eval_inputs)) + description_outputs = ray.get( + vllm_actor.generate_batch_sequential.remote(description_eval_inputs, + batch_size=32) # tune to taste + ) + print('Finish computing generating batch') + output_answers = [extract_answer(content) for content in contents] + gt_answers = [extract_answer(sol) for sol in solution] + description_rewards = [compute_math_score_single(description_outputs[curr_idx], gt_answers[curr_idx]) for curr_idx in range(len(description_outputs))] + + + + + # for content, sol, description_reward in zip(contents, solution, description_rewards): + # for content, sol, question in zip(contents, solution, questions): + # for content, sol, second_content in zip(contents, solution, second_completions): + for output_ans, gt_ans, description_reward in zip(output_answers, gt_answers, description_rewards): + try: + # output_ans = extract_answer(content) + # gt_ans = extract_answer(sol) + # description_extraction = extract_answer(second_content) + # if question_type == "multiple choice": + # reward = 1.0 if output_ans.strip() == gt_ans.strip() else 0.0 + # elif question_type == "numerical": + # gt_has_decimal = ("." in gt_ans) or ("," in gt_ans) + # out_has_decimal = ("." in output_ans) or ("," in output_ans) + # if gt_has_decimal != out_has_decimal: + # reward = 0.0 + # else: + # gt_number = normalize_number(gt_ans) + # out_number = normalize_number(output_ans) + # if gt_number is None or out_number is None: + # reward = 0.0 + # else: + # reward = 1.0 if round(gt_number, 2) == round(out_number, 2) else 0.0 + if question_type == "OCR": + # description_extraction = extract_answer(second_content) + # description_error_rate = wer(gt_ans, description_extraction) + # description_pendat_reward = pedant.get_score(gt_ans, description_extraction, question) + # error_rate = wer(gt_ans, output_ans) + answer_pedant_reward = pedant.get_score(gt_ans, output_ans, questions[0]) + # reward = (1 - error_rate) + (1- description_error_rate) + # reward = max(0.0, min(2.0, reward)) + # print('Extracted description: ', description_extraction) + print('Generated answer: ', output_ans) + print('Sol: ', gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + print('-' * 10) + # reward = description_pendat_reward + answer_pedant_reward + reward = answer_pedant_reward + # elif question_type == "free-form": + # score = compute_rouge_score(gt_ans, output_ans) + # reward = max(0.0, min(1.0, score)) + elif question_type == "regression": + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + rel_diff = min(1.0, max(0.0, rel_diff)) + reward = 1 - rel_diff + elif question_type == 'math' or question_type == 'unify' or question_type == "multiple choice" or question_type == "numerical": + # print('Extracted description: ', description_extraction) + print('Generated answer: ', output_ans) + print('Sol: ', gt_ans) + + # description_reward = compute_math_score_single(description_extraction, gt_ans) + answer_reward = compute_math_score_single(output_ans, gt_ans) + print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + print('-' * 10) + reward = description_reward + answer_reward + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") 
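+ # Note (added comment, a reading of the code rather than part of the original patch): any failure while
+ # scoring a sample, e.g. numeric parsing or the PEDANT scorer raising, is caught here and falls back to a
+ # zero reward for that sample, so one malformed completion cannot crash the whole reward batch. As written,
+ # `pedant` is initialised to None near the top of this file, so the OCR branch would raise and land here.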
+ reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {output_ans}\n") + f.write(f"Solution: {gt_ans}\n") + + return rewards + + +def simple_format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + # pattern = r".*?\s*.*?" + pattern = r".*?\s*.*?\s*.*?" + completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [0.1 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": simple_format_reward, +} + + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. After the user asks a question about an image, write a rich, self-contained description of that image—detailed enough that someone could answer the question from the description alone, without ever seeing the image. Enclose the entire description in tags." + "Next, the assistant should think deeply about the reasoning process, engaging in an internal dialogue and self-reflection, " + "and provide this step-by-step reasoning within tags. " + "Finally, the assistant provides a single word, single letter choice, or phrase answer within tags." + "The output format should be: image description here reasoning process here FINAL ANSWER here ." +) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + + QUESTION_TEMPLATE = ( + "{Question}\n" + "You are tasked with analyzing an image to generate an exhaustive and detailed description to answer a question. " + "Analyze the image and produce a thorough, self-contained description—detailed enough for someone to answer the question using the description alone. Wrap the entire description in tags.\n" + "Next, engage in an internal dialogue as if you were a human pondering deeply—use expressions such as 'let me think', 'wait', 'hmm', 'oh, I see', 'let's break it down', etc., and include self-reflection or verification in your reasoning process. " + "Provide your detailed, step-by-step reasoning based on the image description, and enclose this part within tags.\n" + "Finally, provide a single word or phrase answer to the question, enclosed within tags.\n" + "The output format should be: image description here reasoning process here FINAL ANSWER here " + ) + + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) 
within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "math": " Please provide the final exact answer (single option letter for multiple choice) within the tags.", + } + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + } + ] + }] + } + + return msg + + + dataset = dataset.map(make_conversation_image_and_video) + + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModifiedOrig + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo-cot-selfEval.py b/src/r1-v/src/open_r1/grpo-cot-selfEval.py new file mode 100644 index 0000000000000000000000000000000000000000..ad2178851614650facc1678324244060fb415630 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-cot-selfEval.py @@ -0,0 +1,457 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from datetime import datetime +from dataclasses import dataclass, field + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModified +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from typing import Dict, List, Optional +from mathruler.grader import extract_boxed_content, grade_answer + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +# from utils.math_cot import * +# from qa_metrics.pedant import PEDANT + +# pedant = PEDANT() + +''' +Alpha constant: When the description is wrong, but the final answer is right, the model is doing reward hacking, +so we give it a partial reward +''' +alpha = 1.0 + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + + # reward_funcs: list[str] = field( + # default_factory=lambda: ["accuracy"], + # metadata={"help": "List of reward functions. Possible values: 'accuracy'"}, + # ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + + +def accuracy_reward(completions, solution, **kwargs): + def extract_answer(text: str) -> str: + """ + 1) Try the full block. + 2) If that is missing, grab whatever follows the opening tag. + 3) Otherwise return the original text. 
+ """ + # ① normal case + m = re.search(r'\s*(.*?)\s*', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ② fallback + m = re.search(r'\s*(.*)$', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ③ nothing found + return text.strip() + + def single_accuracy_reward(predict: str, ground_truth: str) -> float: + answer = predict + return 1.0 if grade_answer(answer, ground_truth) else 0.0 + + def compute_math_score_single(predict: str, ground_truth: str, format_weight: float = 0.0) -> Dict[str, float]: + predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) + # format_score = format_reward(predict) + accuracy_score = single_accuracy_reward(predict, ground_truth) + + # return (1 - format_weight) * accuracy_score + format_weight * format_score + return accuracy_score + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + # print('Computing rewards now...') + # second_prompts = kwargs.get("second_prompts") # ← list[str] or None + second_completions = kwargs.get("second_completions") + # second_contents = [comp[0]["content"] for comp in second_completions] + # print('second prompts', second_prompts) + # print('-'*10) + # print('second completions', second_completions) + # print('-'*10) + + # import time + # time.sleep(30) + question_type = kwargs['problem_type'][0] + question = kwargs['problem'][0] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + + # model = kwargs.get("model") # may be None if called elsewhere + # tokenizer = kwargs.get("tokenizer") + + # # (optional) example use: let the model score the generated answer + # if model is not None and tokenizer is not None: + # model.eval() + + # for content, sol in zip(contents, solution): + for content, sol, second_content in zip(contents, solution, second_completions): + try: + output_ans = extract_answer(content) + gt_ans = extract_answer(sol) + description_extraction = extract_answer(second_content) + # if question_type == "multiple choice": + # reward = 1.0 if output_ans.strip() == gt_ans.strip() else 0.0 + # elif question_type == "numerical": + # gt_has_decimal = ("." in gt_ans) or ("," in gt_ans) + # out_has_decimal = ("." 
in output_ans) or ("," in output_ans) + # if gt_has_decimal != out_has_decimal: + # reward = 0.0 + # else: + # gt_number = normalize_number(gt_ans) + # out_number = normalize_number(output_ans) + # if gt_number is None or out_number is None: + # reward = 0.0 + # else: + # reward = 1.0 if round(gt_number, 2) == round(out_number, 2) else 0.0 + # if question_type == "OCR": + # # description_extraction = extract_answer(second_content) + # # description_error_rate = wer(gt_ans, description_extraction) + # description_pendat_reward = pedant.get_score(gt_ans, description_extraction, question) + # # error_rate = wer(gt_ans, output_ans) + # answer_pedant_reward = pedant.get_score(gt_ans, output_ans, question) + # # reward = (1 - error_rate) + (1- description_error_rate) + # # reward = max(0.0, min(2.0, reward)) + # print('Extracted description: ', description_extraction) + # print('Generated answer: ', output_ans) + # print('Sol: ', gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + # print('-' * 10) + # reward = description_pendat_reward + answer_pedant_reward + if question_type == "free-form": + score = compute_rouge_score(gt_ans, output_ans) + description_score = compute_rouge_score(gt_ans, description_extraction) + reward = max(0.0, min(1.0, score)) + max(0.0, min(1.0, description_score)) + elif question_type == "regression": + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + description_number = normalize_number(description_extraction) + if gt_number is None or out_number is None: + reward = 0.0 + + if description_number is None: + description_reward = 0.0 + + + rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + rel_diff = min(1.0, max(0.0, rel_diff)) + + description_diff = (abs(description_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + description_diff = min(1.0, max(0.0, description_diff)) + + reward = (1 - rel_diff) + (1 - description_diff) + elif question_type == 'math' or question_type == 'unify' or question_type == 'multiple choice' or question_type == 'numerical': + description_reward = compute_math_score_single(description_extraction, gt_ans) + answer_reward = compute_math_score_single(output_ans, gt_ans) + + if description_reward == 0 and answer_reward == 1: + # Avoid multiplication to save computation + reward = alpha + else: + reward = description_reward + answer_reward + + # print(f"Extracted description: {description_extraction} | Generated answer: {output_ans} | Sol: {gt_ans}") + # print(f'Description reward: {description_reward} | answer reward: {answer_reward} | final reward: {reward}') + # print('-' * 10) + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {content}\n") + f.write(f"Solution: {sol}\n") + + return rewards + + +def simple_format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + # pattern = r".*?\s*.*?" + pattern = r".*?\s*.*?\s*.*?" 
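+ # Note (added comment): the tag literals in the two patterns above appear to have been stripped during
+ # export. A plausible reconstruction (an assumption, not confirmed by this diff) is
+ # r"<description>.*?</description>\s*<think>.*?</think>\s*<answer>.*?</answer>", i.e. the completion must
+ # contain a description, a reasoning block, and an answer block, in that order, to earn the 0.1 bonus.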
+ completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [0.1 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": simple_format_reward, +} + + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. After the user asks a question about an image, write a rich, self-contained description of that image—detailed enough that someone could answer the question from the description alone, without ever seeing the image. Enclose the entire description in tags." + "Next, the assistant should think deeply about the reasoning process, engaging in an internal dialogue and self-reflection, " + "and provide this step-by-step reasoning within tags. " + "Finally, the assistant provides a single word, single letter choice, or phrase answer within tags." + "The output format should be: image description here reasoning process here FINAL ANSWER here . Please only return the final single letter choice within the tags for multiple choice questions; Please provide the numerical value (e.g., 42 or 3.14) within the tags for numerical questions." +) + + +def main(script_args, training_args, model_args): + print('Start program..') + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + + print('Loading dataset') + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + QUESTION_TEMPLATE = ( + "{Question}\n" + "You are tasked with analyzing an image to generate an exhaustive and detailed description to answer a question. " + "Analyze the image and produce a thorough, self-contained description—detailed enough for someone to answer the question using the description alone. Wrap the entire description in tags.\n" + "Next, engage in an internal dialogue as if you were a human pondering deeply—use expressions such as 'let me think', 'wait', 'hmm', 'oh, I see', 'let's break it down', etc., and include self-reflection or verification in your reasoning process. " + "Provide your detailed, step-by-step reasoning based on the image and image description, and enclose this part within tags.\n" + "Finally, provide a single word or phrase answer to the question, enclosed within tags.\n" + "The output format should be: image description here reasoning process here FINAL ANSWER here . Please keep your final answer short and precise." + ) + + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) 
within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "math": " Please provide the final exact answer (single option letter for multiple choice) within the tags.", + } + + ABS_Verify_Prompt = '''You are provided a text description of a problem and a question. Determine the answer to the question based on the text description. First provide a step-by-step reasoning within tags, then provide your answer as a single final answer, single letter choice, or a short phrase ENCLOSED with tags. \nText description: {{Description}}\nQuestion: {Question}\nPlease only return the final single letter choice within the tags for multiple choice questions; Please provide the numerical value (e.g., 42 or 3.14) within the tags for numerical questions.''' + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + # "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + "text": QUESTION_TEMPLATE.format(Question=question) + } + ] + }] + } + + return msg + + def make_verify_conversation(example): + # ➊ build the question text + question = example["problem"] + if example["problem_type"] == "multiple choice": + question += "Options:\n" + "\n".join(example["options"]) + + # ➋ verification template + suffix (no if/else) + verify_text = ( + ABS_Verify_Prompt.format(Question=question.replace("", "")) + # + TYPE_TEMPLATE[example["problem_type"]] # ← one-liner, no branching + ) + + # ➌ conversation dict + conv_dict = { + "prompt": [ + { + "role": "user", + "content": [{"type": "text", "text": verify_text}], + } + ] + } + + # templated = maybe_apply_chat_template(conv_dict, processing_class)["prompt"] + # return {"verify_prompt": templated} + return {"verify_prompt": conv_dict} + + + + + print('Start mapping dataset') + dataset = dataset.map(make_conversation_image_and_video) + dataset = dataset.map( + make_verify_conversation, + desc="add description verify prompt", + ) + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModified + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" 
else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo-cot-selfEvalConst.py b/src/r1-v/src/open_r1/grpo-cot-selfEvalConst.py new file mode 100644 index 0000000000000000000000000000000000000000..69cb7c8b76337aa2f16f6037ff93186d6761df71 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-cot-selfEvalConst.py @@ -0,0 +1,456 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from datetime import datetime +from dataclasses import dataclass, field + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerSelfConst +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from typing import Dict, List, Optional +from mathruler.grader import extract_boxed_content, grade_answer + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +# from utils.math_cot import * +# from qa_metrics.pedant import PEDANT + +# pedant = PEDANT() + +''' +Alpha constant: When the description is wrong, but the final answer is right, the model is doing reward hacking, +so we give it a partial reward +''' +alpha = 0.85 + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + + # reward_funcs: list[str] = field( + # default_factory=lambda: ["accuracy"], + # metadata={"help": "List of reward functions. 
Possible values: 'accuracy'"}, + # ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + + +def accuracy_reward(completions, solution, **kwargs): + def extract_answer(text: str) -> str: + """ + 1) Try the full block. + 2) If that is missing, grab whatever follows the opening tag. + 3) Otherwise return the original text. + """ + # ① normal case + m = re.search(r'\s*(.*?)\s*', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ② fallback + m = re.search(r'\s*(.*)$', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ③ nothing found + return text.strip() + + def single_accuracy_reward(predict: str, ground_truth: str) -> float: + answer = predict + return 1.0 if grade_answer(answer, ground_truth) else 0.0 + + def compute_math_score_single(predict: str, ground_truth: str, format_weight: float = 0.0) -> Dict[str, float]: + predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) + # format_score = format_reward(predict) + accuracy_score = single_accuracy_reward(predict, ground_truth) + + # return (1 - format_weight) * accuracy_score + format_weight * format_score + return accuracy_score + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + # print('Computing rewards now...') + # second_prompts = kwargs.get("second_prompts") # ← list[str] or None + second_completions = kwargs.get("second_completions") + # second_contents = [comp[0]["content"] for comp in second_completions] + # print('second prompts', second_prompts) + # print('-'*10) + # print('second completions', second_completions) + # print('-'*10) + + # import time + # time.sleep(30) + question_type = kwargs['problem_type'][0] + question = kwargs['problem'][0] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + + # model = kwargs.get("model") # may be None if called elsewhere + # tokenizer = kwargs.get("tokenizer") + + # # (optional) example use: let the model score the generated answer + # if model is not None and tokenizer is not None: + # model.eval() + + # for content, sol in zip(contents, solution): + for content, sol, second_content in 
zip(contents, solution, second_completions): + try: + output_ans = extract_answer(content) + gt_ans = extract_answer(sol) + description_extraction = extract_answer(second_content) + # if question_type == "multiple choice": + # reward = 1.0 if output_ans.strip() == gt_ans.strip() else 0.0 + # elif question_type == "numerical": + # gt_has_decimal = ("." in gt_ans) or ("," in gt_ans) + # out_has_decimal = ("." in output_ans) or ("," in output_ans) + # if gt_has_decimal != out_has_decimal: + # reward = 0.0 + # else: + # gt_number = normalize_number(gt_ans) + # out_number = normalize_number(output_ans) + # if gt_number is None or out_number is None: + # reward = 0.0 + # else: + # reward = 1.0 if round(gt_number, 2) == round(out_number, 2) else 0.0 + # if question_type == "OCR": + # # description_extraction = extract_answer(second_content) + # # description_error_rate = wer(gt_ans, description_extraction) + # description_pendat_reward = pedant.get_score(gt_ans, description_extraction, question) + # # error_rate = wer(gt_ans, output_ans) + # answer_pedant_reward = pedant.get_score(gt_ans, output_ans, question) + # # reward = (1 - error_rate) + (1- description_error_rate) + # # reward = max(0.0, min(2.0, reward)) + # print('Extracted description: ', description_extraction) + # print('Generated answer: ', output_ans) + # print('Sol: ', gt_ans) + # print(f'Description reward: {description_reward}; answer reward: {answer_reward}') + # print('-' * 10) + # reward = description_pendat_reward + answer_pedant_reward + if question_type == "free-form": + score = compute_rouge_score(gt_ans, output_ans) + description_score = compute_rouge_score(gt_ans, description_extraction) + reward = max(0.0, min(1.0, score)) + max(0.0, min(1.0, description_score)) + elif question_type == "regression": + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + description_number = normalize_number(description_extraction) + if gt_number is None or out_number is None: + reward = 0.0 + + if description_number is None: + description_reward = 0.0 + + + rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + rel_diff = min(1.0, max(0.0, rel_diff)) + + description_diff = (abs(description_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + description_diff = min(1.0, max(0.0, description_diff)) + + reward = (1 - rel_diff) + (1 - description_diff) + elif question_type == 'math' or question_type == 'unify' or question_type == 'multiple choice' or question_type == 'numerical': + description_reward = compute_math_score_single(description_extraction, gt_ans) + answer_reward = compute_math_score_single(output_ans, gt_ans) + + if description_reward == 0 and answer_reward == 1: + # Avoid multiplication to save computation + reward = alpha + else: + reward = description_reward + answer_reward + + # print(f"Extracted description: {description_extraction} | Generated answer: {output_ans} | Sol: {gt_ans}") + # print(f'Description reward: {description_reward} | answer reward: {answer_reward} | final reward: {reward}') + # print('-' * 10) + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + 
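Editor's note: the combined reward computed just above adds the description score and the answer score, but caps the "description wrong, final answer right" case (suspected reward hacking) at a partial credit of alpha = 0.85. A minimal, self-contained sketch of that gating policy follows; the function name and the standalone framing are illustrative, and in the actual script both inputs come from compute_math_score_single().

# Minimal sketch of the alpha-gated reward combination used above.
ALPHA = 0.85  # partial credit when only the final answer is correct

def combine_rewards(description_reward: float, answer_reward: float,
                    alpha: float = ALPHA) -> float:
    """Sum the two signals, but cap the 'wrong description, right answer'
    case at `alpha` so the policy cannot skip the description step."""
    if description_reward == 0.0 and answer_reward == 1.0:
        return alpha
    return description_reward + answer_reward

# Toy check of the three interesting cases:
assert combine_rewards(1.0, 1.0) == 2.0   # both correct
assert combine_rewards(0.0, 1.0) == 0.85  # suspected reward hacking
assert combine_rewards(1.0, 0.0) == 1.0   # good description, wrong answer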
f.write(f"Content: {content}\n") + f.write(f"Solution: {sol}\n") + + return rewards + + +def simple_format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + # pattern = r".*?\s*.*?" + pattern = r".*?\s*.*?\s*.*?" + completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [0.1 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": simple_format_reward, +} + + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. After the user asks a question about an image, write a rich, self-contained description of that image—detailed enough that someone could answer the question from the description alone, without ever seeing the image. Enclose the entire description in tags." + "Next, the assistant should think deeply about the reasoning process, engaging in an internal dialogue and self-reflection, " + "and provide this step-by-step reasoning within tags. " + "Finally, the assistant provides a single word, single letter choice, or phrase answer within tags." + "The output format should be: image description here reasoning process here FINAL ANSWER here ." +) + + +def main(script_args, training_args, model_args): + print('Start program..') + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + + print('Loading dataset') + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + QUESTION_TEMPLATE = ( + "{Question}\n" + "You are tasked with analyzing an image to generate an exhaustive and detailed description to answer a question. " + "Analyze the image and produce a thorough, self-contained description—detailed enough for someone to answer the question using the description alone. Wrap the entire description in tags.\n" + "Next, engage in an internal dialogue as if you were a human pondering deeply—use expressions such as 'let me think', 'wait', 'hmm', 'oh, I see', 'let's break it down', etc., and include self-reflection or verification in your reasoning process. " + "Provide your detailed, step-by-step reasoning based on the image description, and enclose this part within tags.\n" + "Finally, provide a single word or phrase answer to the question, enclosed within tags.\n" + "The output format should be: image description here reasoning process here FINAL ANSWER here " + ) + + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) 
within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "math": " Please provide the final exact answer (single option letter for multiple choice) within the tags.", + } + + ABS_Verify_Prompt = '''You are provided a text description of a problem and a question. Determine the answer to the question based on the text description. First provide a step-by-step reasoning within tags, then provide your answer as a single final answer, single letter choice, or a short phrase ENCLOSED with tags. \nText description: {{Description}}\nQuestion: {Question}''' + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + } + ] + }] + } + + return msg + + def make_verify_conversation(example): + # ➊ build the question text + question = example["problem"] + if example["problem_type"] == "multiple choice": + question += "Options:\n" + "\n".join(example["options"]) + + # ➋ verification template + suffix (no if/else) + verify_text = ( + ABS_Verify_Prompt.format(Question=question.replace("", "")) + + TYPE_TEMPLATE[example["problem_type"]] # ← one-liner, no branching + ) + + # ➌ conversation dict + conv_dict = { + "prompt": [ + { + "role": "user", + "content": [{"type": "text", "text": verify_text}], + } + ] + } + + # templated = maybe_apply_chat_template(conv_dict, processing_class)["prompt"] + # return {"verify_prompt": templated} + return {"verify_prompt": conv_dict} + + + + + print('Start mapping dataset') + dataset = dataset.map(make_conversation_image_and_video) + dataset = dataset.map( + make_verify_conversation, + desc="add description verify prompt", + ) + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerSelfConst + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + 
checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo-cot.py b/src/r1-v/src/open_r1/grpo-cot.py new file mode 100644 index 0000000000000000000000000000000000000000..3ad1c16cef270435b41878fa2e00b5f8dc689c95 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-cot.py @@ -0,0 +1,351 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from datetime import datetime +from dataclasses import dataclass, field +from typing import Optional + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModified +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +from utils.math_cot import * + + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + # reward_funcs: list[str] = field( + # default_factory=lambda: ["accuracy", "format"], + # metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + # ) + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy"], + metadata={"help": "List of reward functions. 
Possible values: 'accuracy'"}, + ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + + +def accuracy_reward(completions, solution, **kwargs): + + def extract_answer(text): + pattern = r'\s*(.*?)\s*' + match = re.search(pattern, text, re.DOTALL) + if match: + return match.group(1).strip() + return "" + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + + question_type = kwargs['problem_type'][0] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + for content, sol in zip(contents, solution): + + try: + output_ans = extract_answer(content) + gt_ans = extract_answer(sol) + if question_type == "multiple choice": + reward = 1.0 if output_ans.strip() == gt_ans.strip() else 0.0 + elif question_type == "numerical": + gt_has_decimal = ("." in gt_ans) or ("," in gt_ans) + out_has_decimal = ("." 
in output_ans) or ("," in output_ans) + if gt_has_decimal != out_has_decimal: + reward = 0.0 + else: + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + else: + reward = 1.0 if round(gt_number, 2) == round(out_number, 2) else 0.0 + elif question_type == "OCR": + error_rate = wer(gt_ans, output_ans) + reward = 1 - error_rate + reward = max(0.0, min(1.0, reward)) + elif question_type == "free-form": + score = compute_rouge_score(gt_ans, output_ans) + reward = max(0.0, min(1.0, score)) + elif question_type == "regression": + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + rel_diff = min(1.0, max(0.0, rel_diff)) + reward = 1 - rel_diff + elif question_type == 'math': + reward = compute_math_score_single(content, gt_ans) + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {content}\n") + f.write(f"Solution: {sol}\n") + + return rewards + + +def format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + pattern = r".*?\s*.*?" + completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [1.0 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + # "format": 0, +} + +# SYSTEM_PROMPT = ( +# "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant " +# "first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning " +# "process and answer are enclosed within and tags, respectively, i.e., " +# " reasoning process here answer here " +# ) + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. The user provides a question about an image, " + "and the Assistant is tasked with generating an exhaustive and detailed description of the image. " + "The assistant should extract and describe all possible information from the image—including objects, numbers, text, and their relationships—" + "and enclose this description within tags. " + "Next, the assistant should think deeply about the reasoning process, engaging in an internal dialogue and self-reflection, " + "and provide this step-by-step reasoning within tags. " + "Finally, the assistant provides a single word or phrase answer within tags. " + "The output format should be: image description here reasoning process here FINAL ANSWER here ." 
+) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + # QUESTION_TEMPLATE = ( + # "{Question}\n" + # "Please think about this question as if you were a human pondering deeply. " + # "Engage in an internal dialogue using expressions such as 'let me think', 'wait', 'Hmm', 'oh, I see', 'let's break it down', etc, or other natural language thought expressions " + # "It's encouraged to include self-reflection or verification in the reasoning process. " + # "Provide your detailed reasoning between the tags, and then give your final answer between the tags." + # ) + + QUESTION_TEMPLATE = ( + "{Question}\n" + "You are tasked with analyzing an image to generate an exhaustive and detailed description. " + "Your goal is to extract and describe all possible information from the image, including but not limited to objects, numbers, text, and the relationships between these elements. " + "The description should be as fine and detailed as possible, capturing every nuance, and should be enclosed within tags.\n" + "Next, engage in an internal dialogue as if you were a human pondering deeply—use expressions such as 'let me think', 'wait', 'hmm', 'oh, I see', 'let's break it down', etc., and include self-reflection or verification in your reasoning process. " + "Provide your detailed, step-by-step reasoning based on the image description, and enclose this part within tags.\n" + "Finally, provide a single word or phrase answer to the question, enclosed within tags.\n" + "The output format should be: image description here reasoning process here FINAL ANSWER here " + ) + + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) 
within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "math": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + } + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + } + ] + }] + } + + return msg + + + dataset = dataset.map(make_conversation_image_and_video) + + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModified + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo-description-LLMEval.py b/src/r1-v/src/open_r1/grpo-description-LLMEval.py new file mode 100644 index 0000000000000000000000000000000000000000..7d8f4171f054899a32fe30313ad7863cccc252bb --- /dev/null +++ b/src/r1-v/src/open_r1/grpo-description-LLMEval.py @@ -0,0 +1,579 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from datetime import datetime +from dataclasses import dataclass, field + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration +from openai import OpenAI +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModifiedOrig +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from typing import Dict, List, Optional +from mathruler.grader import extract_boxed_content, grade_answer + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer +# from utils.gpt_eval import infer +# from utils.math_cot import * +# from qa_metrics.pedant import PEDANT +# from qa_metrics.answerBERT import AnswerBertActor + +# pedant = PEDANT() +# answerBERT = AnswerBertActor(device='cuda:7') + +alpha = 1.0 + +TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) in \\boxed{}.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) in \\boxed{}.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer in \\boxed{}.", + "free-form": " Please provide your text answer in \\boxed{}.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) in \\boxed{}.", + "math": " Please provide the final exact answer (single option letter for multiple choice) in \\boxed{}.", + } + +''' +gpt infer +''' +import os +from openai import AzureOpenAI +import time + +import base64 +from mimetypes import guess_type + + +def azure_gpt4(messages, model): + outputs = [] + for message in messages: + input_prompt = [ + { "role": "system", "content": "You are a helpful assistant." }, + { "role": "user", "content": [ + { + "type": "text", + "text": message["instruction"] + }, + # { + # "type": "image_url", + # "image_url": { + # "url": message["image"] + # } + # } + ]} + ] + ## try N times if API exceed limit ... + for i in range(10): + try: + output = client.chat.completions.create( + model=model, messages=input_prompt, max_tokens=2000 + ) + + output_text = output.choices[0].message.content + break ## exit if successful + + except Exception as e: + print(f'Index {i} got error message: {e}') + output_text = '' + time.sleep(3) + + outputs.append(output_text) + + return outputs + + +client = AzureOpenAI( + api_key = "83f30a2a22324395b854bd343db38d85", + api_version = "2024-08-01-preview", + azure_endpoint = "https://francecentral.api.cognitive.microsoft.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview" + ) + +model = "gpt-4o" +prompt_template = '''Text description: {text}\nQuestion: {question}\nYou are provided a text description of a problem and a question. Determine the answer to the question based on the text description. 
First provide an internal step-by-step reasoning within tags, then provide a single word or phrase answer in \\boxed{}.''' + + +# client = OpenAI( +# base_url="http://29.81.244.54:8080/v1", # your vLLM server +# api_key="ANYKEY", # if you set --api-key when launching +# ) + +client = OpenAI( + base_url="http://29.81.224.188:8080/v1", # your vLLM server + api_key="ANYKEY", # if you set --api-key when launching +) + +def chat_batch( + client, + all_message_batches: List[List[Dict[str, str]]], + *, + # model: str = "Qwen2.5-32B-Instruct", + model: str = "Qwen2.5-32B-finetune", + max_workers: int = 8, + retries: int = 2, + backoff: float = 0.5, + timeout: Optional[float] = None, +) -> List[str]: + """ + Send many chat requests in parallel and return replies as a list of strings, + preserving the order of `all_message_batches`. + """ + + def _chat_once_with_retry(messages: List[Dict[str, str]]) -> str: + last_err: Optional[BaseException] = None + for attempt in range(retries + 1): + try: + resp = client.chat.completions.create( + model=model, + messages=messages, + timeout=timeout, + ) + # Different SDKs expose content slightly differently; handle common cases. + choice = resp.choices[0] + if hasattr(choice, "message") and getattr(choice.message, "content", None) is not None: + return choice.message.content + if hasattr(choice, "text") and choice.text is not None: + return choice.text + # Fallback to stringifying the choice if structure is unexpected. + return str(choice) + except Exception as e: + last_err = e + if attempt < retries: + sleep(backoff * (2 ** attempt)) + return f"Error: {last_err!r}" + + results: List[Optional[str]] = [None] * len(all_message_batches) + with ThreadPoolExecutor(max_workers=max_workers) as executor: + future_to_idx = { + executor.submit(_chat_once_with_retry, batch): i + for i, batch in enumerate(all_message_batches) + } + for fut in as_completed(future_to_idx): + i = future_to_idx[fut] + results[i] = fut.result() + + # mypy-friendly cast: no Nones remain at this point + return [r if r is not None else "Error: Unknown failure" for r in results] + + +def infer(prompt): + # prompt_question = prompt_question.replace('', '') + # prompt = prompt_template.replace('{text}', text).replace('{question}', prompt_question) + + messages = [ + {"instruction": prompt}, + ] + prompt_success = False + prompt_time = 0 + outputs = ['\\boxed{None}'] + while prompt_success == False and prompt_time <= 2: + try: + outputs = azure_gpt4(messages, model) + prompt_success = True + except: + prompt_time += 1 + time.sleep(5) + + return outputs[0] + +''' +end of gpt infer +''' + + +from concurrent.futures import ThreadPoolExecutor, as_completed + +def _call_infer(desc): + return infer(desc) + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + + # reward_funcs: list[str] = field( + # default_factory=lambda: ["accuracy"], + # metadata={"help": "List of reward functions. 
Possible values: 'accuracy'"}, + # ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + + +def accuracy_reward(completions, solution, **kwargs): + def extract_answer(text: str) -> str: + """ + 1) Try the full block. + 2) If that is missing, grab whatever follows the opening tag. + 3) Otherwise return the original text. + """ + # ① normal case + m = re.search(r'\s*(.*?)\s*', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ② fallback + m = re.search(r'\s*(.*)$', text, flags=re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip() + + # ③ nothing found + return text.strip() + + def extract_description(predict: str) -> Optional[str]: + """ + Extracts the content of the block from `predict`. + Returns the inner text (with leading/trailing whitespace stripped), + or None if no tag is found. + """ + match = re.search(r"([\s\S]*?)", predict, re.DOTALL) + if not match: + return predict + return match.group(1).strip() + + def single_accuracy_reward(predict: str, ground_truth: str) -> float: + answer = predict + return 1.0 if grade_answer(answer, ground_truth) else 0.0 + + def compute_math_score_single(predict: str, ground_truth: str, format_weight: float = 0.0) -> Dict[str, float]: + predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) + accuracy_score = single_accuracy_reward(predict, ground_truth) + # return (1 - format_weight) * accuracy_score + format_weight * format_score + return accuracy_score + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + # print('Computing rewards now...') + # second_prompts = kwargs.get("second_prompts") # ← list[str] or None + # second_completions = kwargs.get("second_completions") + # second_contents = [comp[0]["content"] for comp in second_completions] + # print('second prompts', second_prompts) + # print('-'*10) + # print('second completions', second_completions) + # print('-'*10) + + # import time + # time.sleep(30) + question_type = kwargs['problem_type'][0] + questions = kwargs['problem'] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + extracted_content_descriptions = 
[extract_description(ele) for ele in contents] + + description_query_inputs = [] + batch_messages = [] + vllm_batch_messages = [] + + for index in range(len(extracted_content_descriptions)): + prompt_question = questions[index] + des_text = extracted_content_descriptions[index] + prompt_question = prompt_question.replace('', '') + prompt_input = prompt_template.replace('{text}', des_text).replace('{question}', prompt_question) + TYPE_TEMPLATE[question_type] + description_query_inputs.append(prompt_input) + curr_msg = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt_input} + ] + vllm_batch_messages.append(curr_msg) + + + batched_vllm_outputs = chat_batch(client, vllm_batch_messages) + + description_score_outputs = [extract_boxed_content(idx_input) for idx_input in batched_vllm_outputs] + # with ThreadPoolExecutor(max_workers=8) as executor: + # futures = [ + # executor.submit(_call_infer, desc) + # for desc in description_query_inputs + # ] + # # collect as they finish (optional—keeps order of completion) + # for fut in as_completed(futures): + # # description_score_outputs.append(extract_answer(fut.result())) + # # extract_boxed_content + # description_score_outputs.append(extract_boxed_content(fut.result())) + + + gt_answers = [extract_answer(sol) for sol in solution] + description_rewards = [compute_math_score_single(description_score_outputs[count_idx], gt_answers[count_idx]) for count_idx in range(len(description_score_outputs))] + + print(gt_answers) + print(description_score_outputs) + print(description_rewards) + print('-'*10) + + + for content, gt_ans, description_reward in zip(contents, gt_answers, description_rewards): + # for content, sol, question in zip(contents, solution, questions): + # for content, sol, second_content in zip(contents, solution, second_completions): + try: + # output_ans = extract_answer(content) + output_ans = extract_boxed_content(content) + + if question_type != 'None': + answer_reward = compute_math_score_single(output_ans, gt_ans) + if description_reward == 0 and answer_reward == 1: + reward = alpha + else: + reward = description_reward + answer_reward + # reward = answer_reward + else: + print('Falling back to none rewards') + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {content}\n") + f.write(f"Solution: {gt_ans}\n") + + return rewards + + +def simple_format_reward(completions, **kwargs): + """Reward function that checks the same format as `format_reward`: + ......\boxed{...} + """ + pattern = re.compile( + r"^\s*.*?\s*" + r".*?\s*" + r"\\boxed\{.*?\}\s*$", + re.DOTALL, + ) + completion_contents = [completion[0]["content"] for completion in completions] + return [0.1 if pattern.fullmatch(content or "") else 0.0 + for content in completion_contents] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": simple_format_reward, +} + +# SYSTEM_PROMPT = ( +# "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant " +# "first thinks about the reasoning process in the mind and then provides the user with the answer. 
The reasoning " +# "process and answer are enclosed within and tags, respectively, i.e., " +# " reasoning process here answer here " +# ) + +SYSTEM_PROMPT = ( + "You are tasked with analyzing an image/video to generate a detailed description to help you answer the question. First analyze the image/video and produce a self-contained description—detailed enough that can lead to the correct answer. Wrap the entire description in tags.\n Next, engage in an internal dialogue and include self-reflection or verification in your reasoning process. Provide your detailed, step-by-step reasoning based on the image/video description information and image/video, and enclose this part within tags.\n Finally, provide a single word or phrase answer to the question in \boxed{}.\nThe output format should be: image/video description here reasoning process here \boxed{FINAL ANSWER here}." +) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + # QUESTION_TEMPLATE = ( + # "{Question}\n" + # "Please think about this question as if you were a human pondering deeply. " + # "Engage in an internal dialogue using expressions such as 'let me think', 'wait', 'Hmm', 'oh, I see', 'let's break it down', etc, or other natural language thought expressions " + # "It's encouraged to include self-reflection or verification in the reasoning process. " + # "Provide your detailed reasoning between the tags, and then give your final answer between the tags." + # ) + + QUESTION_TEMPLATE = ( + "{Question}\n" + "You are tasked with analyzing an image/video to generate a detailed description to help you answer the question. " + "First analyze the image/video and produce a self-contained description—detailed enough that can lead to the correct answer. " + "Wrap the entire description in tags.\n" + "Next, engage in an internal dialogue and include self-reflection or verification in your reasoning process. " + "Provide your detailed, step-by-step reasoning based on the image/video description information and image/video, and enclose this part within tags.\n" + "Finally, provide a single word or phrase answer to the question in \\boxed{{}}.\n" + "The output format should be: image/video description here " + " reasoning process here \\boxed{{FINAL ANSWER here}}." 
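Editor's note: putting the pieces of this file together, each rollout's description is turned into a text-only judge prompt, the prompts are sent concurrently to the OpenAI-compatible vLLM server through chat_batch, and the judge's \boxed{} answer is graded against the ground truth. A condensed sketch of that round trip, reusing the client, chat_batch, extract_boxed_content and grade_answer objects already defined or imported above; the judge model name and prompt wording are simplified stand-ins for the script's prompt_template.

# Condensed sketch of the description-verification loop in accuracy_reward().
def judge_descriptions(client, descriptions, questions, gt_answers,
                       model="Qwen2.5-32B-Instruct"):
    batches = [
        [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": (
                f"Text description: {desc}\nQuestion: {q}\n"
                "Answer the question from the description only and put the "
                "final answer in \\boxed{}."
            )},
        ]
        for desc, q in zip(descriptions, questions)
    ]
    replies = chat_batch(client, batches, model=model)      # parallel API calls
    judged = [extract_boxed_content(r) for r in replies]    # pull \boxed{...}
    return [1.0 if grade_answer(j, gt) else 0.0             # grade vs. ground truth
            for j, gt in zip(judged, gt_answers)]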
+) + + + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + # "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + "text": QUESTION_TEMPLATE.format(Question=question) + } + ] + }] + } + + return msg + + + dataset = dataset.map(make_conversation_image_and_video) + + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModifiedOrig + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo.py b/src/r1-v/src/open_r1/grpo.py new file mode 100644 index 0000000000000000000000000000000000000000..cb12ea6d2677b3e4d0b88abbffe867b85077c9a7 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo.py @@ -0,0 +1,318 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
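Editor's note: all of the scripts in this patch register their rewards in reward_funcs_registry and hand them to the GRPO trainer, which (judging from how accuracy_reward and the format rewards consume their arguments) calls each function with the sampled completions, the solution column, and the remaining dataset columns as keyword arguments, and expects one float per completion back. A toy illustration of that calling convention; the function and all data values below are made up.

# Toy illustration of the reward-function interface used throughout this patch:
# completions are lists of chat messages, and each reward function returns
# one float per completion.
def length_bonus_reward(completions, solution, **kwargs):
    """Example reward: 0.1 bonus for completions longer than the solution."""
    contents = [c[0]["content"] for c in completions]
    return [0.1 if len(text) > len(sol) else 0.0
            for text, sol in zip(contents, solution)]

completions = [
    [{"role": "assistant", "content": "A short guess."}],
    [{"role": "assistant", "content": "A much longer, more detailed answer."}],
]
solution = ["A reference answer.", "A reference answer."]

print(length_bonus_reward(completions, solution, problem_type=["free-form"] * 2))
# -> [0.0, 0.1]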
+ +import os +import re +from datetime import datetime +from dataclasses import dataclass, field +from typing import Optional + +from datasets import load_dataset, load_from_disk +from transformers import Qwen2VLForConditionalGeneration + +from trainer import Qwen2VLGRPOTrainer, Qwen2VLGRPOVLLMTrainerModified +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config + +from datasets import Dataset, DatasetDict + +from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction +from rouge_score import rouge_scorer + + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={"help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + temporal: Optional[bool] = field( + default=True, + metadata={"help": "whether using temporal GRPO"}, + ) + len_control: Optional[bool] = field( + default=True, + metadata={"help": "whether using length reward"}, + ) + + + +def accuracy_reward(completions, solution, **kwargs): + + def extract_answer(text): + pattern = r'\s*(.*?)\s*' + match = re.search(pattern, text, re.DOTALL) + if match: + return match.group(1).strip() + return "" + + def normalize_number(num_str): + try: + num_str = num_str.replace(',', '') + return float(num_str) + except Exception as e: + print(f"Error converting '{num_str}' to float: {e}") + return None + + def wer(reference, hypothesis): + ref_words = reference.split() + hyp_words = hypothesis.split() + m = len(ref_words) + n = len(hyp_words) + d = [[0]*(n+1) for _ in range(m+1)] + for i in range(m+1): + d[i][0] = i + for j in range(n+1): + d[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if ref_words[i-1] == hyp_words[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1]) + return d[m][n] / max(1, m) + + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + + question_type = kwargs['problem_type'][0] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + for content, sol in zip(contents, solution): + + try: + output_ans = extract_answer(content) + gt_ans = extract_answer(sol) + if question_type == "multiple choice": + reward = 1.0 if output_ans.strip() == gt_ans.strip() else 0.0 + elif question_type == "numerical": + gt_has_decimal = ("." in gt_ans) or ("," in gt_ans) + out_has_decimal = ("." 
in output_ans) or ("," in output_ans) + if gt_has_decimal != out_has_decimal: + reward = 0.0 + else: + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + else: + reward = 1.0 if round(gt_number, 2) == round(out_number, 2) else 0.0 + elif question_type == "OCR": + error_rate = wer(gt_ans, output_ans) + reward = 1 - error_rate + reward = max(0.0, min(1.0, reward)) + elif question_type == "free-form": + score = compute_rouge_score(gt_ans, output_ans) + reward = max(0.0, min(1.0, score)) + elif question_type == "regression": + gt_number = normalize_number(gt_ans) + out_number = normalize_number(output_ans) + if gt_number is None or out_number is None: + reward = 0.0 + rel_diff = (abs(out_number - gt_number) + 1e-9) / (abs(gt_number) + 1e-9) + rel_diff = min(1.0, max(0.0, rel_diff)) + reward = 1 - rel_diff + else: + reward = 0.0 + except Exception as e: + print(f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = os.getenv("LOG_PATH") + # local_rank = int(os.getenv("LOCAL_RANK", 0)) + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {content}\n") + f.write(f"Solution: {sol}\n") + + return rewards + + +def format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + pattern = r".*?\s*.*?" + completion_contents = [completion[0]["content"] for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) for content in completion_contents] + return [0.1 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": format_reward, +} + +SYSTEM_PROMPT = ( + "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant " + "first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning " + "process and answer are enclosed within and tags, respectively, i.e., " + " reasoning process here answer here " +) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] for func in script_args.reward_funcs] + + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + + # Format into conversation + def make_conversation(example): + return { + "prompt": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": example["problem"]}, + ], + } + + + QUESTION_TEMPLATE = ( + "{Question}\n" + "Please think about this question as if you were a human pondering deeply. " + "Engage in an internal dialogue using expressions such as 'let me think', 'wait', 'Hmm', 'oh, I see', 'let's break it down', etc, or other natural language thought expressions " + "It's encouraged to include self-reflection or verification in the reasoning process. " + "Provide your detailed reasoning between the tags, and then give your final answer between the tags." + ) + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) 
within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags." + } + + def make_conversation_image(example): + + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + + def make_conversation_video(example): + return { + "prompt": [ + { + "role": "user", + "content": [ + {"type": "video"}, + {"type": "text", "text": QUESTION_TEMPLATE.format(Question=example["problem"])}, + ], + }, + ], + } + + def make_conversation_image_and_video(example): + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + msg ={ + "prompt": + [{ + "role": "user", + "content": [ + { + "type": example['data_type'], + # example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + } + ] + }] + } + + return msg + + + dataset = dataset.map(make_conversation_image_and_video) + + + trainer_cls = Qwen2VLGRPOTrainer if not training_args.use_vllm else Qwen2VLGRPOVLLMTrainerModified + print("using: ", trainer_cls) + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + script_args=script_args, + train_dataset=dataset[script_args.dataset_train_split], + eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + ) + + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + trainer.train(resume_from_checkpoint=checkpoint) + else: + trainer.train() + + # Save and push to hub + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: + trainer.push_to_hub(dataset_name=script_args.dataset_name) + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/grpo_vllm_caption.py b/src/r1-v/src/open_r1/grpo_vllm_caption.py new file mode 100644 index 0000000000000000000000000000000000000000..563b2927343a399a02db20d39d0b47b624116c40 --- /dev/null +++ b/src/r1-v/src/open_r1/grpo_vllm_caption.py @@ -0,0 +1,266 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import os +import re +from datetime import datetime +import json +from io import BytesIO +import base64 + +import numpy as np +import torch +import torch.nn.functional as F +from datasets import load_dataset +from rouge_score import rouge_scorer +from trl import GRPOConfig, GRPOTrainer, ModelConfig, ScriptArguments, TrlParser, get_peft_config +import Levenshtein +import wandb + +from dataclasses import dataclass, field +from typing import Optional +from math_verify import parse, verify + +from trainer.grpo_trainer_vllm_caption import Qwen2VLGRPOTrainerCap + +os.environ["WANDB_MODE"] = "offline" + + +wandb.init(project="SelfEval-R1", name="SelfEval-R1") + + +@dataclass +class GRPOScriptArguments(ScriptArguments): + """ + Script arguments for the GRPO training script. + + Args: + reward_funcs (`list[str]`): + List of reward functions. Possible values: 'accuracy', 'format'. + """ + + reward_funcs: list[str] = field( + default_factory=lambda: ["accuracy", "format"], + metadata={ + "help": "List of reward functions. Possible values: 'accuracy', 'format'"}, + ) + max_pixels: Optional[int] = field( + default=12845056, + metadata={"help": "Maximum number of pixels for the image"}, + ) + min_pixels: Optional[int] = field( + default=3136, + metadata={"help": "Minimum number of pixels for the image"}, + ) + caption_reward: Optional[bool] = field( + default=True, + metadata={"help": "Whether to use caption reward or not"}, + ) + caption_reward_weight: Optional[float] = field( + default=0.1, + metadata={"help": "Weight for the caption reward"}, + ) + + +# This function is partially borrowed from Video-R1[https://github.com/tulerfeng/Video-R1] +def accuracy_reward(completions, solution, **kwargs): + + def extract_answer(text): + pattern = r'(.*?)' + match = re.search(pattern, text, re.DOTALL) + if match: + return match.group(1).strip() + return "" + + def extract_option(text): + pattern = r'' + match = re.search(pattern, text, re.DOTALL) + if match: + return match.group(1).strip() + return "" + + def is_number(num_str): + try: + float(num_str) + return True + except Exception as e: + return False + + def extract_numbers(answer): + pattern = r"[-+]?\d*\.?\d+" + match = re.search(pattern, answer) + if match: + number_str = match.group() + if answer.strip().endswith('%'): + number = float(number_str) / 100 + else: + number = float(number_str) + return number + else: + return None + + def anls(reference, hypothesis): + distance = Levenshtein.distance(reference, hypothesis) + max_length = max(len(reference), len(hypothesis)) + similarity = 1 - (distance / max_length) + + return similarity + + def compute_rouge_score(reference, hypothesis, use_stemmer=True): + scorer = rouge_scorer.RougeScorer( + ['rouge1', 'rouge2', 'rougeL'], use_stemmer=use_stemmer) + scores = scorer.score(reference, hypothesis) + average_fmeasure = ( + scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3 + return average_fmeasure + + question_type = kwargs['problem_type'][0] + + contents = [completion[0]["content"] for completion in completions] + current_time = datetime.now().strftime("%d-%H-%M-%S-%f") + rewards = [] + + for content, sol in zip(contents, solution): + try: + output_ans = extract_answer(content) + gt_ans = extract_answer(sol) + if question_type == "OCR": + if is_number(gt_ans): + output_ans = extract_numbers(output_ans) + reward = 1.0 if output_ans == float( + gt_ans) else 0.0 + else: 
+ reward = anls(gt_ans.lower(), + output_ans.lower()) + reward = max(0.0, min(1.0, reward)) + elif question_type == "free-form": + score = compute_rouge_score(gt_ans, output_ans) + reward = max(0.0, min(1.0, score)) + else: + if is_number(gt_ans): + output_ans = extract_numbers(output_ans) + reward = 1.0 if output_ans == float( + gt_ans) else 0.0 + else: + reward = 1.0 if output_ans.lower() == gt_ans.lower() else 0.0 + except Exception as e: + print( + f"Error in reward_fn for question_type '{question_type}': {e}") + reward = 0.0 + + rewards.append(reward) + + if os.getenv("DEBUG_MODE") == "true": + log_path = 'debug.log' + with open(log_path, "a") as f: + try: + f.write( + f"------------- {current_time} Accuracy reward: {reward} -------------\n") + f.write(f"Content: {content}\n") + f.write(f"Solution: {sol}\n") + f.write(f"type: {question_type}\n") + except BaseException: + f.write("writeing error") + + return rewards + + +def format_reward(completions, **kwargs): + """Reward function that checks if the completion has a specific format.""" + pattern = r".*?\s.*?\s*.*?" + completion_contents = [completion[0]["content"] + for completion in completions] + matches = [re.fullmatch(pattern, content, re.DOTALL) + for content in completion_contents] + return [1.0 if match else 0.0 for match in matches] + + +reward_funcs_registry = { + "accuracy": accuracy_reward, + "format": format_reward, +} + + +SYSTEM_PROMPT = ( + "You are tasked with analyzing an image to generate an exhaustive and detailed description. " + "Your goal is to extract and describe all possible information from the image, including but not limited to objects, " + "numbers, text, and the relationships between these elements. The description should be as fine and detailed as possible, " + "capturing every nuance. After generating the detailed description, you need to analyze it and provide step-by-step " + "detailed reasoning for the given question based on the information. Finally, provide a single word or phrase answer " + "to the question. 
The description, reasoning process and answer are enclosed within , " + "and tags, respectively, i.e., image description here reasoning process here " + " answer here " +) + + +def main(script_args, training_args, model_args): + # Get reward functions + reward_funcs = [reward_funcs_registry[func] + for func in script_args.reward_funcs] + + # Load the dataset + # dataset = load_dataset(script_args.dataset_name, + # name=script_args.dataset_config) + dataset = load_dataset("json", data_files=script_args.dataset_name, split='train') + + + # Format into conversation + def make_conversation_image(example): + return { + "prompt": [ + {"role": "system", "content": [ + {"type": "text", "text": SYSTEM_PROMPT}]}, + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": example["problem"]}, + ], + }, + ] + } + + dataset = dataset.map(make_conversation_image) + + if "Qwen" in model_args.model_name_or_path or "Aria" in model_args.model_name_or_path: + trainer_cls = Qwen2VLGRPOTrainerCap + else: + trainer_cls = GRPOTrainer + + # Initialize the GRPO trainer + trainer = trainer_cls( + model=model_args.model_name_or_path, + reward_funcs=reward_funcs, + args=training_args, + train_dataset=dataset, + eval_dataset=None, + peft_config=get_peft_config(model_args), + attn_implementation=model_args.attn_implementation, + max_pixels=script_args.max_pixels, + min_pixels=script_args.min_pixels, + caption_reward=script_args.caption_reward, + caption_reward_weight=script_args.caption_reward_weight, + ) + + trainer.train() + # trainer.train() + + +if __name__ == "__main__": + parser = TrlParser((GRPOScriptArguments, GRPOConfig, ModelConfig)) + script_args, training_args, model_args = parser.parse_args_and_config() + + print('training_args:\n', training_args) + print('script_args:\n', script_args) + print('model_args:\n', model_args) + main(script_args, training_args, model_args) diff --git a/src/r1-v/src/open_r1/sft_video.py b/src/r1-v/src/open_r1/sft_video.py new file mode 100644 index 0000000000000000000000000000000000000000..724e1c14764dcb1be0f9d6db1ef9a3a0d649c816 --- /dev/null +++ b/src/r1-v/src/open_r1/sft_video.py @@ -0,0 +1,304 @@ +# Copyright 2024. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
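# ---------------------------------------------------------------------------
# Illustrative sketch (assumption: the reasoning scripts in this diff follow the
# Video-R1 / DeepSeek-R1 convention of a <think> ... </think> block followed by an
# <answer> ... </answer> block; the literal tag names are assumed, not quoted from
# the patch). Under that assumption, the answer extractor and the format reward
# used by the grpo_*.py scripts above would look roughly like this:
import re

def extract_answer(text: str) -> str:
    # Pull the final answer out of the <answer> block; empty string if absent.
    match = re.search(r"<answer>\s*(.*?)\s*</answer>", text, re.DOTALL)
    return match.group(1).strip() if match else ""

def format_reward_single(text: str) -> float:
    # Small bonus (0.1, mirroring format_reward in grpo.py above) when the
    # completion is exactly one think block followed by one answer block.
    ok = re.fullmatch(r"<think>.*?</think>\s*<answer>.*?</answer>", text, re.DOTALL)
    return 0.1 if ok else 0.0

# format_reward_single("<think>count the cubes</think> <answer>3</answer>") -> 0.1
# ---------------------------------------------------------------------------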
+""" +Example usage: +accelerate launch \ + --config_file=deepspeed_zero2.yaml \ + train_video_llm.py \ + --dataset_name mfarre/simplevideoshorts \ + --model_name_or_path Qwen/Qwen2-VL-7B-Instruct \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --output_dir video-llm-output \ + --bf16 \ + --torch_dtype bfloat16 \ + --gradient_checkpointing +""" + +import os +import json +import random +import requests +import torch +from torch.optim import AdamW +from datasets import load_dataset +from transformers import ( + AutoModelForVision2Seq, + AutoProcessor, + BitsAndBytesConfig, + Qwen2VLProcessor, + Qwen2VLForConditionalGeneration, + Qwen2_5_VLForConditionalGeneration +) +from transformers import get_linear_schedule_with_warmup + +from trl import ( + ModelConfig, + ScriptArguments, + SFTConfig, + SFTTrainer, + TrlParser, + get_kbit_device_map, + get_peft_config, +) +from accelerate import Accelerator +from qwen_vl_utils import process_vision_info + +from datasets import Dataset, DatasetDict + +import wandb + +from typing import List, Dict, Any + +os.environ["DS_BUILD_FUSED_ADAM"] = "0" + +def get_current_device(): + """Get the current device. For GPU we return the local process index to enable multiple GPU training.""" + return Accelerator().local_process_index if torch.cuda.is_available() else "cpu" + +def download_video(url: str, folder: str = '/tmp/videos/') -> str: + """Download video if not already present locally.""" + filename = url.split("/")[-1] + local_path = os.path.join(folder, filename) + + if os.path.exists(local_path): + return local_path + + try: + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(local_path, 'wb') as f: + for chunk in r.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + return local_path + except requests.RequestException as e: + raise Exception(f"Failed to download video: {e}") + +def prepare_dataset(example: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]: + """Prepare dataset example for training.""" + + + + system_message = "You are a helpful assistant" + + + QUESTION_TEMPLATE = ( + "{Question}\n" + "Please think about this question as if you were a human pondering deeply. " + "Engage in an internal dialogue using expressions such as 'let me think', 'wait', 'Hmm', 'oh, I see', 'let's break it down', etc, or other natural language thought expressions " + "It's encouraged to include self-reflection or verification in the reasoning process. " + "Provide your detailed reasoning between the tags, and then give your final answer between the tags." + ) + + TYPE_TEMPLATE = { + "multiple choice": " Please provide only the single option letter (e.g., A, B, C, D, etc.) within the tags.", + "numerical": " Please provide the numerical value (e.g., 42 or 3.14) within the tags.", + "OCR": " Please transcribe text from the image/video clearly and provide your text answer within the tags.", + "free-form": " Please provide your text answer within the tags.", + "regression": " Please provide the numerical value (e.g., 42 or 3.14) within the tags." 
+ } + + + + if example["problem_type"] == 'multiple choice': + question = example['problem'] + "Options:\n" + for op in example["options"]: + question += op + "\n" + else: + question = example['problem'] + + + messages = [ + { + "role": "system", + "content": [{"type": "text", "text": system_message}] + }, + { + "role": "user", + "content": [ + { + "type": example['data_type'], + example['data_type']: os.getcwd() + "/Video-R1-data" + example['path'][1:] + # "max_pixels": 360*420, + # "fps": 1.0 + }, + { + "type": "text", + "text": QUESTION_TEMPLATE.format(Question=question) + TYPE_TEMPLATE[example['problem_type']] + } + ] + }, + { + "role": "assistant", + "content": [{"type": "text", "text": example['process'] + "\n" + example['solution']}] + } + ] + + + return {"messages": messages} + +def collate_fn(examples: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: + """Collate batch of examples for training.""" + texts = [] + # video_inputs = [] + # image_inputs = [] + + for i, example in enumerate(examples): + try: + + texts.append(processor.apply_chat_template(example["messages"], tokenize=False)) + image_inputs, video_inputs, video_kwargs = process_vision_info(example["messages"], return_video_kwargs=True) + + except Exception as e: + raise ValueError(f"Failed to process example {i}: {e}") + + inputs = processor( + text=texts, + images=image_inputs, + videos=video_inputs, + return_tensors="pt", + padding=True + ) + + labels = inputs["input_ids"].clone() + labels[labels == processor.tokenizer.pad_token_id] = -100 + + # Handle visual tokens based on processor type + visual_tokens = [151652, 151653, 151656] if isinstance(processor, Qwen2VLProcessor) else [ + processor.tokenizer.convert_tokens_to_ids(processor.image_token) + ] + + for visual_token_id in visual_tokens: + labels[labels == visual_token_id] = -100 + + inputs["labels"] = labels + return inputs + +if __name__ == "__main__": + # Parse arguments + parser = TrlParser((ScriptArguments, SFTConfig, ModelConfig)) + script_args, training_args, model_config = parser.parse_args_and_config() + + # Configure training args + training_args.gradient_checkpointing_kwargs = dict(use_reentrant=False) + training_args.remove_unused_columns = False + training_args.dataset_kwargs = {"skip_prepare_dataset": True} + + # Load dataset + if script_args.dataset_name.endswith('.json') or script_args.dataset_name.endswith('.jsonl'): + dataset = DatasetDict({"train": Dataset.from_json(script_args.dataset_name)}) + else: + # Load the dataset + dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) + + # Setup model + torch_dtype = ( + model_config.torch_dtype + if model_config.torch_dtype in ["auto", None] + else getattr(torch, model_config.torch_dtype) + ) + + # # Quantization configuration for 4-bit training + # bnb_config = BitsAndBytesConfig( + # load_in_4bit=True, + # bnb_4bit_use_double_quant=True, + # bnb_4bit_quant_type="nf4", + # bnb_4bit_compute_dtype=torch.bfloat16 + # ) + + # Model initialization + model_kwargs = dict( + revision=model_config.model_revision, + trust_remote_code=model_config.trust_remote_code, + torch_dtype=torch_dtype, + device_map=get_kbit_device_map(), + # quantization_config=bnb_config, + ) + + + if "Qwen2-VL" in model_config.model_name_or_path: + model = Qwen2VLForConditionalGeneration.from_pretrained(model_config.model_name_or_path, **model_kwargs) + elif "Qwen2.5-VL" in model_config.model_name_or_path: + model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_config.model_name_or_path, 
**model_kwargs) + else: + model = AutoModelForVision2Seq.from_pretrained(model_config.model_name_or_path, **model_kwargs) + + processor = AutoProcessor.from_pretrained( + model_config.model_name_or_path, + trust_remote_code=model_config.trust_remote_code + ) + + # Prepare dataset + prepared_dataset = [prepare_dataset(example) for example in dataset['train']] + + # Initialize wandb if specified + if training_args.report_to == "wandb": + wandb.init(project="video-llm-training") + + + ''' + Below is added code + ''' + base_lr = 2e-4 + optimizer = AdamW( + params=model.parameters(), + lr=base_lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=1e-2 + ) + + num_training_steps = len(prepared_dataset) // ( + training_args.per_device_train_batch_size + * training_args.gradient_accumulation_steps + * training_args.world_size + ) * training_args.num_train_epochs + + lr_scheduler = get_linear_schedule_with_warmup( + optimizer, + num_warmup_steps=int(0.05 * num_training_steps), + num_training_steps=num_training_steps, + ) + ''' + Above is added code + ''' + + + # Initialize trainer + trainer = SFTTrainer( + model=model, + args=training_args, + train_dataset=prepared_dataset, + data_collator=collate_fn, + peft_config=get_peft_config(model_config), + # tokenizer=processor.tokenizer + optimizers=(optimizer, lr_scheduler), + ) + + # Train model + trainer.train() + + # Save final model + + trainer.save_model(training_args.output_dir) + processor.save_pretrained(training_args.output_dir) + + if trainer.accelerator.is_main_process: + # Restore k,v cache for fast inference + trainer.model.config.use_cache = True + trainer.model.config.save_pretrained(training_args.output_dir) + + # Cleanup + del model + del trainer + torch.cuda.empty_cache() + wandb.finish() diff --git a/src/r1-v/src/open_r1/trainer/__init__.py b/src/r1-v/src/open_r1/trainer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2179ef5dd545d63c2501afac567a521a335d66b5 --- /dev/null +++ b/src/r1-v/src/open_r1/trainer/__init__.py @@ -0,0 +1,12 @@ +from .grpo_trainer import Qwen2VLGRPOTrainer +from .vllm_grpo_trainer_modified import Qwen2VLGRPOVLLMTrainerModified +from .vllm_grpo_trainer_modified_orig import Qwen2VLGRPOVLLMTrainerModifiedOrig +from .vllm_grpo_trainer_selfConst import Qwen2VLGRPOVLLMTrainerSelfConst + + +__all__ = [ + "Qwen2VLGRPOTrainer", + "Qwen2VLGRPOVLLMTrainerModified", + "Qwen2VLGRPOVLLMTrainerModifiedOrig", + "Qwen2VLGRPOVLLMTrainerSelfConst" +] diff --git a/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer_modified_error.py b/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer_modified_error.py new file mode 100644 index 0000000000000000000000000000000000000000..09686ebc9f8766bdb16c538ec1e3e008af15d3b3 --- /dev/null +++ b/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer_modified_error.py @@ -0,0 +1,1061 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
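# ---------------------------------------------------------------------------
# Illustrative sketch of the label masking performed by collate_fn in
# sft_video.py above: padding tokens and the Qwen2-VL visual placeholder tokens
# are set to -100 so the cross-entropy loss ignores them. Standalone,
# tensor-only version; the visual token ids are the ones hard-coded in that
# file, the pad id in the usage line is an illustrative value:
import torch

QWEN2_VL_VISUAL_TOKEN_IDS = (151652, 151653, 151656)

def mask_labels(input_ids: torch.Tensor, pad_token_id: int) -> torch.Tensor:
    labels = input_ids.clone()
    labels[labels == pad_token_id] = -100          # ignore padding positions
    for tok in QWEN2_VL_VISUAL_TOKEN_IDS:
        labels[labels == tok] = -100               # ignore image/video placeholders
    return labels

# mask_labels(torch.tensor([[151652, 11, 12, 0]]), pad_token_id=0)
# -> tensor([[-100, 11, 12, -100]])
# ---------------------------------------------------------------------------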
+ +import os +import textwrap +from collections import defaultdict +from typing import Any, Callable, Optional, Union +from accelerate.utils.other import is_compiled_module +from accelerate.utils import broadcast_object_list, gather, gather_object +import torch +import torch.utils.data +import transformers +import warnings +from unittest.mock import patch +from datasets import Dataset, IterableDataset +from packaging import version +from transformers import ( + AriaForConditionalGeneration, + AriaProcessor, + AutoModelForCausalLM, + AutoModelForSequenceClassification, + AutoProcessor, + AutoTokenizer, + GenerationConfig, + PreTrainedModel, + PreTrainedTokenizerBase, + Qwen2VLForConditionalGeneration, + Qwen2_5_VLForConditionalGeneration, + Trainer, + TrainerCallback, + is_wandb_available, +) +from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled +from transformers.utils import is_peft_available + +from trl.data_utils import ( + apply_chat_template, + is_conversational, + maybe_apply_chat_template, +) +from trl.import_utils import is_vllm_available + +from trl.models import ( + create_reference_model, + prepare_deepspeed, + unwrap_model_for_generation, +) +from trl.trainer.grpo_config import GRPOConfig +from trl.trainer.utils import generate_model_card, get_comet_experiment_url, pad +from trl import GRPOTrainer + +import copy + +if is_peft_available(): + from peft import PeftConfig, get_peft_model + +if is_vllm_available(): + from vllm import LLM, SamplingParams + +if is_wandb_available(): + import wandb +import torch.nn as nn +from torch.utils.data import Sampler +import gc +from qwen_vl_utils import process_vision_info + +import re + +def extract_answer(predict: str) -> Optional[str]: + """ + Extracts the content of the block from `predict`. + Returns the inner text (with leading/trailing whitespace stripped), + or None if no tag is found. + """ + match = re.search(r"([\s\S]*?)", predict, re.DOTALL) + if not match: + return None + return match.group(1).strip() + +def extract_info(predict: str) -> Optional[str]: + """ + Extracts the content of the block from `predict`. + Returns the inner text (with leading/trailing whitespace stripped), + or None if no tag is found. + """ + match = re.search(r"([\s\S]*?)", predict, re.DOTALL) + if not match: + return None + return match.group(1).strip() + + + +# What we call a reward function is a callable that takes a list of prompts and completions and returns a list of +# rewards. When it's a string, it's a model ID, so it's loaded as a pretrained model. 
+RewardFunc = Union[str, PreTrainedModel, Callable[[list, list], list[float]]] + + +class Qwen2VLGRPOVLLMTrainerModified(Trainer): + def __init__( + self, + model: Union[str, PreTrainedModel], + reward_funcs: Union[RewardFunc, list[RewardFunc]], + args: GRPOConfig = None, + script_args = None, + train_dataset: Optional[Union[Dataset, IterableDataset]] = None, + eval_dataset: Optional[ + Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]] + ] = None, + processing_class: Optional[PreTrainedTokenizerBase] = None, + reward_processing_classes: Optional[ + Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]] + ] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[ + Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR] + ] = (None, None), + peft_config: Optional["PeftConfig"] = None, + # qwen2-vl related params + max_pixels: Optional[int] = 12845056, + min_pixels: Optional[int] = 3136, + attn_implementation: str = "flash_attention_2", + ): + + # Args + if args is None: + model_name = model if isinstance(model, str) else model.config._name_or_path + model_name = model_name.split("/")[-1] + args = GRPOConfig(f"{model_name}-GRPO") + + # Models + # Trained model + model_init_kwargs = args.model_init_kwargs or {} + model_init_kwargs["attn_implementation"] = attn_implementation + if isinstance(model, str): + model_id = model + torch_dtype = model_init_kwargs.get("torch_dtype") + if ( + isinstance(torch_dtype, torch.dtype) + or torch_dtype == "auto" + or torch_dtype is None + ): + pass # torch_dtype is already a torch.dtype or "auto" or None + elif isinstance(torch_dtype, str): # it's a str, but not "auto" + torch_dtype = getattr(torch, torch_dtype) + model_init_kwargs["torch_dtype"] = torch_dtype + else: + raise ValueError( + "Invalid `torch_dtype` passed to `GRPOConfig`. Expected either 'auto' or a string representing " + f"a `torch.dtype` (e.g., 'float32'), but got {torch_dtype}." + ) + # Disable caching if gradient checkpointing is enabled (not supported) + model_init_kwargs["use_cache"] = ( + False + if args.gradient_checkpointing + else model_init_kwargs.get("use_cache") + ) + if "Qwen2-VL" in model_id: + model = Qwen2VLForConditionalGeneration.from_pretrained( + model, **model_init_kwargs + ) + elif "Qwen2.5-VL" in model_id: + model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model, **model_init_kwargs + ) + elif "Aria" in model_id: + model_init_kwargs.pop("use_cache") + model = AriaForConditionalGeneration.from_pretrained( + model, **model_init_kwargs + ) + else: + model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model, **model_init_kwargs) + else: + model_id = model.config._name_or_path + if args.model_init_kwargs is not None: + raise ValueError( + "You passed `model_init_kwargs` to the `GRPOConfig`, but your model is already instantiated. " + "This argument can only be used when the `model` argument is a string." 
+ ) + + if peft_config is not None: + model = get_peft_model(model, peft_config) + + # Reference model + if is_deepspeed_zero3_enabled(): + if "Qwen2-VL" in model_id: + self.ref_model = Qwen2VLForConditionalGeneration.from_pretrained( + model_id, **model_init_kwargs + ) + elif "Qwen2.5-VL" in model_id: + self.ref_model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model_id, **model_init_kwargs + ) + elif "Aria" in model_id: + self.ref_model = AriaForConditionalGeneration.from_pretrained( + model_id, **model_init_kwargs + ) + else: + self.ref_model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model_id, **model_init_kwargs + ) + elif peft_config is None: + # If PEFT configuration is not provided, create a reference model based on the initial model. + self.ref_model = create_reference_model(model) + else: + # If PEFT is used, the reference model is not needed since the adapter can be disabled + # to revert to the initial model. + self.ref_model = None + + # Processing class + if processing_class is None: + if "Qwen" in model_id or "Aria" in model_id: + processing_class = AutoProcessor.from_pretrained(model_id) + pad_token_id = processing_class.tokenizer.pad_token_id + processing_class.pad_token_id = pad_token_id + processing_class.eos_token_id = processing_class.tokenizer.eos_token_id + if "Qwen" in model_id: + processing_class.image_processor.max_pixels = max_pixels + processing_class.image_processor.min_pixels = min_pixels + else: + processing_class = AutoTokenizer.from_pretrained( + model.config._name_or_path, padding_side="left" + ) + pad_token_id = processing_class.pad_token_id + + # Reward functions + if not isinstance(reward_funcs, list): + reward_funcs = [reward_funcs] + for i, reward_func in enumerate(reward_funcs): + if isinstance(reward_func, str): + reward_funcs[i] = AutoModelForSequenceClassification.from_pretrained( + reward_func, num_labels=1, **model_init_kwargs + ) + self.reward_funcs = reward_funcs + + # Reward processing class + if reward_processing_classes is None: + reward_processing_classes = [None] * len(reward_funcs) + elif not isinstance(reward_processing_classes, list): + reward_processing_classes = [reward_processing_classes] + else: + if len(reward_processing_classes) != len(reward_funcs): + raise ValueError( + "The number of reward processing classes must match the number of reward functions." + ) + + for i, (reward_processing_class, reward_func) in enumerate( + zip(reward_processing_classes, reward_funcs) + ): + if isinstance(reward_func, PreTrainedModel): + if reward_processing_class is None: + reward_processing_class = AutoTokenizer.from_pretrained( + reward_func.config._name_or_path + ) + if reward_processing_class.pad_token_id is None: + reward_processing_class.pad_token = ( + reward_processing_class.eos_token + ) + # The reward model computes the reward for the latest non-padded token in the input sequence. + # So it's important to set the pad token ID to the padding token ID of the processing class. 
+ reward_func.config.pad_token_id = reward_processing_class.pad_token_id + reward_processing_classes[i] = reward_processing_class + self.reward_processing_classes = reward_processing_classes + + # Data collator + def data_collator(features): # No data collation is needed in GRPO + return features + + # Training arguments + self.max_prompt_length = args.max_prompt_length + self.max_completion_length = ( + args.max_completion_length + ) # = |o_i| in the GRPO paper + self.num_generations = args.num_generations # = G in the GRPO paper + self.temporal = script_args.temporal + self.generation_config = GenerationConfig( + max_new_tokens=self.max_completion_length, + do_sample=True, + temperature=1, # HACK + num_return_sequences=self.num_generations, + pad_token_id=pad_token_id, + ) + self.beta = args.beta + + self.shuffled_num_generations = self.num_generations // 2 + self.shuffled_generation_config = GenerationConfig( + max_new_tokens=self.max_completion_length, + do_sample=True, + top_p=0.95, + temperature=1, # HACK + num_return_sequences=self.shuffled_num_generations, + pad_token_id=pad_token_id, + ) + + self.dummy_generation_config = GenerationConfig( + max_new_tokens=1, + do_sample=True, + top_p=0.95, + temperature=1, # HACK + num_return_sequences=1, + pad_token_id=pad_token_id, + ) + self.len_control = script_args.len_control + self.beta = args.beta + + # The trainer estimates the number of FLOPs (floating-point operations) using the number of elements in the + # input tensor associated with the key "input_ids". However, in GRPO, the sampled data does not include the + # "input_ids" key. Instead, the available keys is "prompt". As a result, the trainer issues the warning: + # "Could not estimate the number of tokens of the input, floating-point operations will not be computed." To + # suppress this warning, we set the "estimate_tokens" key in the model's "warnings_issued" dictionary to True. + # This acts as a flag to indicate that the warning has already been issued. + model.warnings_issued["estimate_tokens"] = True + + # Initialize the metrics + self._metrics = defaultdict(list) + self.use_vllm = args.use_vllm + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + callbacks=callbacks, + optimizers=optimizers, + ) + # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the + # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set + # self.model_accepts_loss_kwargs to False to enable scaling. + self.model_accepts_loss_kwargs = False + + if self.use_vllm: + if not is_vllm_available(): + raise ImportError( + "vLLM is not available and `use_vllm` is set to True. Please install vLLM with " + "`pip install vllm` to use it." + ) + + if self.accelerator.is_main_process: + vllm_device = self.args.vllm_device + if vllm_device == "auto": + vllm_device = f"cuda:{self.accelerator.num_processes}" # take the next GPU idx + # Check that the requested device is available + if ( + vllm_device.split(":")[0] == "cuda" + and int(vllm_device.split(":")[1]) >= torch.cuda.device_count() + ): + raise ValueError( + f"The requested device for vllm ({vllm_device}) is not available. You are likely using vLLM " + "without restricting the number of GPUs for training. 
Set the `--num_processes` argument to a " + "value lower than the number of GPUs available on your machine—typically, reducing it by one " + f"is sufficient. In your case: `--num_processes {torch.cuda.device_count() - 1}`." + ) + # Check that the requested device is not also used for training + if vllm_device in { + f"cuda:{idx}" for idx in range(self.accelerator.num_processes) + }: + warnings.warn( + f"The requested device {vllm_device} is also used for training. This may lead to unexpected " + "behavior. It is recommended to use a dedicated device for vLLM." + ) + # vLLM is not compatible with accelerate. So we need to patch it to make sure we can (1) place the vLLM + # model on the desired device (world_size_patch) and (2) avoid a test that is not designed for our + # setting (profiling_patch). + world_size_patch = patch( + "torch.distributed.get_world_size", return_value=1 + ) + profiling_patch = patch( + "vllm.worker.worker.Worker._assert_memory_footprint_increased_during_profiling", + return_value=None, + ) + with world_size_patch, profiling_patch: + print("vllm is running on: ", vllm_device) + self.llm = LLM( + model=model.name_or_path, + device=vllm_device, + gpu_memory_utilization=self.args.vllm_gpu_memory_utilization, + dtype=torch.bfloat16, + # Automatic Prefix Caching caches the KV cache of existing queries, so that a new query can + # directly reuse the KV cache if it shares the same prefix with one of the existing queries. + # This is particularly useful here because we generate completions from the same prompts. + enable_prefix_caching=True, + enforce_eager=True, + mm_processor_kwargs=( + { + "max_pixels": max_pixels, + "min_pixels": min_pixels, + } + # if "Qwen2-VL" in model_id or "Qwen2.5-VL" in model_id + if False + else None + ), + max_model_len=args.max_prompt_length + args.max_completion_length, + ) + self.sampling_params = SamplingParams( + temperature=1.0, + top_p=0.95, + max_tokens=self.max_completion_length, + ) + + self._last_loaded_step = 0 # tag to avoid useless loading during grad accumulation + + # When using vLLM, the main process is responsible for loading the model weights. This can cause process + # desynchronization and seems to lead to DeepSpeed hanging during initialization. To prevent this, we + # synchronize all processes after vLLM has been fully initialized. + self.accelerator.wait_for_everyone() + else: + raise ValueError( + "GRPOVLLMTrainerModified only supports vllm generation, please set --use_vllm True" + ) + + if self.ref_model is not None: + if self.is_deepspeed_enabled: + self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator) + else: + self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + + for i, reward_func in enumerate(self.reward_funcs): + if isinstance(reward_func, PreTrainedModel): + self.reward_funcs[i] = self.accelerator.prepare_model(reward_func, evaluation_mode=True) + + def _set_signature_columns_if_needed(self): + # If `self.args.remove_unused_columns` is True, non-signature columns are removed. + # By default, this method sets `self._signature_columns` to the model's expected inputs. + # In GRPOTrainer, we preprocess data, so using the model's signature columns doesn't work. + # Instead, we set them to the columns expected by the `training_step` method, hence the override. 
+ if self._signature_columns is None: + self._signature_columns = ["prompt"] + + # Get the per-token log probabilities for the completions for the model and the reference model + def _get_per_token_logps(self, model, input_ids, **kwargs): + # logits = model(input_ids, attention_mask=attention_mask, pixel_values=pixel_values, image_grid_thw=image_grid_thw).logits # (B, L, V) + # import pdb + # pdb.set_trace() + logits = model(input_ids, **kwargs).logits + logits = logits[:, :-1, :] # (B, L-1, V), exclude the last logit: it corresponds to the next token pred + input_ids = input_ids[:, 1:] # (B, L-1), exclude the first input ID since we don't have logits for it + # Compute the log probabilities for the input tokens. Use a loop to reduce memory peak. + per_token_logps = [] + for logits_row, input_ids_row in zip(logits, input_ids): + log_probs = logits_row.log_softmax(dim=-1) + token_log_prob = torch.gather(log_probs, dim=1, index=input_ids_row.unsqueeze(1)).squeeze(1) + per_token_logps.append(token_log_prob) + return torch.stack(per_token_logps) + + # Trainer "prepares" the inputs before calling `compute_loss`. It converts to tensor and move to device. + # Since we preprocess the data in `compute_loss`, we need to override this method to skip this step. + def _prepare_inputs( + self, inputs: dict[str, Union[torch.Tensor, Any]] + ) -> dict[str, Union[torch.Tensor, Any]]: + return inputs + + def remove_none_from_data(self, data): + for entry in data: + if "content" in entry and isinstance(entry["content"], list): + for sub_entry in entry["content"]: + if isinstance(sub_entry, dict): + keys_to_remove = [k for k, v in sub_entry.items() if v is None] + for k in keys_to_remove: + del sub_entry[k] + return data + + def _vllm_generate(self, prompts_text, mm_data, n): + """ + Helper that wraps the whole ‘gather-broadcast-slice-pad-decode’ dance + and returns (completion_ids, decoded_texts) *ON THIS RANK ONLY*. + `mm_data` can be None/[] for pure-text inputs. 
+ """ + device = self.accelerator.device + + # --------------- gather everything to rank-0 ---------------- + all_prompts = gather_object(prompts_text) + all_mm_data = gather_object(mm_data or [[]] * len(prompts_text)) + + # build the multimodal inputs expected by vLLM + vllm_inputs = [ + {"prompt": p, "multi_modal_data": m[0] if m else {}} + for p, m in zip(all_prompts, all_mm_data) + ] + + # ------------------------------------------------------------- + if self.accelerator.is_main_process: + p = copy.deepcopy(self.sampling_params) + p.n = n + outs = self.llm.generate(vllm_inputs, sampling_params=p, use_tqdm=False) + comp_ids = [o.token_ids for c in outs for o in c.outputs] + else: + comp_ids = [None] * (len(vllm_inputs) * n) + + # broadcast back, pick this rank’s slice + comp_ids = broadcast_object_list(comp_ids, from_process=0) + lo = self.accelerator.process_index * len(prompts_text) * n + hi = (self.accelerator.process_index + 1) * len(prompts_text) * n + comp_ids = comp_ids[lo:hi] + + # pad, convert to tensor → decode + comp_ids = [torch.tensor(x, device=device) for x in comp_ids] + comp_ids = pad(comp_ids, padding_value=self.processing_class.pad_token_id) + decoded = self.processing_class.batch_decode(comp_ids, skip_special_tokens=True) + return comp_ids, decoded + + + def compute_loss( + self, model, inputs, return_outputs=False, num_items_in_batch=None + ): + if return_outputs: + raise ValueError("The GRPOTrainer does not support returning outputs") + # Compute the per-token log probabilities for the model + + + device = self.accelerator.device + prompts = [x["prompt"] for x in inputs] + # images = [x["image"] for x in inputs] + prompts_text = [ + maybe_apply_chat_template(example, self.processing_class)["prompt"] + for example in inputs + ] + + input_copy = copy.deepcopy(inputs[0]['prompt']) + + input_copy = self.remove_none_from_data(input_copy) + + data_type = inputs[0]['data_type'] + + if data_type == 'image': + input_copy[0]['content'][0]['image'] = os.getcwd() + "/Video-R1-data" + inputs[0]['path'][1:] + elif data_type == 'video': + input_copy[0]['content'][0]['video'] = os.getcwd() + "/Video-R1-data" + inputs[0]['path'][1:] + + + image_inputs, video_inputs, video_kwargs = process_vision_info(input_copy, return_video_kwargs=True) + + + prompt_inputs = self.processing_class( + text=copy.deepcopy(prompts_text), + images=image_inputs, + videos=video_inputs, + return_tensors="pt", + padding=True, + padding_side="left", + add_special_tokens=False, + ) + + mm_data = [[data_type, image_inputs if image_inputs else video_inputs]] + prompt_inputs = super()._prepare_inputs(prompt_inputs) + prompt_ids, prompt_mask = prompt_inputs["input_ids"], prompt_inputs["attention_mask"] + + if self.max_prompt_length is not None: + prompt_ids = prompt_ids[:, -self.max_prompt_length :] + prompt_mask = prompt_mask[:, -self.max_prompt_length :] + + + if self.temporal: + if video_inputs: + indices = torch.randperm(video_inputs[0].size(0)) + shuffled_video_inputs = [video_inputs[0][indices]] + shuffled_prompt_inputs = self.processing_class( + text=copy.deepcopy(prompts_text), + images=image_inputs, + videos=shuffled_video_inputs, + return_tensors="pt", + padding=True, + padding_side="left", + add_special_tokens=False, + ) + shuffled_mm_data = [[self.accelerator.process_index, data_type, image_inputs if image_inputs else video_inputs]] + shuffled_prompt_inputs = super()._prepare_inputs(shuffled_prompt_inputs) + shuffled_prompt_ids, shuffled_prompt_mask = shuffled_prompt_inputs["input_ids"], 
shuffled_prompt_inputs["attention_mask"] + if self.max_prompt_length is not None: + shuffled_prompt_ids = shuffled_prompt_ids[:, -self.max_prompt_length :] + shuffled_prompt_mask = shuffled_prompt_mask[:, -self.max_prompt_length :] + else: + shuffled_mm_data = [None] + + + + if self.args.use_vllm: + # First, have main process load weights if needed + if self.state.global_step != self._last_loaded_step: + with unwrap_model_for_generation( + self.model, + self.accelerator, + gather_deepspeed3_params=True, # TODO: fix this, self.args.ds3_gather_for_generation, + ) as unwrapped_model: + if is_compiled_module(unwrapped_model): + state_dict = unwrapped_model._orig_mod.state_dict() + else: + state_dict = unwrapped_model.state_dict() + if self.accelerator.is_main_process: + llm_model = ( + self.llm.llm_engine.model_executor.driver_worker.model_runner.model + ) + # import pdb + # pdb.set_trace() + llm_model.load_weights(state_dict.items()) + self._last_loaded_step = self.state.global_step + ''' + # Generate completions using vLLM: gather all prompts and use them in a single call in the main process + all_prompts_text = gather_object(prompts_text) + all_mm_data = gather_object(mm_data) + # group into pairs + all_multimodal_inputs = [] + + if self.temporal: + shuffled_all_mm_data_none = gather_object(shuffled_mm_data) + shuffled_all_mm_data = [x for x in shuffled_all_mm_data_none if x] + shuffled_all_multimodal_inputs = [] + + # 2. Refer to TobiasLee's implementation suggestions + # this is a better implementation for vLLM sampling. + for prompt, mm_item in zip(all_prompts_text, all_mm_data): + all_multimodal_inputs.append({"prompt": prompt, "multi_modal_data": {mm_item[0]: mm_item[1]}}) + + if self.temporal and shuffled_all_mm_data!=[]: + for mm_item in shuffled_all_mm_data: + shuffled_all_multimodal_inputs.append({"prompt": all_prompts_text[mm_item[0]], "multi_modal_data": {mm_item[1]: mm_item[2]}}) + + # Create sampling params with num_generations + if self.accelerator.is_main_process: + # Clone to avoid modifying original params + sampling_params = copy.deepcopy(self.sampling_params) + sampling_params.n = self.num_generations + # Single generate call with all prompts + if self.accelerator.is_main_process: + outputs = self.llm.generate( + all_multimodal_inputs, + sampling_params=sampling_params, + use_tqdm=False, + ) + # Flatten outputs: [prompt1_gen1, prompt1_gen2, ..., prompt2_gen1, prompt2_gen2, ...] + completion_ids = [out.token_ids for completion in outputs for out in completion.outputs] + + if self.temporal and shuffled_all_mm_data!=[]: + # Clone to avoid modifying original params + shuffled_sampling_params = copy.deepcopy(self.sampling_params) + shuffled_sampling_params.n = self.num_generations // 2 + # Single generate call with all prompts + if self.accelerator.is_main_process: + shuffled_outputs = self.llm.generate( + shuffled_all_multimodal_inputs, + sampling_params=shuffled_sampling_params, + use_tqdm=False, + ) + # Flatten outputs: [prompt1_gen1, prompt1_gen2, ..., prompt2_gen1, prompt2_gen2, ...] 
+ shuffled_completion_ids = [out.token_ids for completion in shuffled_outputs for out in completion.outputs] + + + else: + completion_ids = [None] * len(all_multimodal_inputs) * self.num_generations + + if self.temporal and shuffled_all_mm_data!=[]: + shuffled_completion_ids = [None] * len(shuffled_all_multimodal_inputs) * (self.num_generations // 2) + + + # broadcast and slice + completion_ids = broadcast_object_list(completion_ids, from_process=0) + process_slice = slice( + self.accelerator.process_index * len(prompts) * self.num_generations, + (self.accelerator.process_index + 1) * len(prompts) * self.num_generations, + ) + completion_ids = completion_ids[process_slice] + + # Pad the completions, and concatenate them with the prompts + completion_ids = [torch.tensor(ids, device=device) for ids in completion_ids] + completion_ids = pad( + completion_ids, padding_value=self.processing_class.pad_token_id + ) + ''' + + completion_ids, completions = self._vllm_generate( + prompts_text, # original text prompts + mm_data, # vision payload (may be empty for text-only) + self.num_generations, + ) + + prompt_ids = prompt_ids.repeat_interleave(self.num_generations, dim=0) + prompt_completion_ids = torch.cat([prompt_ids, completion_ids], dim=1) + + prompt_length = prompt_ids.size(1) + + print('prompt_length:', prompt_length) + + prompt_ids = prompt_completion_ids[:, :prompt_length] + completion_ids = prompt_completion_ids[:, prompt_length:] + prompt_mask = prompt_mask.repeat_interleave(self.num_generations, dim=0) + + + ''' + This is the additional code that avoids the shuffled_all_mm_data variable undefined error. + ''' + if self.temporal and video_inputs: + # ❶ make the shuffled video batch (you already computed shuffled_video_inputs) + local_shuffled_mm = [[data_type, shuffled_video_inputs]] + shuffled_prompts = copy.deepcopy(prompts_text) + + # ❷ generate half as many completions for each prompt + shuffled_completion_ids, _ = self._vllm_generate( + prompts_text=shuffled_prompts, + mm_data=local_shuffled_mm, + n=self.num_generations // 2, + ) + + # ❸ mimic the old triple-list so later broadcast logic works unchanged + shuffled_all_mm_data = [[self.accelerator.process_index, + data_type, + shuffled_video_inputs]] + # ----------------------------------------------------------------- + + if self.temporal and shuffled_all_mm_data!=[]: + # broadcast and slice + shuffled_completion_ids = broadcast_object_list(shuffled_completion_ids, from_process=0) + process_id_list = [] + for mm_item in shuffled_all_mm_data: + process_id_list += [mm_item[0]] * len(prompts) * (self.num_generations // 2) + + if video_inputs: + cur_shuffled_completion_ids = [] + for i in range(len(process_id_list)): + if self.accelerator.process_index == process_id_list[i]: + cur_shuffled_completion_ids.append(shuffled_completion_ids[i]) + + # Pad the completions, and concatenate them with the prompts + cur_shuffled_completion_ids = [torch.tensor(ids, device=device) for ids in cur_shuffled_completion_ids] + cur_shuffled_completion_ids = pad( + cur_shuffled_completion_ids, padding_value=self.processing_class.pad_token_id + ) + shuffled_completion_ids = cur_shuffled_completion_ids + + + else: + raise ValueError("Only vLLM generation is supported in this version ") + '''Above is additional code''' + + + if self.temporal and shuffled_all_mm_data!=[]: + # broadcast and slice + shuffled_completion_ids = broadcast_object_list(shuffled_completion_ids, from_process=0) + process_id_list = [] + for mm_item in shuffled_all_mm_data: + 
process_id_list += [mm_item[0]] * len(prompts) * (self.num_generations // 2) + + if video_inputs: + cur_shuffled_completion_ids = [] + for i in range(len(process_id_list)): + if self.accelerator.process_index == process_id_list[i]: + cur_shuffled_completion_ids.append(shuffled_completion_ids[i]) + + # Pad the completions, and concatenate them with the prompts + cur_shuffled_completion_ids = [torch.tensor(ids, device=device) for ids in cur_shuffled_completion_ids] + cur_shuffled_completion_ids = pad( + cur_shuffled_completion_ids, padding_value=self.processing_class.pad_token_id + ) + shuffled_completion_ids = cur_shuffled_completion_ids + + + else: + raise ValueError("Only vLLM generation is supported in this version ") + + # below are the same with yifan's code + # Mask everything after the first EOS token + is_eos = completion_ids == self.processing_class.eos_token_id + device = self.accelerator.device + eos_idx = torch.full((is_eos.size(0),), is_eos.size(1), dtype=torch.long, device=device) + eos_idx[is_eos.any(dim=1)] = is_eos.int().argmax(dim=1)[is_eos.any(dim=1)] + sequence_indices = torch.arange(is_eos.size(1), device=device).expand(is_eos.size(0), -1) + completion_mask = (sequence_indices <= eos_idx.unsqueeze(1)).int() + + + + prompt_inputs.pop("input_ids") + prompt_inputs.pop("attention_mask") + + if data_type == 'image': + prompt_inputs["pixel_values"] = prompt_inputs["pixel_values"].repeat(len(prompt_completion_ids), 1) + prompt_inputs["image_grid_thw"] = prompt_inputs["image_grid_thw"].repeat(len(prompt_completion_ids), 1) + # import pdb; pdb.set_trace() + + + if data_type == 'video': + prompt_inputs["pixel_values_videos"] = prompt_inputs["pixel_values_videos"].repeat(len(prompt_completion_ids), 1) + prompt_inputs["video_grid_thw"] = prompt_inputs["video_grid_thw"].repeat(len(prompt_completion_ids), 1) + if 'second_per_grid_ts' in prompt_inputs: + del prompt_inputs["second_per_grid_ts"] + + # import pdb + # pdb.set_trace() + + # per_token_logps = self._get_per_token_logps(model, prompt_completion_ids, attention_mask, pixel_values, image_grid_thw) + per_token_logps = self._get_per_token_logps(model, prompt_completion_ids, **prompt_inputs) + # Get rid of the prompt (-1 because of the shift done in get_per_token_logps) + per_token_logps = per_token_logps[:, prompt_length - 1 :] + + gc.collect() + torch.cuda.empty_cache() + + with torch.inference_mode(): + if self.ref_model is not None: + ref_per_token_logps = self._get_per_token_logps(self.ref_model, prompt_completion_ids, **prompt_inputs) + else: + with self.accelerator.unwrap_model(model).disable_adapter(): + ref_per_token_logps = self._get_per_token_logps(model, prompt_completion_ids, **prompt_inputs) + ref_per_token_logps = ref_per_token_logps[:, prompt_length - 1 :] + + x_clamped = torch.clamp(ref_per_token_logps - per_token_logps, min=-10, max=10) # 限制 x 的范围 + per_token_kl = torch.exp(x_clamped) - x_clamped - 1 + + gc.collect() + torch.cuda.empty_cache() + + if self.temporal and video_inputs: + + shuffled_completions = self.processing_class.batch_decode(shuffled_completion_ids, skip_special_tokens=True) + if is_conversational(inputs[0]): + shuffled_completions = [[{"role": "assistant", "content": shuffled_completion}] for shuffled_completion in shuffled_completions] + + # Compute the rewards + shuffled_prompts = [prompt for prompt in prompts for _ in range(self.shuffled_num_generations)] + shuffled_rewards_per_func = torch.zeros(len(shuffled_prompts), len(self.reward_funcs), device=device) + for i, (reward_func, 
reward_processing_class) in enumerate( + zip(self.reward_funcs, self.reward_processing_classes) + ): + # Repeat all input columns (but "prompt" and "completion") to match the number of generations + shuffled_reward_kwargs = {key: [] for key in inputs[0].keys() if key not in ["prompt", "completion"]} + for key in shuffled_reward_kwargs: + for example in inputs: + # Repeat each value in the column for `num_generations` times + shuffled_reward_kwargs[key].extend([example[key]] * self.shuffled_num_generations) + shuffled_output_reward_func = reward_func(prompts=shuffled_prompts, completions=shuffled_completions, **shuffled_reward_kwargs) + shuffled_rewards_per_func[:, i] = torch.tensor(shuffled_output_reward_func, dtype=torch.float32, device=device) + + + + # Decode the generated completions + completions = self.processing_class.batch_decode( + completion_ids, skip_special_tokens=True + ) + if is_conversational(inputs[0]): + completions = [ + [{"role": "assistant", "content": completion}] + for completion in completions + ] + + + '''Below is code for second completions generation''' + if is_conversational(inputs[0]): + first_texts = [c[0]["content"] for c in completions] + else: + first_texts = completions + + # ------------------------------------------------------------ + # 2️⃣ Build follow-up prompts with `extract_info` + # ------------------------------------------------------------ + follow_up_prompts = [extract_info(txt) for txt in first_texts] + + # ------------------------------------------------------------ + # 3️⃣ SECOND-hop generation ➜ `second_completions` + # ------------------------------------------------------------ + _, second_texts = self._vllm_generate( + follow_up_prompts, # new prompts (pure text) + None, # no vision payload + 1 # one follow-up per prompt + ) + + # pack in chat format if needed + if is_conversational(inputs[0]): + second_completions = [ + [{"role": "assistant", "content": t}] for t in second_texts + ] + else: + second_completions = second_texts + + '''Above is code for second completions generation''' + + # Compute the rewards + prompts = [prompt for prompt in prompts for _ in range(self.num_generations)] + rewards_per_func = torch.zeros( + len(prompts), len(self.reward_funcs), device=device + ) + for i, (reward_func, reward_processing_class) in enumerate( + zip(self.reward_funcs, self.reward_processing_classes) + ): + reward_kwargs = { + key: [] + for key in inputs[0].keys() + if key not in ["prompt", "completion"] + } + + '''Below is code for taking second generations''' + # every original example contributes `self.num_generations` + for example in inputs: + for _ in range(self.num_generations): # n times + for key in reward_kwargs: + reward_kwargs[key].append(example[key]) + + # -------- call the reward function -------- + outputs = reward_func( + prompts=follow_up_prompts, # ⬅ extracted info + completions=second_completions, # ⬅ fresh answers + **reward_kwargs, + ) + rewards_per_func[:, i] = torch.tensor(outputs, dtype=torch.float32, device=device) + '''Above is code for taking second generations''' + + # for key in reward_kwargs: + # for example in inputs: + # # Repeat each value in the column for `num_generations` times + # reward_kwargs[key].extend([example[key]] * self.num_generations) + # output_reward_func = reward_func( + # prompts=prompts, completions=completions, **reward_kwargs + # ) + # rewards_per_func[:, i] = torch.tensor( + # output_reward_func, dtype=torch.float32, device=device + # ) + + + # rewards_per_func = gather(rewards_per_func) + 
# # Sum the rewards from all reward functions + # rewards = rewards_per_func.sum(dim=1) + + # process_slice = slice( + # self.accelerator.process_index * len(prompts), + # (self.accelerator.process_index + 1) * len(prompts), + # ) + + # rewards = rewards[process_slice] + + + + if self.temporal and video_inputs: + temporal_rewards_per_func = rewards_per_func.clone() + + acc_mean = temporal_rewards_per_func[:, 0].mean() + shuffled_acc_mean = shuffled_rewards_per_func[:, 0].mean() + + if acc_mean >= 0.8 * shuffled_acc_mean: + mask = temporal_rewards_per_func[:, 0] > 0.1 + temporal_rewards_per_func[mask, 0] = temporal_rewards_per_func[mask, 0] + 0.3 + temporal_rewards = torch.tensor([1.0]).to('cuda') + else: + temporal_rewards = torch.tensor([0.0]).to('cuda') + else: + temporal_rewards = torch.tensor([0.5]).to('cuda') + + # Sum the rewards from all reward functions + if self.temporal and video_inputs: + rewards = temporal_rewards_per_func.sum(dim=1) + else: + rewards = rewards_per_func.sum(dim=1) + + if self.len_control: + mem_rewards = [0] * self.num_generations + mask = rewards_per_func[:, 0] > 0.1 + lenth_list = completion_mask.sum(1) + selected_indices = torch.nonzero(mask, as_tuple=True)[0].tolist() + # if len(selected_indices) > 1 and len(selected_indices) < self.num_generations: + # if len(selected_indices) > 1: + # selected_items = [(i, lenth_list[i]) for i in selected_indices] + # sorted_items = sorted(selected_items, key=lambda x: x[1], reverse=True) + # N = len(sorted_items) + # for rank, (idx, length) in enumerate(sorted_items): + # reward = 0.2 - 0.2 * (rank / N) + # rewards[idx] += reward + # mem_rewards[idx] = reward + # for idx in range(len(lenth_list)): + # if lenth_list[idx] >= 512: + # rewards[idx] -= 0.5 + + if len(selected_indices) > 1: + for idx in selected_indices: + if 320 <= lenth_list[idx] <= 512: + rewards[idx] += 0.2 + + print(rewards) + print(completion_mask.sum(1)) + + # Compute grouped-wise rewards + mean_grouped_rewards = rewards.view(-1, self.num_generations).mean(dim=1) + std_grouped_rewards = rewards.view(-1, self.num_generations).std(dim=1) + + # Normalize the rewards to compute the advantages + mean_grouped_rewards = mean_grouped_rewards.repeat_interleave(self.num_generations, dim=0) + std_grouped_rewards = std_grouped_rewards.repeat_interleave(self.num_generations, dim=0) + advantages = (rewards - mean_grouped_rewards) / (std_grouped_rewards + 1e-4) + + # x - x.detach() allows for preserving gradients from x + per_token_loss = torch.exp(per_token_logps - per_token_logps.detach()) * advantages.unsqueeze(1) + per_token_loss = -(per_token_loss - self.beta * per_token_kl) + # per_token_loss = -per_token_loss + loss = ((per_token_loss * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean() + + + # import pdb + # pdb.set_trace() + + # Log the metrics + completion_length = self.accelerator.gather_for_metrics(completion_mask.sum(1)).float().mean().item() + self._metrics["completion_length"].append(completion_length) + + reward_per_func = self.accelerator.gather_for_metrics(rewards_per_func).mean(0) + for i, reward_func in enumerate(self.reward_funcs): + if isinstance(reward_func, PreTrainedModel): + reward_func_name = reward_func.config._name_or_path.split("/")[-1] + else: + reward_func_name = reward_func.__name__ + self._metrics[f"rewards/{reward_func_name}"].append(reward_per_func[i].item()) + + gathered_rewards = self.accelerator.gather_for_metrics(rewards) + + num_devices = gathered_rewards.size(0) // self.num_generations + rewards_per_device = 
gathered_rewards.view(num_devices, self.num_generations) + wrong_devices = (rewards_per_device <= 1).all(dim=1) + wrong_ratio = wrong_devices.sum().item() / num_devices + + correct_devices = (rewards_per_device >= 2).all(dim=1) + correct_ratio = correct_devices.sum().item() / num_devices + + self._metrics["all_wrong"].append(wrong_ratio) + self._metrics["all_correct"].append(correct_ratio) + + if self.temporal: + temporal_rewards_list = self.accelerator.gather_for_metrics(temporal_rewards) + self._metrics["temporal_rewards"].append(self.accelerator.gather_for_metrics(temporal_rewards_list).mean().item()) + + self._metrics["reward"].append(self.accelerator.gather_for_metrics(rewards).mean().item()) + + self._metrics["reward_std"].append(self.accelerator.gather_for_metrics(std_grouped_rewards).mean().item()) + + mean_kl = ((per_token_kl * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean() + self._metrics["kl"].append(self.accelerator.gather_for_metrics(mean_kl).mean().item()) + + + return loss + + + + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + metrics = {key: sum(val) / len(val) for key, val in self._metrics.items()} # average the metrics + + # This method can be called both in training and evaluation. When called in evaluation, the keys in `logs` + # start with "eval_". We need to add the prefix "eval_" to the keys in `metrics` to match the format. + if next(iter(logs.keys())).startswith("eval_"): + metrics = {f"eval_{key}": val for key, val in metrics.items()} + + logs = {**logs, **metrics} + if version.parse(transformers.__version__) >= version.parse("4.47.0.dev0"): + super().log(logs, start_time) + else: # transformers<=4.46 + super().log(logs) + self._metrics.clear() \ No newline at end of file diff --git a/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer_modified_orig.py b/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer_modified_orig.py new file mode 100644 index 0000000000000000000000000000000000000000..0d475bbebac9bdf278a250e903fc02833c20fc4c --- /dev/null +++ b/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer_modified_orig.py @@ -0,0 +1,935 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
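+# NOTE: this "modified_orig" trainer keeps the single-pass reward flow (rewards are computed directly
+# on the first-pass completions); it omits the follow-up "second completions" generation step used in
+# the modified trainer above.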
+ +import os +import textwrap +from collections import defaultdict +from typing import Any, Callable, Optional, Union +from accelerate.utils.other import is_compiled_module +from accelerate.utils import broadcast_object_list, gather, gather_object +import torch +import torch.utils.data +import transformers +import warnings +from unittest.mock import patch +from datasets import Dataset, IterableDataset +from packaging import version +from transformers import ( + AriaForConditionalGeneration, + AriaProcessor, + AutoModelForCausalLM, + AutoModelForSequenceClassification, + AutoProcessor, + AutoTokenizer, + GenerationConfig, + PreTrainedModel, + PreTrainedTokenizerBase, + Qwen2VLForConditionalGeneration, + Qwen2_5_VLForConditionalGeneration, + Trainer, + TrainerCallback, + is_wandb_available, +) +from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled +from transformers.utils import is_peft_available + +from trl.data_utils import ( + apply_chat_template, + is_conversational, + maybe_apply_chat_template, +) +from trl.import_utils import is_vllm_available + +from trl.models import ( + create_reference_model, + prepare_deepspeed, + unwrap_model_for_generation, +) +from trl.trainer.grpo_config import GRPOConfig +from trl.trainer.utils import generate_model_card, get_comet_experiment_url, pad +from trl import GRPOTrainer + +import copy + +if is_peft_available(): + from peft import PeftConfig, get_peft_model + +if is_vllm_available(): + from vllm import LLM, SamplingParams + +if is_wandb_available(): + import wandb +import torch.nn as nn +from torch.utils.data import Sampler +import gc +from qwen_vl_utils import process_vision_info + +# What we call a reward function is a callable that takes a list of prompts and completions and returns a list of +# rewards. When it's a string, it's a model ID, so it's loaded as a pretrained model. 
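+# Illustrative sketch only (not part of the trainer): a callable reward with the expected signature
+# could look like
+#     def format_reward(prompts, completions, **kwargs) -> list[float]:
+#         return [1.0 if "<answer>" in c[0]["content"] else 0.0 for c in completions]
+# where `completions` follows the conversational format built inside `compute_loss`.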
+RewardFunc = Union[str, PreTrainedModel, Callable[[list, list], list[float]]] + + +class Qwen2VLGRPOVLLMTrainerModifiedOrig(Trainer): + def __init__( + self, + model: Union[str, PreTrainedModel], + reward_funcs: Union[RewardFunc, list[RewardFunc]], + args: GRPOConfig = None, + script_args = None, + train_dataset: Optional[Union[Dataset, IterableDataset]] = None, + eval_dataset: Optional[ + Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]] + ] = None, + processing_class: Optional[PreTrainedTokenizerBase] = None, + reward_processing_classes: Optional[ + Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]] + ] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[ + Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR] + ] = (None, None), + peft_config: Optional["PeftConfig"] = None, + # qwen2-vl related params + max_pixels: Optional[int] = 12845056, + min_pixels: Optional[int] = 3136, + attn_implementation: str = "flash_attention_2", + ): + + # Args + if args is None: + model_name = model if isinstance(model, str) else model.config._name_or_path + model_name = model_name.split("/")[-1] + args = GRPOConfig(f"{model_name}-GRPO") + + # Models + # Trained model + model_init_kwargs = args.model_init_kwargs or {} + model_init_kwargs["attn_implementation"] = attn_implementation + if isinstance(model, str): + model_id = model + torch_dtype = model_init_kwargs.get("torch_dtype") + if ( + isinstance(torch_dtype, torch.dtype) + or torch_dtype == "auto" + or torch_dtype is None + ): + pass # torch_dtype is already a torch.dtype or "auto" or None + elif isinstance(torch_dtype, str): # it's a str, but not "auto" + torch_dtype = getattr(torch, torch_dtype) + model_init_kwargs["torch_dtype"] = torch_dtype + else: + raise ValueError( + "Invalid `torch_dtype` passed to `GRPOConfig`. Expected either 'auto' or a string representing " + f"a `torch.dtype` (e.g., 'float32'), but got {torch_dtype}." + ) + # Disable caching if gradient checkpointing is enabled (not supported) + model_init_kwargs["use_cache"] = ( + False + if args.gradient_checkpointing + else model_init_kwargs.get("use_cache") + ) + if "Qwen2-VL" in model_id: + model = Qwen2VLForConditionalGeneration.from_pretrained( + model, **model_init_kwargs + ) + elif "Qwen2.5-VL" in model_id: + model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model, **model_init_kwargs + ) + elif "Aria" in model_id: + model_init_kwargs.pop("use_cache") + model = AriaForConditionalGeneration.from_pretrained( + model, **model_init_kwargs + ) + else: + model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model, **model_init_kwargs) + else: + model_id = model.config._name_or_path + if args.model_init_kwargs is not None: + raise ValueError( + "You passed `model_init_kwargs` to the `GRPOConfig`, but your model is already instantiated. " + "This argument can only be used when the `model` argument is a string." 
+ ) + + if peft_config is not None: + model = get_peft_model(model, peft_config) + + # Reference model + if is_deepspeed_zero3_enabled(): + if "Qwen2-VL" in model_id: + self.ref_model = Qwen2VLForConditionalGeneration.from_pretrained( + model_id, **model_init_kwargs + ) + elif "Qwen2.5-VL" in model_id: + self.ref_model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model_id, **model_init_kwargs + ) + elif "Aria" in model_id: + self.ref_model = AriaForConditionalGeneration.from_pretrained( + model_id, **model_init_kwargs + ) + else: + self.ref_model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model_id, **model_init_kwargs + ) + elif peft_config is None: + # If PEFT configuration is not provided, create a reference model based on the initial model. + self.ref_model = create_reference_model(model) + else: + # If PEFT is used, the reference model is not needed since the adapter can be disabled + # to revert to the initial model. + self.ref_model = None + + # Processing class + # if processing_class is None: + # if "Qwen" in model_id or "Aria" in model_id: + # processing_class = AutoProcessor.from_pretrained(model_id) + # pad_token_id = processing_class.tokenizer.pad_token_id + # processing_class.pad_token_id = pad_token_id + # processing_class.eos_token_id = processing_class.tokenizer.eos_token_id + # if "Qwen" in model_id: + # processing_class.image_processor.max_pixels = max_pixels + # processing_class.image_processor.min_pixels = min_pixels + # else: + # processing_class = AutoTokenizer.from_pretrained( + # model.config._name_or_path, padding_side="left" + # ) + # pad_token_id = processing_class.pad_token_id + + + # ──────────────────────────────────────────────────────────────── + # Robust processor loading ― works for both fresh models *and* checkpoints + # ──────────────────────────────────────────────────────────────── + if processing_class is None: + # 1️⃣ First try to load whatever lives in the directory we were given. + # This succeeds if you previously did `processor.save_pretrained(output_dir)`. + try: + processing_class = AutoProcessor.from_pretrained(model_id) + pad_token_id = processing_class.tokenizer.pad_token_id + except (OSError, ValueError): # no processor files found + # 2️⃣ Fall back to inspecting the *model object* instead of the path. + is_vl_model = ( + hasattr(model, "vision_tower") or # Qwen-VL, InternVL, etc. + getattr(model.config, "vision_config", None) is not None or + getattr(model.config, "image_vocab_size", None) is not None + ) + + if is_vl_model: + # Always use the *base* model name stored in the config. + base_name = model.config._name_or_path # e.g. 
"Qwen/Qwen2.5-VL-7B-Instruct" + processing_class = AutoProcessor.from_pretrained(base_name) + pad_token_id = processing_class.tokenizer.pad_token_id + + # Optional Qwen-specific limits + if hasattr(processing_class, "image_processor"): + processing_class.image_processor.max_pixels = max_pixels + processing_class.image_processor.min_pixels = min_pixels + else: + # Pure text model → plain tokenizer + processing_class = AutoTokenizer.from_pretrained( + model.config._name_or_path, padding_side="left" + ) + pad_token_id = processing_class.pad_token_id + + # 3️⃣ Harmonise attributes the rest of the trainer expects + processing_class.pad_token_id = pad_token_id + if not hasattr(processing_class, "eos_token_id"): + processing_class.eos_token_id = pad_token_id + # ──────────────────────────────────────────────────────────────── + + # Reward functions + if not isinstance(reward_funcs, list): + reward_funcs = [reward_funcs] + for i, reward_func in enumerate(reward_funcs): + if isinstance(reward_func, str): + reward_funcs[i] = AutoModelForSequenceClassification.from_pretrained( + reward_func, num_labels=1, **model_init_kwargs + ) + self.reward_funcs = reward_funcs + + # Reward processing class + if reward_processing_classes is None: + reward_processing_classes = [None] * len(reward_funcs) + elif not isinstance(reward_processing_classes, list): + reward_processing_classes = [reward_processing_classes] + else: + if len(reward_processing_classes) != len(reward_funcs): + raise ValueError( + "The number of reward processing classes must match the number of reward functions." + ) + + for i, (reward_processing_class, reward_func) in enumerate( + zip(reward_processing_classes, reward_funcs) + ): + if isinstance(reward_func, PreTrainedModel): + if reward_processing_class is None: + reward_processing_class = AutoTokenizer.from_pretrained( + reward_func.config._name_or_path + ) + if reward_processing_class.pad_token_id is None: + reward_processing_class.pad_token = ( + reward_processing_class.eos_token + ) + # The reward model computes the reward for the latest non-padded token in the input sequence. + # So it's important to set the pad token ID to the padding token ID of the processing class. 
+ reward_func.config.pad_token_id = reward_processing_class.pad_token_id + reward_processing_classes[i] = reward_processing_class + self.reward_processing_classes = reward_processing_classes + + # Data collator + def data_collator(features): # No data collation is needed in GRPO + return features + + # Training arguments + self.max_prompt_length = args.max_prompt_length + self.max_completion_length = ( + args.max_completion_length + ) # = |o_i| in the GRPO paper + self.num_generations = args.num_generations # = G in the GRPO paper + self.temporal = script_args.temporal + self.generation_config = GenerationConfig( + max_new_tokens=self.max_completion_length, + do_sample=True, + temperature=1, # HACK + num_return_sequences=self.num_generations, + pad_token_id=pad_token_id, + ) + self.beta = args.beta + + self.shuffled_num_generations = self.num_generations // 2 + self.shuffled_generation_config = GenerationConfig( + max_new_tokens=self.max_completion_length, + do_sample=True, + top_p=0.95, + temperature=1, # HACK + num_return_sequences=self.shuffled_num_generations, + pad_token_id=pad_token_id, + ) + + self.dummy_generation_config = GenerationConfig( + max_new_tokens=1, + do_sample=True, + top_p=0.95, + temperature=1, # HACK + num_return_sequences=1, + pad_token_id=pad_token_id, + ) + self.len_control = script_args.len_control + self.beta = args.beta + + # The trainer estimates the number of FLOPs (floating-point operations) using the number of elements in the + # input tensor associated with the key "input_ids". However, in GRPO, the sampled data does not include the + # "input_ids" key. Instead, the available keys is "prompt". As a result, the trainer issues the warning: + # "Could not estimate the number of tokens of the input, floating-point operations will not be computed." To + # suppress this warning, we set the "estimate_tokens" key in the model's "warnings_issued" dictionary to True. + # This acts as a flag to indicate that the warning has already been issued. + model.warnings_issued["estimate_tokens"] = True + + # Initialize the metrics + self._metrics = defaultdict(list) + self.use_vllm = args.use_vllm + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + callbacks=callbacks, + optimizers=optimizers, + ) + # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the + # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set + # self.model_accepts_loss_kwargs to False to enable scaling. + self.model_accepts_loss_kwargs = False + + if self.use_vllm: + if not is_vllm_available(): + raise ImportError( + "vLLM is not available and `use_vllm` is set to True. Please install vLLM with " + "`pip install vllm` to use it." + ) + + if self.accelerator.is_main_process: + vllm_device = self.args.vllm_device + if vllm_device == "auto": + vllm_device = f"cuda:{self.accelerator.num_processes}" # take the next GPU idx + # Check that the requested device is available + if ( + vllm_device.split(":")[0] == "cuda" + and int(vllm_device.split(":")[1]) >= torch.cuda.device_count() + ): + raise ValueError( + f"The requested device for vllm ({vllm_device}) is not available. You are likely using vLLM " + "without restricting the number of GPUs for training. 
Set the `--num_processes` argument to a " + "value lower than the number of GPUs available on your machine—typically, reducing it by one " + f"is sufficient. In your case: `--num_processes {torch.cuda.device_count() - 1}`." + ) + # Check that the requested device is not also used for training + if vllm_device in { + f"cuda:{idx}" for idx in range(self.accelerator.num_processes) + }: + warnings.warn( + f"The requested device {vllm_device} is also used for training. This may lead to unexpected " + "behavior. It is recommended to use a dedicated device for vLLM." + ) + # vLLM is not compatible with accelerate. So we need to patch it to make sure we can (1) place the vLLM + # model on the desired device (world_size_patch) and (2) avoid a test that is not designed for our + # setting (profiling_patch). + world_size_patch = patch( + "torch.distributed.get_world_size", return_value=1 + ) + profiling_patch = patch( + "vllm.worker.worker.Worker._assert_memory_footprint_increased_during_profiling", + return_value=None, + ) + with world_size_patch, profiling_patch: + print("vllm is running on: ", vllm_device) + self.llm = LLM( + model=model.name_or_path, + device=vllm_device, + gpu_memory_utilization=self.args.vllm_gpu_memory_utilization, + dtype=torch.bfloat16, + # Automatic Prefix Caching caches the KV cache of existing queries, so that a new query can + # directly reuse the KV cache if it shares the same prefix with one of the existing queries. + # This is particularly useful here because we generate completions from the same prompts. + enable_prefix_caching=True, + enforce_eager=True, + mm_processor_kwargs=( + { + "max_pixels": max_pixels, + "min_pixels": min_pixels, + } + # if "Qwen2-VL" in model_id or "Qwen2.5-VL" in model_id + if False + else None + ), + max_model_len=args.max_prompt_length + args.max_completion_length, + ) + self.sampling_params = SamplingParams( + temperature=1.0, + top_p=0.95, + max_tokens=self.max_completion_length, + ) + + self._last_loaded_step = 0 # tag to avoid useless loading during grad accumulation + + # When using vLLM, the main process is responsible for loading the model weights. This can cause process + # desynchronization and seems to lead to DeepSpeed hanging during initialization. To prevent this, we + # synchronize all processes after vLLM has been fully initialized. + self.accelerator.wait_for_everyone() + else: + raise ValueError( + "GRPOVLLMTrainerModified only supports vllm generation, please set --use_vllm True" + ) + + if self.ref_model is not None: + if self.is_deepspeed_enabled: + self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator) + else: + self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + + for i, reward_func in enumerate(self.reward_funcs): + if isinstance(reward_func, PreTrainedModel): + self.reward_funcs[i] = self.accelerator.prepare_model(reward_func, evaluation_mode=True) + + def _set_signature_columns_if_needed(self): + # If `self.args.remove_unused_columns` is True, non-signature columns are removed. + # By default, this method sets `self._signature_columns` to the model's expected inputs. + # In GRPOTrainer, we preprocess data, so using the model's signature columns doesn't work. + # Instead, we set them to the columns expected by the `training_step` method, hence the override. 
+ if self._signature_columns is None: + self._signature_columns = ["prompt"] + + # Get the per-token log probabilities for the completions for the model and the reference model + def _get_per_token_logps(self, model, input_ids, **kwargs): + # logits = model(input_ids, attention_mask=attention_mask, pixel_values=pixel_values, image_grid_thw=image_grid_thw).logits # (B, L, V) + # import pdb + # pdb.set_trace() + logits = model(input_ids, **kwargs).logits + logits = logits[:, :-1, :] # (B, L-1, V), exclude the last logit: it corresponds to the next token pred + input_ids = input_ids[:, 1:] # (B, L-1), exclude the first input ID since we don't have logits for it + # Compute the log probabilities for the input tokens. Use a loop to reduce memory peak. + per_token_logps = [] + for logits_row, input_ids_row in zip(logits, input_ids): + log_probs = logits_row.log_softmax(dim=-1) + token_log_prob = torch.gather(log_probs, dim=1, index=input_ids_row.unsqueeze(1)).squeeze(1) + per_token_logps.append(token_log_prob) + return torch.stack(per_token_logps) + + # Trainer "prepares" the inputs before calling `compute_loss`. It converts to tensor and move to device. + # Since we preprocess the data in `compute_loss`, we need to override this method to skip this step. + def _prepare_inputs( + self, inputs: dict[str, Union[torch.Tensor, Any]] + ) -> dict[str, Union[torch.Tensor, Any]]: + return inputs + + def remove_none_from_data(self, data): + for entry in data: + if "content" in entry and isinstance(entry["content"], list): + for sub_entry in entry["content"]: + if isinstance(sub_entry, dict): + keys_to_remove = [k for k, v in sub_entry.items() if v is None] + for k in keys_to_remove: + del sub_entry[k] + return data + + + + def compute_loss( + self, model, inputs, return_outputs=False, num_items_in_batch=None + ): + if return_outputs: + raise ValueError("The GRPOTrainer does not support returning outputs") + # Compute the per-token log probabilities for the model + + + device = self.accelerator.device + prompts = [x["prompt"] for x in inputs] + # images = [x["image"] for x in inputs] + prompts_text = [ + maybe_apply_chat_template(example, self.processing_class)["prompt"] + for example in inputs + ] + + input_copy = copy.deepcopy(inputs[0]['prompt']) + + input_copy = self.remove_none_from_data(input_copy) + + data_type = inputs[0]['data_type'] + + if data_type == 'image': + input_copy[0]['content'][0]['image'] = os.getcwd() + "/Video-R1-data" + inputs[0]['path'][1:] + elif data_type == 'video': + input_copy[0]['content'][0]['video'] = os.getcwd() + "/Video-R1-data" + inputs[0]['path'][1:] + + + image_inputs, video_inputs, video_kwargs = process_vision_info(input_copy, return_video_kwargs=True) + + + prompt_inputs = self.processing_class( + text=copy.deepcopy(prompts_text), + images=image_inputs, + videos=video_inputs, + return_tensors="pt", + padding=True, + padding_side="left", + add_special_tokens=False, + ) + + mm_data = [[data_type, image_inputs if image_inputs else video_inputs]] + prompt_inputs = super()._prepare_inputs(prompt_inputs) + prompt_ids, prompt_mask = prompt_inputs["input_ids"], prompt_inputs["attention_mask"] + + if self.max_prompt_length is not None: + prompt_ids = prompt_ids[:, -self.max_prompt_length :] + prompt_mask = prompt_mask[:, -self.max_prompt_length :] + + + if self.temporal: + if video_inputs: + indices = torch.randperm(video_inputs[0].size(0)) + shuffled_video_inputs = [video_inputs[0][indices]] + shuffled_prompt_inputs = self.processing_class( + 
text=copy.deepcopy(prompts_text), + images=image_inputs, + videos=shuffled_video_inputs, + return_tensors="pt", + padding=True, + padding_side="left", + add_special_tokens=False, + ) + shuffled_mm_data = [[self.accelerator.process_index, data_type, image_inputs if image_inputs else video_inputs]] + shuffled_prompt_inputs = super()._prepare_inputs(shuffled_prompt_inputs) + shuffled_prompt_ids, shuffled_prompt_mask = shuffled_prompt_inputs["input_ids"], shuffled_prompt_inputs["attention_mask"] + if self.max_prompt_length is not None: + shuffled_prompt_ids = shuffled_prompt_ids[:, -self.max_prompt_length :] + shuffled_prompt_mask = shuffled_prompt_mask[:, -self.max_prompt_length :] + else: + shuffled_mm_data = [None] + + + + if self.args.use_vllm: + # First, have main process load weights if needed + if self.state.global_step != self._last_loaded_step: + with unwrap_model_for_generation( + self.model, + self.accelerator, + gather_deepspeed3_params=True, # TODO: fix this, self.args.ds3_gather_for_generation, + ) as unwrapped_model: + if is_compiled_module(unwrapped_model): + state_dict = unwrapped_model._orig_mod.state_dict() + else: + state_dict = unwrapped_model.state_dict() + if self.accelerator.is_main_process: + llm_model = ( + self.llm.llm_engine.model_executor.driver_worker.model_runner.model + ) + # import pdb + # pdb.set_trace() + llm_model.load_weights(state_dict.items()) + self._last_loaded_step = self.state.global_step + + # Generate completions using vLLM: gather all prompts and use them in a single call in the main process + all_prompts_text = gather_object(prompts_text) + all_mm_data = gather_object(mm_data) + # group into pairs + all_multimodal_inputs = [] + + if self.temporal: + shuffled_all_mm_data_none = gather_object(shuffled_mm_data) + shuffled_all_mm_data = [x for x in shuffled_all_mm_data_none if x] + shuffled_all_multimodal_inputs = [] + + # 2. Refer to TobiasLee's implementation suggestions + # this is a better implementation for vLLM sampling. + for prompt, mm_item in zip(all_prompts_text, all_mm_data): + all_multimodal_inputs.append({"prompt": prompt, "multi_modal_data": {mm_item[0]: mm_item[1]}}) + + if self.temporal and shuffled_all_mm_data!=[]: + for mm_item in shuffled_all_mm_data: + shuffled_all_multimodal_inputs.append({"prompt": all_prompts_text[mm_item[0]], "multi_modal_data": {mm_item[1]: mm_item[2]}}) + + # Create sampling params with num_generations + if self.accelerator.is_main_process: + # Clone to avoid modifying original params + sampling_params = copy.deepcopy(self.sampling_params) + sampling_params.n = self.num_generations + # Single generate call with all prompts + if self.accelerator.is_main_process: + outputs = self.llm.generate( + all_multimodal_inputs, + sampling_params=sampling_params, + use_tqdm=False, + ) + # Flatten outputs: [prompt1_gen1, prompt1_gen2, ..., prompt2_gen1, prompt2_gen2, ...] + completion_ids = [out.token_ids for completion in outputs for out in completion.outputs] + + if self.temporal and shuffled_all_mm_data!=[]: + # Clone to avoid modifying original params + shuffled_sampling_params = copy.deepcopy(self.sampling_params) + shuffled_sampling_params.n = self.num_generations // 2 + # Single generate call with all prompts + if self.accelerator.is_main_process: + shuffled_outputs = self.llm.generate( + shuffled_all_multimodal_inputs, + sampling_params=shuffled_sampling_params, + use_tqdm=False, + ) + # Flatten outputs: [prompt1_gen1, prompt1_gen2, ..., prompt2_gen1, prompt2_gen2, ...] 
+ shuffled_completion_ids = [out.token_ids for completion in shuffled_outputs for out in completion.outputs] + + + else: + completion_ids = [None] * len(all_multimodal_inputs) * self.num_generations + + if self.temporal and shuffled_all_mm_data!=[]: + shuffled_completion_ids = [None] * len(shuffled_all_multimodal_inputs) * (self.num_generations // 2) + + + # broadcast and slice + completion_ids = broadcast_object_list(completion_ids, from_process=0) + process_slice = slice( + self.accelerator.process_index * len(prompts) * self.num_generations, + (self.accelerator.process_index + 1) * len(prompts) * self.num_generations, + ) + completion_ids = completion_ids[process_slice] + + # Pad the completions, and concatenate them with the prompts + completion_ids = [torch.tensor(ids, device=device) for ids in completion_ids] + completion_ids = pad( + completion_ids, padding_value=self.processing_class.pad_token_id + ) + prompt_ids = prompt_ids.repeat_interleave(self.num_generations, dim=0) + prompt_completion_ids = torch.cat([prompt_ids, completion_ids], dim=1) + + prompt_length = prompt_ids.size(1) + + # print('prompt_length:', prompt_length) + + prompt_ids = prompt_completion_ids[:, :prompt_length] + completion_ids = prompt_completion_ids[:, prompt_length:] + prompt_mask = prompt_mask.repeat_interleave(self.num_generations, dim=0) + + + if self.temporal and shuffled_all_mm_data!=[]: + # broadcast and slice + shuffled_completion_ids = broadcast_object_list(shuffled_completion_ids, from_process=0) + process_id_list = [] + for mm_item in shuffled_all_mm_data: + process_id_list += [mm_item[0]] * len(prompts) * (self.num_generations // 2) + + if video_inputs: + cur_shuffled_completion_ids = [] + for i in range(len(process_id_list)): + if self.accelerator.process_index == process_id_list[i]: + cur_shuffled_completion_ids.append(shuffled_completion_ids[i]) + + # Pad the completions, and concatenate them with the prompts + cur_shuffled_completion_ids = [torch.tensor(ids, device=device) for ids in cur_shuffled_completion_ids] + cur_shuffled_completion_ids = pad( + cur_shuffled_completion_ids, padding_value=self.processing_class.pad_token_id + ) + shuffled_completion_ids = cur_shuffled_completion_ids + + + else: + raise ValueError("Only vLLM generation is supported in this version ") + + # below are the same with yifan's code + # Mask everything after the first EOS token + is_eos = completion_ids == self.processing_class.eos_token_id + device = self.accelerator.device + eos_idx = torch.full((is_eos.size(0),), is_eos.size(1), dtype=torch.long, device=device) + eos_idx[is_eos.any(dim=1)] = is_eos.int().argmax(dim=1)[is_eos.any(dim=1)] + sequence_indices = torch.arange(is_eos.size(1), device=device).expand(is_eos.size(0), -1) + completion_mask = (sequence_indices <= eos_idx.unsqueeze(1)).int() + + + + prompt_inputs.pop("input_ids") + prompt_inputs.pop("attention_mask") + + if data_type == 'image': + prompt_inputs["pixel_values"] = prompt_inputs["pixel_values"].repeat(len(prompt_completion_ids), 1) + prompt_inputs["image_grid_thw"] = prompt_inputs["image_grid_thw"].repeat(len(prompt_completion_ids), 1) + # import pdb; pdb.set_trace() + + + if data_type == 'video': + prompt_inputs["pixel_values_videos"] = prompt_inputs["pixel_values_videos"].repeat(len(prompt_completion_ids), 1) + prompt_inputs["video_grid_thw"] = prompt_inputs["video_grid_thw"].repeat(len(prompt_completion_ids), 1) + if 'second_per_grid_ts' in prompt_inputs: + del prompt_inputs["second_per_grid_ts"] + + # import pdb + # pdb.set_trace() + + # 
per_token_logps = self._get_per_token_logps(model, prompt_completion_ids, attention_mask, pixel_values, image_grid_thw) + per_token_logps = self._get_per_token_logps(model, prompt_completion_ids, **prompt_inputs) + # Get rid of the prompt (-1 because of the shift done in get_per_token_logps) + per_token_logps = per_token_logps[:, prompt_length - 1 :] + + gc.collect() + torch.cuda.empty_cache() + + with torch.inference_mode(): + if self.ref_model is not None: + ref_per_token_logps = self._get_per_token_logps(self.ref_model, prompt_completion_ids, **prompt_inputs) + else: + with self.accelerator.unwrap_model(model).disable_adapter(): + ref_per_token_logps = self._get_per_token_logps(model, prompt_completion_ids, **prompt_inputs) + ref_per_token_logps = ref_per_token_logps[:, prompt_length - 1 :] + + x_clamped = torch.clamp(ref_per_token_logps - per_token_logps, min=-10, max=10) # 限制 x 的范围 + per_token_kl = torch.exp(x_clamped) - x_clamped - 1 + + gc.collect() + torch.cuda.empty_cache() + + if self.temporal and video_inputs: + + shuffled_completions = self.processing_class.batch_decode(shuffled_completion_ids, skip_special_tokens=True) + if is_conversational(inputs[0]): + shuffled_completions = [[{"role": "assistant", "content": shuffled_completion}] for shuffled_completion in shuffled_completions] + + # Compute the rewards + shuffled_prompts = [prompt for prompt in prompts for _ in range(self.shuffled_num_generations)] + shuffled_rewards_per_func = torch.zeros(len(shuffled_prompts), len(self.reward_funcs), device=device) + for i, (reward_func, reward_processing_class) in enumerate( + zip(self.reward_funcs, self.reward_processing_classes) + ): + # Repeat all input columns (but "prompt" and "completion") to match the number of generations + shuffled_reward_kwargs = {key: [] for key in inputs[0].keys() if key not in ["prompt", "completion"]} + for key in shuffled_reward_kwargs: + for example in inputs: + # Repeat each value in the column for `num_generations` times + shuffled_reward_kwargs[key].extend([example[key]] * self.shuffled_num_generations) + shuffled_output_reward_func = reward_func(prompts=shuffled_prompts, completions=shuffled_completions, **shuffled_reward_kwargs) + shuffled_rewards_per_func[:, i] = torch.tensor(shuffled_output_reward_func, dtype=torch.float32, device=device) + + + + # Decode the generated completions + completions = self.processing_class.batch_decode( + completion_ids, skip_special_tokens=True + ) + if is_conversational(inputs[0]): + completions = [ + [{"role": "assistant", "content": completion}] + for completion in completions + ] + + # Compute the rewards + prompts = [prompt for prompt in prompts for _ in range(self.num_generations)] + rewards_per_func = torch.zeros( + len(prompts), len(self.reward_funcs), device=device + ) + for i, (reward_func, reward_processing_class) in enumerate( + zip(self.reward_funcs, self.reward_processing_classes) + ): + reward_kwargs = { + key: [] + for key in inputs[0].keys() + if key not in ["prompt", "completion"] + } + for key in reward_kwargs: + for example in inputs: + # Repeat each value in the column for `num_generations` times + reward_kwargs[key].extend([example[key]] * self.num_generations) + output_reward_func = reward_func( + prompts=prompts, completions=completions, **reward_kwargs + ) + rewards_per_func[:, i] = torch.tensor( + output_reward_func, dtype=torch.float32, device=device + ) + + + # rewards_per_func = gather(rewards_per_func) + # # Sum the rewards from all reward functions + # rewards = 
rewards_per_func.sum(dim=1) + + # process_slice = slice( + # self.accelerator.process_index * len(prompts), + # (self.accelerator.process_index + 1) * len(prompts), + # ) + + # rewards = rewards[process_slice] + + + + if self.temporal and video_inputs: + temporal_rewards_per_func = rewards_per_func.clone() + + acc_mean = temporal_rewards_per_func[:, 0].mean() + shuffled_acc_mean = shuffled_rewards_per_func[:, 0].mean() + + if acc_mean >= 0.8 * shuffled_acc_mean: + mask = temporal_rewards_per_func[:, 0] > 0.1 + temporal_rewards_per_func[mask, 0] = temporal_rewards_per_func[mask, 0] + 0.3 + temporal_rewards = torch.tensor([1.0]).to('cuda') + else: + temporal_rewards = torch.tensor([0.0]).to('cuda') + else: + temporal_rewards = torch.tensor([0.5]).to('cuda') + + # Sum the rewards from all reward functions + if self.temporal and video_inputs: + rewards = temporal_rewards_per_func.sum(dim=1) + else: + rewards = rewards_per_func.sum(dim=1) + + if self.len_control: + mem_rewards = [0] * self.num_generations + mask = rewards_per_func[:, 0] > 0.1 + lenth_list = completion_mask.sum(1) + selected_indices = torch.nonzero(mask, as_tuple=True)[0].tolist() + # if len(selected_indices) > 1 and len(selected_indices) < self.num_generations: + # if len(selected_indices) > 1: + # selected_items = [(i, lenth_list[i]) for i in selected_indices] + # sorted_items = sorted(selected_items, key=lambda x: x[1], reverse=True) + # N = len(sorted_items) + # for rank, (idx, length) in enumerate(sorted_items): + # reward = 0.2 - 0.2 * (rank / N) + # rewards[idx] += reward + # mem_rewards[idx] = reward + # for idx in range(len(lenth_list)): + # if lenth_list[idx] >= 512: + # rewards[idx] -= 0.5 + + if len(selected_indices) > 1: + for idx in selected_indices: + if 320 <= lenth_list[idx] <= 1600: + rewards[idx] += 0.2 + + # print(rewards) + # print(completion_mask.sum(1)) + + # Compute grouped-wise rewards + mean_grouped_rewards = rewards.view(-1, self.num_generations).mean(dim=1) + std_grouped_rewards = rewards.view(-1, self.num_generations).std(dim=1) + + # Normalize the rewards to compute the advantages + mean_grouped_rewards = mean_grouped_rewards.repeat_interleave(self.num_generations, dim=0) + std_grouped_rewards = std_grouped_rewards.repeat_interleave(self.num_generations, dim=0) + advantages = (rewards - mean_grouped_rewards) / (std_grouped_rewards + 1e-4) + + # x - x.detach() allows for preserving gradients from x + per_token_loss = torch.exp(per_token_logps - per_token_logps.detach()) * advantages.unsqueeze(1) + per_token_loss = -(per_token_loss - self.beta * per_token_kl) + # per_token_loss = -per_token_loss + loss = ((per_token_loss * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean() + + + # import pdb + # pdb.set_trace() + + # Log the metrics + completion_length = self.accelerator.gather_for_metrics(completion_mask.sum(1)).float().mean().item() + self._metrics["completion_length"].append(completion_length) + + reward_per_func = self.accelerator.gather_for_metrics(rewards_per_func).mean(0) + for i, reward_func in enumerate(self.reward_funcs): + if isinstance(reward_func, PreTrainedModel): + reward_func_name = reward_func.config._name_or_path.split("/")[-1] + else: + reward_func_name = reward_func.__name__ + self._metrics[f"rewards/{reward_func_name}"].append(reward_per_func[i].item()) + + gathered_rewards = self.accelerator.gather_for_metrics(rewards) + + num_devices = gathered_rewards.size(0) // self.num_generations + rewards_per_device = gathered_rewards.view(num_devices, self.num_generations) + 
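+        # Each row of `rewards_per_device` holds the `num_generations` rewards of one generation group
+        # (one group per device when per_device_train_batch_size is 1); groups that are all wrong or all
+        # correct are tracked below, since they provide almost no advantage signal after the group-wise
+        # normalization above.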
wrong_devices = (rewards_per_device <= 1).all(dim=1) + wrong_ratio = wrong_devices.sum().item() / num_devices + + correct_devices = (rewards_per_device >= 2).all(dim=1) + correct_ratio = correct_devices.sum().item() / num_devices + + self._metrics["all_wrong"].append(wrong_ratio) + self._metrics["all_correct"].append(correct_ratio) + + if self.temporal: + temporal_rewards_list = self.accelerator.gather_for_metrics(temporal_rewards) + self._metrics["temporal_rewards"].append(self.accelerator.gather_for_metrics(temporal_rewards_list).mean().item()) + + self._metrics["reward"].append(self.accelerator.gather_for_metrics(rewards).mean().item()) + + self._metrics["reward_std"].append(self.accelerator.gather_for_metrics(std_grouped_rewards).mean().item()) + + mean_kl = ((per_token_kl * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean() + self._metrics["kl"].append(self.accelerator.gather_for_metrics(mean_kl).mean().item()) + + + return loss + + + + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + metrics = {key: sum(val) / len(val) for key, val in self._metrics.items()} # average the metrics + + # This method can be called both in training and evaluation. When called in evaluation, the keys in `logs` + # start with "eval_". We need to add the prefix "eval_" to the keys in `metrics` to match the format. + if next(iter(logs.keys())).startswith("eval_"): + metrics = {f"eval_{key}": val for key, val in metrics.items()} + + logs = {**logs, **metrics} + if version.parse(transformers.__version__) >= version.parse("4.47.0.dev0"): + super().log(logs, start_time) + else: # transformers<=4.46 + super().log(logs) + self._metrics.clear() \ No newline at end of file diff --git a/src/scripts/3b-LLMEval.sh b/src/scripts/3b-LLMEval.sh new file mode 100644 index 0000000000000000000000000000000000000000..b85d4281fa69637559374f8f52b80f592bafbb10 --- /dev/null +++ b/src/scripts/3b-LLMEval.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' + +DATA_FILE="pool_multiple_choice_chunk_01" +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/3B-LLMEval/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="3B-LLMEval-${DATA_FILE}" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
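+# temporal=true enables the T-GRPO check: accuracy on the ordered video is compared against a
+# shuffled-frame rollout and a bonus is granted only when the ordered rollout clearly wins;
+# len_control=true adds a small extra reward for correct completions within the target length band.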
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="8" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-LLMEval.py \ + --use_vllm false \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1400 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 8 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 50 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:6" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/3b-description-LLMEval.sh b/src/scripts/3b-description-LLMEval.sh new file mode 100644 index 0000000000000000000000000000000000000000..ef30d252a8a024086aac1cf35dab22b865e938d6 --- /dev/null +++ b/src/scripts/3b-description-LLMEval.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/sft_models/mllm_data1/Qwen2.5-VL-3B-Instruct' + +DATA_FILE="merged_train" +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/3B-LLMEval/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="3B-Description-LLMEval-${DATA_FILE}" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
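+# Below: 7 training processes (GPUs 0-6) plus a dedicated vLLM generation worker on cuda:7 (--vllm_device).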
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-description-LLMEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1400 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 20 \ + --save_only_model false \ + --temporal true \ + --len_control false \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/3b_grpo_answerBERT_eval.sh b/src/scripts/3b_grpo_answerBERT_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..27540a62c22e22b720a531c608f74f7c8b9d6987 --- /dev/null +++ b/src/scripts/3b_grpo_answerBERT_eval.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +# DATA_FILE="pool_multiple_choice_chunk_01" + + +# QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-AnswerBERT/pool_multiple_choice_chunk_01/checkpoint-57' +# QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-AnswerBERT/pool_numerical_chunk_01/checkpoint-42' +# QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-AnswerBERT/pool_numerical_chunk_02/checkpoint-14' +# DATA_FILE="video_pool_multiple_choice_chunk_01" + +# QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-AnswerBERT/video_pool_multiple_choice_chunk_01/checkpoint-46' +# DATA_FILE="pool_numerical_chunk_03" + +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-AnswerBERT/video_pool_multiple_choice_chunk_01/checkpoint-46' +DATA_FILE="video_pool_multiple_choice_chunk_02" + + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/3B-Video-GRPO-AnswerBERT/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="3B-Video-answerBERT-Eval" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" +export WANDB_API_KEY="5e11bfa8cf4062940486d279ecd9e70617d4ac7a" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-answerBERT-eval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 14 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/3b_run_grpo_vllm_72B_eval.sh b/src/scripts/3b_run_grpo_vllm_72B_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..efc19d7aaaf47fd62275bec5fe20fd3561c549c8 --- /dev/null +++ b/src/scripts/3b_run_grpo_vllm_72B_eval.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# DATA_FILE="pool_multiple_choice_chunk_01" +# QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' + +DATA_FILE="pool_numerical_chunk_01" +QWEN_PATH="/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-72BEval-Train/pool_multiple_choice_chunk_01/checkpoint-57" + + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/3B-Video-GRPO-72BEval-Train/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="3B-Video-GRPO-72BEval-${DATA_FILE}" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-72BEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 14 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/3b_run_grpo_vllm_answerBERT_thenNoDesEval.sh b/src/scripts/3b_run_grpo_vllm_answerBERT_thenNoDesEval.sh new file mode 100644 index 0000000000000000000000000000000000000000..f3d01890bbe691a07d83ef17eaab082da6d8c049 --- /dev/null +++ b/src/scripts/3b_run_grpo_vllm_answerBERT_thenNoDesEval.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-AnswerBERT/video_pool_multiple_choice_chunk_01/checkpoint-46' +DATA_FILE="pool_multiple_choice_chunk_02" + + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/3B-Video-GRPO-answerBERT-ThenNoDesEval/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="3B-Video-GRPO-answerBERT-ThenNoDesEval" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" +export WANDB_API_KEY="5e11bfa8cf4062940486d279ecd9e70617d4ac7a" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-noDesEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 14 \ + --save_only_model false \ + --temporal true \ + --len_control false \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/3b_run_grpo_vllm_no_des_eval.sh b/src/scripts/3b_run_grpo_vllm_no_des_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..1f8ce8e640b88dba51c0388fa2a6199c43a0c30b --- /dev/null +++ b/src/scripts/3b_run_grpo_vllm_no_des_eval.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +# DATA_FILE="pool_multiple_choice_chunk_01" + +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-NoDesEval/pool_multiple_choice_chunk_01/checkpoint-57' +DATA_FILE="pool_numerical_chunk_01" + + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/3B-Video-GRPO-NoDesEval/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="3B-Video-GRPO-NoDes-Eval" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" +export WANDB_API_KEY="5e11bfa8cf4062940486d279ecd9e70617d4ac7a" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-noDesEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 14 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/3b_run_grpo_vllm_self_eval.sh b/src/scripts/3b_run_grpo_vllm_self_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..94ee6b7ce9c258443f0f6f9800a650d66edacc99 --- /dev/null +++ b/src/scripts/3b_run_grpo_vllm_self_eval.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# DATA_FILE="pool_multiple_choice_chunk_01" +# QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' + +# QWEN_PATH="/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-SelfEval-Train/pool_multiple_choice_chunk_01" +# DATA_FILE="pool_numerical_chunk_01" + + +QWEN_PATH="/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-SelfEval-Train/pool_numerical_chunk_01/checkpoint-25" +DATA_FILE="video_pool_multiple_choice_chunk_01" + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/3B-Video-GRPO-SelfEval-Train/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="3B-Video-GRPO-SelfEval-${DATA_FILE}" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" +export WANDB_API_KEY="5e11bfa8cf4062940486d279ecd9e70617d4ac7a" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-selfEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 14 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/3b_run_grpo_vllm_self_evalConst.sh b/src/scripts/3b_run_grpo_vllm_self_evalConst.sh new file mode 100644 index 0000000000000000000000000000000000000000..f96aadbb61fabb703ae76f9506ff848ba9a03930 --- /dev/null +++ b/src/scripts/3b_run_grpo_vllm_self_evalConst.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +DATA_FILE="pool_multiple_choice_chunk_01" +QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' + + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/3B-Video-GRPO-SelfEvalConst/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="3B-Video-GRPO-SelfEvalConst-${DATA_FILE}" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-selfEvalConst.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 100 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/3b_run_grpo_vllm_self_eval_thenNoDesEval.sh b/src/scripts/3b_run_grpo_vllm_self_eval_thenNoDesEval.sh new file mode 100644 index 0000000000000000000000000000000000000000..bf149291374fbc69ea6ed96068d04d4fc2a106ca --- /dev/null +++ b/src/scripts/3b_run_grpo_vllm_self_eval_thenNoDesEval.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + +## Take self eval then do no des eval +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-SelfEval-Train/pool_numerical_chunk_01/checkpoint-25' +DATA_FILE='video_pool_multiple_choice_chunk_01' + +# QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-SelfEval-Train/pool_numerical_chunk_01/checkpoint-25' +# # DATA_FILE="pool_numerical_chunk_01" +# DATA_FILE='pool_multiple_choice_chunk_02' + +# QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-AnswerBERT/video_pool_multiple_choice_chunk_01/checkpoint-46' +# DATA_FILE='pool_numerical_chunk_02' + +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/3B-Video-GRPO-AnswerBERT/video_pool_multiple_choice_chunk_01/checkpoint-46' +DATA_FILE="pool_multiple_choice_chunk_02" + + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/3B-Video-GRPO-selfEval-ThenNoDesEval/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="3B-Video-GRPO-selfEval-ThenNoDesEval" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" +export WANDB_API_KEY="5e11bfa8cf4062940486d279ecd9e70617d4ac7a" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-noDesEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 14 \ + --save_only_model false \ + --temporal true \ + --len_control false \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/7b-description-LLMEval.sh b/src/scripts/7b-description-LLMEval.sh new file mode 100644 index 0000000000000000000000000000000000000000..82aa5ec74b3094668e6553c40a6829fcac96f927 --- /dev/null +++ b/src/scripts/7b-description-LLMEval.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# QWEN_PATH='Qwen/Qwen2.5-VL-7B-Instruct' +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/sft_models/qwen2_5_vl_7b_mllm_data1/Qwen2.5-VL-7B-Instruct' + +DATA_FILE="merged_train" +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/7B-LLMEval/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="7B-Description-LLMEval-${DATA_FILE}" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-description-LLMEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1400 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 8 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 20 \ + --save_only_model false \ + --temporal true \ + --len_control false \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/7b_grpo_answerBERT_eval.sh b/src/scripts/7b_grpo_answerBERT_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..77c2f04030cc7898bec1c43d75dd8bead797b62f --- /dev/null +++ b/src/scripts/7b_grpo_answerBERT_eval.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# QWEN_PATH='Qwen/Qwen2.5-VL-7B-Instruct' +# DATA_FILE="pool_multiple_choice_chunk_01" + +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/7B-Video-GRPO-SelfEval-Train/pool_multiple_choice_chunk_01/checkpoint-115' +DATA_FILE="pool_numerical_chunk_01" + + + + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/7B-Video-GRPO-AnswerBERT/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="7B-Video-answerBERT-Eval" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" +export WANDB_API_KEY="5e11bfa8cf4062940486d279ecd9e70617d4ac7a" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-answerBERT-eval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 14 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/7b_run_grpo_vllm_no_des_eval.sh b/src/scripts/7b_run_grpo_vllm_no_des_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..602935462c07b93647865cd7159890977c762bca --- /dev/null +++ b/src/scripts/7b_run_grpo_vllm_no_des_eval.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='Qwen/Qwen2.5-VL-7B-Instruct' +DATA_FILE="pool_multiple_choice_chunk_01" +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/7B-Video-GRPO-NoDesEval/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="7B-Video-GRPO-NoDes-Eval" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" +export WANDB_API_KEY="5e11bfa8cf4062940486d279ecd9e70617d4ac7a" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-noDesEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 976 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 16 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 50 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/7b_run_grpo_vllm_self_eval.sh b/src/scripts/7b_run_grpo_vllm_self_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..a979e3c3c4b99c6cd82dc4c09b39eb5053e009fc --- /dev/null +++ b/src/scripts/7b_run_grpo_vllm_self_eval.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# DATA_FILE="pool_multiple_choice_chunk_01" +# QWEN_PATH='Qwen/Qwen2.5-VL-7B-Instruct' + + +QWEN_PATH="/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/7B-Video-GRPO-SelfEval-Train/pool_multiple_choice_chunk_01/checkpoint-115" +DATA_FILE="pool_numerical_chunk_01" + + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/7B-Video-GRPO-SelfEval-Train/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="7B-Video-GRPO-SelfEval-${DATA_FILE}" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" +export WANDB_API_KEY="5e11bfa8cf4062940486d279ecd9e70617d4ac7a" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-selfEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 14 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/7b_run_grpo_vllm_self_eval_thenNoDesEval.sh b/src/scripts/7b_run_grpo_vllm_self_eval_thenNoDesEval.sh new file mode 100644 index 0000000000000000000000000000000000000000..b17e10c0b9d37b5a1d78525aac752125b80ba24e --- /dev/null +++ b/src/scripts/7b_run_grpo_vllm_self_eval_thenNoDesEval.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/7B-Video-GRPO-SelfEval-Train/pool_multiple_choice_chunk_01/checkpoint-115' +# DATA_FILE="pool_numerical_chunk_01" + +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/7B-Video-GRPO-selfEval-ThenNoDesEval/pool_numerical_chunk_01/checkpoint-42' +DATA_FILE='pool_numerical_chunk_02' + + +HF_DATASET="./Video-R1-data/${DATA_FILE}.json" +OUTPUT_DIR="./log/7B-Video-GRPO-selfEval-ThenNoDesEval/${DATA_FILE}" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="7B-Video-GRPO-selfEval-ThenNoDesEval" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" +export WANDB_API_KEY="5e11bfa8cf4062940486d279ecd9e70617d4ac7a" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-noDesEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 14 \ + --save_only_model false \ + --temporal true \ + --len_control false \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/gpu_burn.py b/src/scripts/gpu_burn.py new file mode 100644 index 0000000000000000000000000000000000000000..68a752ebc4efdfbe3f4ecb020d6eef2f303a7b50 --- /dev/null +++ b/src/scripts/gpu_burn.py @@ -0,0 +1,61 @@ +# import time + +# # Number of seconds in a day: 24 hours * 60 minutes * 60 seconds +# seconds_in_a_day = 24 * 60 * 60 + +# # Sleep for 100 days +# time.sleep(seconds_in_a_day * 500) + +import subprocess +import time +import threading +import torch +from collections import deque + +def get_gpu_details(gpu_id): + """Returns the GPU utilization, used memory, and total memory for a specific GPU.""" + cmd = ['nvidia-smi', '--id=' + str(gpu_id), + '--query-gpu=utilization.gpu,memory.used,memory.total', + '--format=csv,noheader,nounits'] + result = subprocess.run(cmd, stdout=subprocess.PIPE, text=True) + utilization, used_memory, total_memory = result.stdout.strip().split(', ') + return int(utilization), int(used_memory), int(total_memory) + +def matrix_calculation_task(gpu_id, stop_event, task_running): + """Performs a GPU-occupying task on the specified GPU.""" + torch.cuda.set_device(gpu_id) + task_running[gpu_id] = True + while not stop_event.is_set(): + a = torch.rand(55000, 55000, device='cuda') + b = torch.rand(55000, 55000, device='cuda') + torch.matmul(a, b) + task_running[gpu_id] = False + +def monitor_and_manage_gpu(gpu_id, stop_event, task_running): + """Monitors a GPU and manages the matrix calculation task based on average usage.""" + utilization_data = deque(maxlen=30) # Stores the last 30 seconds of utilization data + while True: + utilization, _, _ = get_gpu_details(gpu_id) + utilization_data.append(utilization) + if len(utilization_data) == 30: # Every 30 seconds + avg_utilization = round(sum(utilization_data) / len(utilization_data), 1) + if avg_utilization < 90 and not task_running[gpu_id]: + print(f"Average GPU {gpu_id} ({avg_utilization}%) utilization over the last 30 seconds is underutilized, starting task.") + stop_event.clear() + threading.Thread(target=matrix_calculation_task, args=(gpu_id, stop_event, task_running)).start() + elif 
avg_utilization >= 90 and task_running[gpu_id]: +                print(f"Average GPU {gpu_id} ({avg_utilization}%) utilization over the last 30 seconds is normal, keep running.") +            else: +                if task_running[gpu_id]: +                    print(f"Occupying task has just started, and average GPU {gpu_id} ({avg_utilization}%) is increasing, keep monitoring.") +                else: +                    print(f"No occupying task running, but average GPU {gpu_id} ({avg_utilization}%) utilization over the last 30 seconds is normal.") +        time.sleep(1)  # Check every second, but make decisions based on the 30-second average + +num_gpus = 8 +stop_events = [threading.Event() for _ in range(num_gpus)] +task_running = [False] * num_gpus + +# Start monitoring and task management for each GPU +for gpu_id in range(1, num_gpus): +    threading.Thread(target=monitor_and_manage_gpu, args=(gpu_id, stop_events[gpu_id], task_running)).start() diff --git a/src/scripts/gpu_burn.sh b/src/scripts/gpu_burn.sh new file mode 100644 index 0000000000000000000000000000000000000000..1e8da3e1febb671e7d52d434ee68198858514e69 --- /dev/null +++ b/src/scripts/gpu_burn.sh @@ -0,0 +1 @@ +python /apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_grpo_vllm_answerBERT_eval.sh b/src/scripts/run_grpo_vllm_answerBERT_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..05ba386b34519f6a7ebffcac997a7e102aa861a2 --- /dev/null +++ b/src/scripts/run_grpo_vllm_answerBERT_eval.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/Qwen2.5-VL-3B-Video-GRPO-answerBERT-Eval-Train-QA10K/checkpoint-30' +HF_DATASET="./Video-R1-data/Train_QA_10k_noFreeForm.json" +OUTPUT_DIR="./log/Qwen2.5-VL-3B-Video-GRPO-answerBERT-Eval-Train-QA10K" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="Qwen2.5-VL-3B-Video-GRPO-COT-answerBERT-Eval-QA10K" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="6" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-answerBERT-eval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 976 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 1 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 4 \ + --run_name ${RUN_NAME} \ + --save_steps 200 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:6" \ + --vllm_gpu_memory_utilization 0.6 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_grpo_vllm_answerBERT_eval_novllm.sh b/src/scripts/run_grpo_vllm_answerBERT_eval_novllm.sh new file mode 100644 index 0000000000000000000000000000000000000000..e353276f87b140b38a4dab5894a29fec2d7b3a62 --- /dev/null +++ b/src/scripts/run_grpo_vllm_answerBERT_eval_novllm.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +# QWEN_PATH='/apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/src/r1-v/log/Qwen2.5-VL-3B-Video-GRPO-answerBERT-Eval-Train-QA10K/checkpoint-30' +HF_DATASET="./Video-R1-data/Train_QA_10k_noFreeForm.json" +OUTPUT_DIR="./log/Qwen2.5-VL-3B-Video-GRPO-answerBERT-Eval-Train-QA10K" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="Qwen2.5-VL-3B-Video-GRPO-COT-answerBERT-Eval-QA10K" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-answerBERT-eval.py \ + --use_vllm false \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 1600 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 4 \ + --run_name ${RUN_NAME} \ + --save_steps 150 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_grpo_vllm_no_des_eval.sh b/src/scripts/run_grpo_vllm_no_des_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..32fcd8025672b2e1d549979a8fe6bbad6e4676bd --- /dev/null +++ b/src/scripts/run_grpo_vllm_no_des_eval.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +HF_DATASET="./Video-R1-data/Train_QA_10k_noFreeForm.json" +OUTPUT_DIR="./log/Qwen2.5-VL-3B-Video-GRPO-NoDesEvall-Train-QA10K" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="Qwen2.5-VL-3B-Video-GRPO-COT-NoDes-Eval-QA10K" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="8" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-noDesEval.py \ + --use_vllm false \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 976 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 4 \ + --run_name ${RUN_NAME} \ + --save_steps 100 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:6" \ + --vllm_gpu_memory_utilization 0.6 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_grpo_vllm_qwen25vl-cot-LLMEval.sh b/src/scripts/run_grpo_vllm_qwen25vl-cot-LLMEval.sh new file mode 100644 index 0000000000000000000000000000000000000000..761cfc5797b415d52a3ceae5a9531064557cbc0a --- /dev/null +++ b/src/scripts/run_grpo_vllm_qwen25vl-cot-LLMEval.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='Qwen/Qwen2.5-VL-7B-Instruct' +# QWEN_PATH='Video-R1/Video-R1-7B' +HF_DATASET="./Video-R1-data/Train_QA_10k_noFreeForm.json" +OUTPUT_DIR="./log/Qwen2.5-VL-7B-Video-GRPO-LLMEval-Train-QA10K" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="Qwen2.5-VL-7B-Video-GRPO-COT-LLMEval-QA10K" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="6" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-LLMEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 768 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 8 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 50 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:6" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_grpo_vllm_qwen25vl-cot-noInfo.sh b/src/scripts/run_grpo_vllm_qwen25vl-cot-noInfo.sh new file mode 100644 index 0000000000000000000000000000000000000000..61580a85b932d7332d7283dedcdeeeb18da54ced --- /dev/null +++ b/src/scripts/run_grpo_vllm_qwen25vl-cot-noInfo.sh @@ -0,0 +1,64 @@ +#!/bin/bash + + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +HF_DATASET="./Video-R1-data/geometry-train.json" +OUTPUT_DIR="./log/Qwen2.5-VL-3B-Video-GRPO-noInfo" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="Qwen2.5-VL-3B-Video-GRPO-COT-noInfo" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="6" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-noInfo.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 768 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 1 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 4 \ + --run_name ${RUN_NAME} \ + --save_steps 40 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:6" \ + --vllm_gpu_memory_utilization 0.5 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_grpo_vllm_qwen25vl-cot.sh b/src/scripts/run_grpo_vllm_qwen25vl-cot.sh new file mode 100644 index 0000000000000000000000000000000000000000..d27b3476b936089860989616f71eae2d55a19885 --- /dev/null +++ b/src/scripts/run_grpo_vllm_qwen25vl-cot.sh @@ -0,0 +1,64 @@ +#!/bin/bash + + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +HF_DATASET="./Video-R1-data/geometry-train.json" +OUTPUT_DIR="./log/Qwen2.5-VL-3B-Video-GRPO-Info" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="Qwen2.5-VL-3B-Video-GRPO-COT-Info" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="6" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 768 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 1 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 4 \ + --run_name ${RUN_NAME} \ + --save_steps 465 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:6" \ + --vllm_gpu_memory_utilization 0.5 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_grpo_vllm_qwen25vl.sh b/src/scripts/run_grpo_vllm_qwen25vl.sh new file mode 100644 index 0000000000000000000000000000000000000000..5829587f892b92aac8c9a12fb3a8c9dd137bc466 --- /dev/null +++ b/src/scripts/run_grpo_vllm_qwen25vl.sh @@ -0,0 +1,70 @@ +#!/bin/bash + + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +# QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +# HF_DATASET="./Video-R1-data/Video-R1-260k.json" +# OUTPUT_DIR="./log/Qwen2.5-VL-3B-Video-GRPO" + +QWEN_PATH='zli12321/VideoHallu-3B-R1' +HF_DATASET="./Video-R1-data/synthetic_data_split.json" +OUTPUT_DIR="./log/Qwen2.5-VL-3B-synthetic-Video-GRPO" + + +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="Qwen2.5-VL-3B-Video-GRPO" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 768 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 1 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 1 \ + --run_name ${RUN_NAME} \ + --save_steps 100 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_grpo_vllm_qwen_eval.sh b/src/scripts/run_grpo_vllm_qwen_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..48c3f533cc790c6eeb31710e153e26b01daecb8a --- /dev/null +++ b/src/scripts/run_grpo_vllm_qwen_eval.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +HF_DATASET="./Video-R1-data/Train_QA_10k_noFreeForm.json" +OUTPUT_DIR="./log/Qwen2.5-VL-3B-Video-GRPO-qwen-Eval-Train-QA10K" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="Qwen2.5-VL-3B-Video-GRPO-COT-qwen-Eval-QA10K" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5" torchrun \ + --nproc_per_node="6" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-qwenEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 976 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 2 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 4 \ + --run_name ${RUN_NAME} \ + --save_steps 30 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:6" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_grpo_vllm_self_eval.sh b/src/scripts/run_grpo_vllm_self_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..e23bf9fcdb453e54badd42fac71b1ae3d391c72f --- /dev/null +++ b/src/scripts/run_grpo_vllm_self_eval.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" +export LOG_PATH="./vllm_run.txt" + + +QWEN_PATH='Qwen/Qwen2.5-VL-3B-Instruct' +HF_DATASET="./Video-R1-data/Train_QA_10k_noFreeForm.json" +OUTPUT_DIR="./log/Qwen2.5-VL-3B-Video-GRPO-Self-Eval-Train-QA10K" +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi +RUN_NAME="Qwen2.5-VL-3B-Video-GRPO-COT-SelfEval-QA10K" +DS_CONFIG="local_scripts/zero3.json" + +# Set temporal to choose between T-GRPO and GRPO, and len_control to enable or disable the length control reward. 
+# NOTE: you are expected to use X + 1 cards for X training proc and 1 vLLM proc +# e.g., the visible devices should be 0,1,2,3,4 for 5 cards, and --nproc_per_node="4" + +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun \ + --nproc_per_node="7" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12345" \ + src/open_r1/grpo-cot-selfEval.py \ + --use_vllm true \ + --output_dir ${OUTPUT_DIR} \ + --model_name_or_path ${QWEN_PATH} \ + --dataset_name ${HF_DATASET} \ + --max_prompt_length 16384 \ + --max_completion_length 976 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 16 \ + --learning_rate 1e-6 \ + --lr_scheduler_type "cosine" \ + --weight_decay 0.01 \ + --logging_steps 1 \ + --bf16 true \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --min_pixels 3136 \ + --max_pixels 501760 \ + --num_train_epochs 2 \ + --run_name ${RUN_NAME} \ + --save_steps 30 \ + --save_only_model false \ + --temporal true \ + --len_control true \ + --report_to wandb \ + --beta 0.04 \ + --max_grad_norm 5 \ + --temperature 1.0 \ + --num_generations 8 \ + --vllm_device "cuda:7" \ + --vllm_gpu_memory_utilization 0.7 \ + --deepspeed ${DS_CONFIG} \ + 2>&1 | tee "${OUTPUT_DIR}/training_log.txt" + + +python /apdcephfs_sh2/share_300000800/user/zongxia/Video-R1/gpu_burn.py \ No newline at end of file diff --git a/src/scripts/run_sft_video.sh b/src/scripts/run_sft_video.sh new file mode 100644 index 0000000000000000000000000000000000000000..a67c65834238ba421bc9c48b3ac62c5afcc4781a --- /dev/null +++ b/src/scripts/run_sft_video.sh @@ -0,0 +1,34 @@ +./move.sh + +cd src/r1-v + +export DEBUG_MODE="true" # Enable Debug if you want to see the rollout of model during RL +export LOG_PATH="./debug_log_2b.txt" + + +CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node="4" \ + --nnodes="1" \ + --node_rank="0" \ + --master_addr="127.0.0.1" \ + --master_port="12349" \ + src/open_r1/sft_video.py \ + --output_dir "./log/Qwen2.5-VL-3B-Video-3B-cot-sft" \ + --model_name_or_path "Qwen/Qwen2.5-VL-3B-Instruct" \ + --dataset_name "./Video-R1-data/Video-R1-COT-165k.json" \ + --deepspeed local_scripts/zero2_1.json \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 2 \ + --learning_rate 1e-6 \ + --logging_steps 1 \ + --bf16 \ + --report_to wandb \ + --gradient_checkpointing true \ + --attn_implementation flash_attention_2 \ + --num_train_epochs 1 \ + --run_name Qwen2.5-VL-3B-Video-cot-sft \ + --save_steps 1000 \ + --max_grad_norm 5 \ + --save_only_model true \ + + +python /cq_1/share_1603164/user/zongxia/workspace/gpu_burn.py \ No newline at end of file
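All of the launch scripts above share the same "X + 1 cards" layout noted in their headers: with N visible GPUs, torchrun runs N - 1 training processes and the last card is reserved for the vLLM generation engine (e.g. --nproc_per_node="7" plus --vllm_device "cuda:7" on an 8-card node). The following is a minimal sketch of that arithmetic using only flags that already appear in the scripts; the helper variables GPUS, NUM_GPUS, NPROC, and VLLM_ID are illustrative and not part of the repo.

#!/bin/bash
# Illustrative sketch of the X + 1 card split used by the GRPO launch scripts above.
GPUS="0,1,2,3,4,5,6,7"                             # all visible cards on the node
NUM_GPUS=$(echo "$GPUS" | tr ',' '\n' | wc -l)     # 8 cards total
NPROC=$((NUM_GPUS - 1))                            # 7 training processes
VLLM_ID=$((NUM_GPUS - 1))                          # last card (cuda:7) serves vLLM rollouts

CUDA_VISIBLE_DEVICES="$GPUS" torchrun \
    --nproc_per_node="$NPROC" \
    --nnodes="1" --node_rank="0" \
    --master_addr="127.0.0.1" --master_port="12345" \
    src/open_r1/grpo.py \
    --use_vllm true \
    --vllm_device "cuda:${VLLM_ID}" \
    --vllm_gpu_memory_utilization 0.7 \
    "$@"    # pass the remaining training flags exactly as in the scripts above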
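Every run also points --deepspeed at local_scripts/zero3.json, which is not included in this diff. Purely as a reference point, a typical ZeRO-3 config compatible with the HF Trainer flags used here (bf16, automatic batch sizing) looks roughly like the sketch below; the repo's actual file may differ.

# Sketch only: the real local_scripts/zero3.json is not part of this diff.
cat > local_scripts/zero3.json <<'EOF'
{
  "bf16": { "enabled": "auto" },
  "zero_optimization": {
    "stage": 3,
    "overlap_comm": true,
    "contiguous_gradients": true,
    "reduce_bucket_size": "auto",
    "stage3_prefetch_bucket_size": "auto",
    "stage3_param_persistence_threshold": "auto",
    "stage3_gather_16bit_weights_on_model_save": true
  },
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto"
}
EOF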