From f0c47e06e73c78dcaac2ad20a7e55b1a4f5190a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vil=C3=A9m=20Zouhar?= <vilem.zouhar@gmail.com>
Date: Fri, 2 Aug 2024 20:12:08 +0200
Subject: [PATCH 1/6] fix typo

---
 EvalView/templates/EvalView/_instructions-esa.html | 2 +-
 EvalView/templates/EvalView/_instructions-mqm.html | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/EvalView/templates/EvalView/_instructions-esa.html b/EvalView/templates/EvalView/_instructions-esa.html
index 62c2d143..04ec0f4d 100644
--- a/EvalView/templates/EvalView/_instructions-esa.html
+++ b/EvalView/templates/EvalView/_instructions-esa.html
@@ -1,7 +1,7 @@
 <div class="row">
   <div class="col-md-12">
     <ul class="list-unstyled">
-      <li><strong>Higlighting errors:</strong>
+      <li><strong>Highlighting errors:</strong>
         <ul>
           <li>
             Highlight the text fragment where you have identified a translation error (drag or click start & end).
diff --git a/EvalView/templates/EvalView/_instructions-mqm.html b/EvalView/templates/EvalView/_instructions-mqm.html
index 2f36b694..284beed1 100644
--- a/EvalView/templates/EvalView/_instructions-mqm.html
+++ b/EvalView/templates/EvalView/_instructions-mqm.html
@@ -1,7 +1,7 @@
 <div class="row">
   <div class="col-md-12">
     <ul class="list-unstyled">
-      <li><strong>Higlighting errors:</strong>
+      <li><strong>Highlighting errors:</strong>
         <ul>
           <li>
             Highlight the text fragment where you have identified a translation error (drag or click start & end).

From d64659b142ad16d88262cdec5d4fac9b9708e687 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vil=C3=A9m=20Zouhar?= <vilem.zouhar@gmail.com>
Date: Mon, 5 Aug 2024 10:29:24 +0200
Subject: [PATCH 2/6] clarify annotator tokens

---
 INSTALL.md | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index fffcf124..33809a75 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,4 +1,4 @@
-## Setup
+# Setup
 
 1. Basic setup:
 
@@ -39,6 +39,9 @@ python3 manage.py StartNewCampaign Examples/MQM+ESA/manifest.json \
 python3 manage.py CreateInviteTokens test_group 20 --create-group test_group
 ```
 
+Add `--task-confirmation-tokens` if you with to show annotators tokens at the end.
+See [quality control](#Quality control) for more details.
+
 5. Optionally clean up everything
 
 ```
@@ -122,4 +125,13 @@ For task:
 - `batchNo`: task number
 - `randomSeed`: number used in batch generation
 - `requiredAnnotations`: how many annotations does a task need, in most cases use 1
-- `source/targetLanguage`: source and target language
\ No newline at end of file
+- `source/targetLanguage`: source and target language
+
+## Quality control
+
+With `--task-confirmation-tokens`, the annotators will be shown a random one if they fail the quality control and a correct one (matching the one in the CSV output) if they succeed.
+The quality control checks if the perturbed samples (`itemType=BAD`) have statistically lower scores than the original ones (`itemType=TGT`).
+Even without the switch, the campaign status page will show a p-value (last column for staff account) that corresponds to the outcome of this test.
+If it's close to 1, then the annotator is annotating randomly and is of poor quality.
+For values close to 0, the annotations are good.
+The threshold to generate the true token for annotators is currently p<=10%.

From 2666cb22ef5376f9da33e7829a7d63b3ef5082fe Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz <rgrundkiewicz@gmail.com>
Date: Mon, 19 Aug 2024 10:38:18 +0100
Subject: [PATCH 3/6] Update INSTALL.md

---
 INSTALL.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/INSTALL.md b/INSTALL.md
index 33809a75..5c2c32a7 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -39,7 +39,7 @@ python3 manage.py StartNewCampaign Examples/MQM+ESA/manifest.json \
 python3 manage.py CreateInviteTokens test_group 20 --create-group test_group
 ```
 
-Add `--task-confirmation-tokens` if you with to show annotators tokens at the end.
+Add `--task-confirmation-tokens` if you want to generate annotator confirmation tokens.
-See [quality control](#Quality control) for more details.
+See [quality control](#quality-control) for more details.
 
 5. Optionally clean up everything

From 82e9eab7e19dfa35f12af3aaf17cd6db47172d3b Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz <rgrundkiewicz@gmail.com>
Date: Mon, 19 Aug 2024 10:38:24 +0100
Subject: [PATCH 4/6] Update INSTALL.md

---
 INSTALL.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/INSTALL.md b/INSTALL.md
index 5c2c32a7..6b11109e 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -129,7 +129,7 @@ For task:
 
 ## Quality control
 
-With `--task-confirmation-tokens`, the annotators will be shown a random one if they fail the quality control and a correct one (matching the one in the CSV output) if they succeed.
+With `--task-confirmation-tokens`, the annotators will be shown a random key/token if they fail the quality control and a correct one (matching the one in the CSV output with credentials) if they succeed.
 The quality control checks if the perturbed samples (`itemType=BAD`) have statistically lower scores than the original ones (`itemType=TGT`).
 Even without the switch, the campaign status page will show a p-value (last column for staff account) that corresponds to the outcome of this test.
 If it's close to 1, then the annotator is annotating randomly and is of poor quality.

From 715ade94b35c9fe0364d07e55ed79586b6deca54 Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz <rgrundkiewicz@gmail.com>
Date: Mon, 19 Aug 2024 10:38:30 +0100
Subject: [PATCH 5/6] Update INSTALL.md

---
 INSTALL.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/INSTALL.md b/INSTALL.md
index 6b11109e..a85f6d10 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -134,4 +134,4 @@ The quality control checks if the perturbed samples (`itemType=BAD`) have statis
 Even without the switch, the campaign status page will show a p-value (last column for staff account) that corresponds to the outcome of this test.
 If it's close to 1, then the annotator is annotating randomly and is of poor quality.
 For values close to 0, the annotations are good.
-The threshold to generate the true token for annotators is currently p<=10%.
+The threshold to generate the valid token for annotators is currently p<=10%.

From a832a62bbd680a5bc56eb72776223515e53327dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vil=C3=A9m=20Zouhar?= <vilem.zouhar@gmail.com>
Date: Sun, 20 Jul 2025 16:28:58 -0700
Subject: [PATCH 6/6] add <img support, fix document count computation, render
 newlines as newlines

---
 EvalData/models/direct_assessment_document.py | 17 +++++++++------
 .../direct-assessment-document-mqm-esa.css    |  7 +++++++
 .../js/direct-assessment-document-mqm-esa.js  | 21 +++++++++++++++----
 .../direct-assessment-document-mqm-esa.html   |  2 +-
 EvalView/views.py                             | 14 +++++++++----
 5 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/EvalData/models/direct_assessment_document.py b/EvalData/models/direct_assessment_document.py
index 410b736c..4499bf6a 100644
--- a/EvalData/models/direct_assessment_document.py
+++ b/EvalData/models/direct_assessment_document.py
@@ -255,11 +255,12 @@ def next_document_for_user_mqmesa(self, user):
         Used for MQM/ESA views
         Specifically a tuple with:
             next_item,
-            completed_items,
-            completed_docs,
+            items_completed,
+            items_total,
+            docs_completed,
+            docs_total,
             doc_items,
             doc_items_results,
-            total_docs,
         """
 
         # get all items (100) and try to find resul
@@ -274,16 +275,19 @@ def next_document_for_user_mqmesa(self, user):
         ]
         unfinished_items = [i for i, r in all_items if not r]
 
-        docs_total = len({i.documentID for i, r in all_items})
+        # documentID + targetID uniquely identifies documents
+        docs_total = len({(i.documentID, i.targetID) for i, r in all_items})
         items_completed = len([i for i, r in all_items if r and r.completed])
         docs_completed = docs_total - len(
-            {i.documentID for i, r in all_items if r is None or not r.completed}
+            {(i.documentID, i.targetID) for i, r in all_items if r is None or not r.completed}
         )
+        items_total = len(all_items)
 
         if not unfinished_items:
             return (
                 None,
                 items_completed,
+                items_total,
                 docs_completed,
                 [],
                 [],
@@ -309,10 +313,11 @@ def next_document_for_user_mqmesa(self, user):
         return (
             next_item,  # the first unannotated item for the user
             items_completed,  # the number of completed items in the task
+            items_total,
             docs_completed,  # the number of completed documents in the task
+            docs_total,  # the total number of documents in the task
             doc_items,  # all items from the current document
             doc_items_results,  # all score results from the current document
-            docs_total,  # the total number of documents in the task
         )
 
     def get_results_for_each_item(self, block_items, user):
diff --git a/EvalView/static/EvalView/css/direct-assessment-document-mqm-esa.css b/EvalView/static/EvalView/css/direct-assessment-document-mqm-esa.css
index d17ab3e7..f7acdc0b 100644
--- a/EvalView/static/EvalView/css/direct-assessment-document-mqm-esa.css
+++ b/EvalView/static/EvalView/css/direct-assessment-document-mqm-esa.css
@@ -73,6 +73,13 @@
     width: 100%;
 }
 
+.source-text > img {
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+    width: 45%;
+}
+
 .tutorial-text {
     text-align: center;
     color: #257;
diff --git a/EvalView/static/EvalView/js/direct-assessment-document-mqm-esa.js b/EvalView/static/EvalView/js/direct-assessment-document-mqm-esa.js
index 3a7e9e77..8f2c3844 100644
--- a/EvalView/static/EvalView/js/direct-assessment-document-mqm-esa.js
+++ b/EvalView/static/EvalView/js/direct-assessment-document-mqm-esa.js
@@ -75,6 +75,8 @@ const ERROR_TYPES = {
     },
     "Other": {},
 }
+
+
 Object.keys(SEVERITY_TO_COLOR).map((key) => {
     $(`#instruction_sev_${key}`).css("background-color", SEVERITY_TO_COLOR[key])
 })
@@ -311,8 +313,14 @@ class MQMItemHandler {
         }
         this.mqm_submitted = structuredClone(this.mqm)
         this.mqm_orig = JSON.parse(JSON.parse(this.el.children('#mqm-payload-orig').html()))
-        this.text_source_orig = decodeEntities(JSON.parse(this.el.children('#text-source-payload').html()).trim())
-        this.source_video = JSON.parse(this.el.children('#text-source-payload').html()).trim().startsWith("<video")
+        
+        let _src_raw = JSON.parse(this.el.children('#text-source-payload').html()).trim()
+        this.text_source_orig = decodeEntities(_src_raw)
+        this.source_is_multimodal = (
+            _src_raw.startsWith("<video") ||
+            _src_raw.startsWith("<audio") ||
+            _src_raw.startsWith("<img")
+        )
         // NOTE: we don't decode entities for the target text, which might cause false positive annotated errors
         this.text_target_orig = JSON.parse(this.el.children('#text-target-payload').html()).trim()
         this.SELECTION_STATE = []
@@ -335,9 +343,11 @@ class MQMItemHandler {
         let score = parseFloat(this.el.children('#score-payload').html())
 
     
-
         // setup_span_structure
         let html_target = this.text_target_orig.split("").map((v, i) => {
+            if (v == "\n") {
+                return "<br>" // preserve newlines
+            }
             return `<span class="mqm_char" id="target_char_${i}" char_id="${i}">${v}</span>`
         }).join("") + " <span class='mqm_char span_missing' id='target_char_missing' char_id='missing'>[MISSING]</span>"
         this.el_target.html(html_target)
@@ -357,8 +367,11 @@ class MQMItemHandler {
         }
 
         // handle character alignment estimation
-        if (!this.source_video) {
+        if (!this.source_is_multimodal) {
             let html_source = this.text_source_orig.split("").map((v, i) => {
+                if (v == "\n") {
+                    return "<br>" // preserve newlines
+                }
                 return `<span class="mqm_char_src" id="source_char_${i}" char_id="${i}">${v}</span>`
             }).join("")
             this.el_source.html(html_source)
diff --git a/EvalView/templates/EvalView/direct-assessment-document-mqm-esa.html b/EvalView/templates/EvalView/direct-assessment-document-mqm-esa.html
index 3de8e146..c9df8bba 100644
--- a/EvalView/templates/EvalView/direct-assessment-document-mqm-esa.html
+++ b/EvalView/templates/EvalView/direct-assessment-document-mqm-esa.html
@@ -22,7 +22,7 @@
             <td style="width:33%;text-align:left;">
                 <strong id="task_progress">
                     Completed {{docs_completed}}/{{docs_total}} documents,
-                    {{items_completed}}/100 segments
+                    {{items_completed}}/{{items_total}} segments
                 </strong>
             </td>
             <td style="width:33%;text-align:center;">
diff --git a/EvalView/views.py b/EvalView/views.py
index 40b4e7b5..fbe85cff 100644
--- a/EvalView/views.py
+++ b/EvalView/views.py
@@ -1133,10 +1133,11 @@ def direct_assessment_document_mqmesa(campaign, current_task, request):
     (
         next_item,
         items_completed,
+        items_total,
         docs_completed,
+        docs_total,
         doc_items,
         doc_items_results,
-        docs_total,
     ) = current_task.next_document_for_user_mqmesa(request.user)
 
     if not next_item:
@@ -1151,11 +1152,15 @@ def direct_assessment_document_mqmesa(campaign, current_task, request):
             # Send response to the Ajax POST request
             return JsonResponse(context)
 
-    # TODO: hotfix for WMT24
+    # TODO: hotfix for WMT24 and WMT25
     # Tracking issue: https://github.com/AppraiseDev/Appraise/issues/185
     for item in doc_items:
-        # don't escape HTML video or images
-        if item.sourceText.strip().startswith("<video") or item.sourceText.strip().startswith("<img"):
+        # don't escape HTML video, audio or images
+        if (
+            item.sourceText.strip().startswith("<video") or
+            item.sourceText.strip().startswith("<audio") or
+            item.sourceText.strip().startswith("<img")
+        ):
             continue
         item.sourceText = escape(item.sourceText)
 
@@ -1201,6 +1206,7 @@ def direct_assessment_document_mqmesa(campaign, current_task, request):
         'task_id': next_item.id,
         'document_id': next_item.documentID,
         'items_completed': items_completed,
+        'items_total': items_total,
         'docs_completed': docs_completed,
         'docs_total': docs_total,
         'source_language': source_language,