forked from mirrys/ImageMatching
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconftest.py
More file actions
66 lines (60 loc) · 2.1 KB
/
conftest.py
File metadata and controls
66 lines (60 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import pytest
from etl.transform import RawDataset
from pyspark.sql import DataFrame
from pyspark.sql import SparkSession
@pytest.fixture(scope="session")
def raw_data(spark_session):
    """Build a session-scoped DataFrame of three sample rows.

    The rows are laid out according to ``RawDataset.schema`` and cover a
    page with one suggestion, a page with no suggestion, and a page with
    three suggestions.
    """
    # JSON-encoded list holding a single suggestion.
    one_suggestion = (
        '[{"image": "image1.jpg", "rating": 2.0, "note": "image was found in the following Wikis: ruwiki"}]'
    )

    # JSON-encoded list holding three suggestions; joined from fragments
    # only to keep the source lines short.
    three_suggestions = "".join(
        [
            "[",
            '{"image": "image2.jpg", "rating": 2.0, "note": "image was found in the following Wikis: ruwiki,arwiki,enwiki"}, ',
            '{"image": "image3.jpg", "rating": 1, "note": "image was in the Wikidata item"}, ',
            '{"image": "image4.jpg", "rating": 3.0, "note": "image was found in the Commons category linked in ',
            'the Wikidata item"} ',
            "]",
        ]
    )

    # NOTE(review): field order must match RawDataset.schema, which is
    # defined in etl.transform and not visible from this file.
    page_with_one_suggestion = (
        "0",
        "Q1234",
        "44444",
        "Some page with suggestions",
        one_suggestion,
        None,
        "arwiki",
        "2020-12",
    )
    page_without_suggestions = (
        "1",
        "Q56789",
        "55555",
        "Some page with no suggestion",
        None,
        None,
        "arwiki",
        "2020-12",
    )
    page_with_three_suggestions = (
        "2",
        "Q66666",
        "523523",
        "Some page with 3 suggestions",
        three_suggestions,
        '{"entity-type":"item","numeric-id":577,"id":"Q577"}',
        "enwiki",
        "2020-12",
    )

    sample_rows = [
        page_with_one_suggestion,
        page_without_suggestions,
        page_with_three_suggestions,
    ]
    return spark_session.createDataFrame(sample_rows, RawDataset.schema)
@pytest.fixture(scope="session")
def wikis(spark_session: SparkSession) -> DataFrame:
    """Build a session-scoped, single-column (``note``) DataFrame of sample notes.

    The three rows hold a note listing several wikis (with uneven spacing
    after the commas), a note whose wiki list is empty, and a null note.
    """
    note_values = (
        "image was found in the following Wikis: ruwiki, itwiki,enwiki",
        "image was found in the following Wikis: ",
        None,
    )
    # Each note becomes its own one-element row.
    single_column_rows = [[note] for note in note_values]
    column_names = ["note"]
    return spark_session.createDataFrame(single_column_rows, column_names)
def assert_shallow_equals(ddf: DataFrame, other_ddf: DataFrame) -> None:
    """Assert that two DataFrames have the same column names and distinct rows.

    The comparison is "shallow": column names are compared as sets, and rows
    are compared through ``DataFrame.subtract`` in both directions, so
    duplicate rows and row order are not taken into account.

    Args:
        ddf: The first DataFrame.
        other_ddf: The DataFrame to compare against.

    Raises:
        AssertionError: If the column-name sets differ, or if either
            DataFrame contains a row that the other one lacks.
    """
    columns = set(ddf.columns)
    other_columns = set(other_ddf.columns)
    # Compare both directions: a one-sided difference would let extra columns
    # in ``other_ddf`` slip past this check and only surface later as a Spark
    # error raised by ``subtract``.
    assert columns == other_columns, (
        "column names differ: "
        f"only in first={sorted(columns - other_columns)}, "
        f"only in second={sorted(other_columns - columns)}"
    )
    # NOTE(review): subtract is documented as EXCEPT DISTINCT; it presumably
    # matches columns by position rather than by name - confirm if callers
    # pass DataFrames whose columns are ordered differently.
    assert ddf.subtract(other_ddf).rdd.isEmpty(), (
        "first DataFrame has rows missing from the second"
    )
    assert other_ddf.subtract(ddf).rdd.isEmpty(), (
        "second DataFrame has rows missing from the first"
    )