anderson-ufrj committed on
Commit
5688acd
·
1 Parent(s): f021101

feat(models): add data models for dados.gov.br API

Browse files

- Create Pydantic models for datasets, resources, and organizations
- Add models for search results and API responses
- Include validation for data formats and field normalization
- Support metadata fields and relationships between entities

Files changed (1) hide show
  1. src/tools/dados_gov_models.py +159 -0
src/tools/dados_gov_models.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data models for Dados.gov.br API responses.
3
+
4
+ This module contains Pydantic models for parsing and validating
5
+ responses from the Brazilian Open Data Portal API.
6
+ """
7
+
8
+ from datetime import datetime
9
+ from typing import Any, Dict, List, Optional, Union
10
+
11
+ from pydantic import BaseModel, Field, field_validator
12
+
13
+
14
class Resource(BaseModel):
    """A single resource (file) belonging to a dataset.

    ``id``, ``package_id``, ``name`` and ``url`` are required. Every other
    field is optional and defaults to ``None``.
    """

    # Required fields.
    id: str = Field(..., description="Unique resource identifier")
    package_id: str = Field(..., description="Parent dataset identifier")
    name: str = Field(..., description="Resource name")

    # Optional descriptive fields; ``format`` is normalized by the validator below.
    description: Optional[str] = Field(
        default=None, description="Resource description"
    )
    format: Optional[str] = Field(
        default=None, description="File format (CSV, JSON, XML, etc.)"
    )

    url: str = Field(..., description="URL to access the resource")

    size: Optional[int] = Field(default=None, description="File size in bytes")
    mimetype: Optional[str] = Field(default=None, description="MIME type")
    created: Optional[datetime] = Field(default=None, description="Creation date")
    last_modified: Optional[datetime] = Field(
        default=None, description="Last modification date"
    )

    @field_validator("format")
    @classmethod
    def uppercase_format(cls, v: Optional[str]) -> Optional[str]:
        """Return ``v`` upper-cased.

        Any falsy input (``None`` or the empty string) is returned as ``None``.
        """
        if not v:
            return None
        return v.upper()
33
+
34
+
35
class Tag(BaseModel):
    """Model for dataset tags.

    Only ``name`` is required; ``display_name`` and ``vocabulary_id``
    default to ``None``.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which is deprecated in v2 and triggers a deprecation warning when the
    # class is created. No field here declares an alias, so
    # ``populate_by_name`` is kept only to preserve the original settings.
    model_config = {"populate_by_name": True}

    name: str = Field(..., description="Tag name")
    display_name: Optional[str] = Field(None, description="Display name")
    vocabulary_id: Optional[str] = Field(None, description="Vocabulary identifier")
44
+
45
+
46
class Organization(BaseModel):
    """Model for data publishing organizations.

    ``id``, ``name`` and ``title`` are required. ``package_count`` defaults
    to ``0``; the remaining fields default to ``None``.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which is deprecated in v2 and triggers a deprecation warning when the
    # class is created. No field here declares an alias, so
    # ``populate_by_name`` is kept only to preserve the original settings.
    model_config = {"populate_by_name": True}

    id: str = Field(..., description="Organization identifier")
    name: str = Field(..., description="Organization name")
    title: str = Field(..., description="Organization title")
    description: Optional[str] = Field(None, description="Organization description")
    image_url: Optional[str] = Field(None, description="Organization logo URL")
    created: Optional[datetime] = Field(None, description="Creation date")
    package_count: Optional[int] = Field(0, description="Number of datasets")
59
+
60
+
61
class Dataset(BaseModel):
    """Model for a complete dataset.

    ``id``, ``name`` and ``title`` are required. ``state`` defaults to
    ``"active"`` and ``type`` to ``"dataset"``. ``resources`` and ``tags``
    default to a fresh empty list per instance; every other field defaults
    to ``None``.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which is deprecated in v2 and triggers a deprecation warning when the
    # class is created. No field here declares an alias, so
    # ``populate_by_name`` is kept only to preserve the original settings.
    model_config = {"populate_by_name": True}

    id: str = Field(..., description="Dataset identifier")
    name: str = Field(..., description="Dataset name (slug)")
    title: str = Field(..., description="Dataset title")
    author: Optional[str] = Field(None, description="Dataset author")
    author_email: Optional[str] = Field(None, description="Author email")
    maintainer: Optional[str] = Field(None, description="Dataset maintainer")
    maintainer_email: Optional[str] = Field(None, description="Maintainer email")
    license_id: Optional[str] = Field(None, description="License identifier")
    notes: Optional[str] = Field(None, description="Dataset description/notes")
    url: Optional[str] = Field(None, description="Dataset URL")
    version: Optional[str] = Field(None, description="Dataset version")
    state: Optional[str] = Field("active", description="Dataset state")
    type: Optional[str] = Field("dataset", description="Resource type")

    # Relationships
    organization: Optional[Organization] = Field(None, description="Publishing organization")
    # default_factory gives each instance its own list instead of a shared one.
    resources: List[Resource] = Field(default_factory=list, description="Dataset resources")
    tags: List[Tag] = Field(default_factory=list, description="Dataset tags")

    # Metadata
    metadata_created: Optional[datetime] = Field(None, description="Metadata creation date")
    metadata_modified: Optional[datetime] = Field(None, description="Metadata modification date")

    # Additional fields
    extras: Optional[List[Dict[str, Any]]] = Field(None, description="Extra metadata")
92
+
93
+
94
class DatasetSearchResult(BaseModel):
    """Model for dataset search results.

    ``count`` and ``results`` are required; ``facets`` and
    ``search_facets`` default to ``None``.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which is deprecated in v2 and triggers a deprecation warning when the
    # class is created. No field here declares an alias, so
    # ``populate_by_name`` is kept only to preserve the original settings.
    model_config = {"populate_by_name": True}

    count: int = Field(..., description="Total number of results")
    results: List[Dataset] = Field(..., description="List of datasets")
    facets: Optional[Dict[str, Any]] = Field(None, description="Search facets")
    search_facets: Optional[Dict[str, Any]] = Field(None, description="Active search facets")
104
+
105
+
106
class ResourceSearchResult(BaseModel):
    """Model for resource search results.

    Both ``count`` and ``results`` are required.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which is deprecated in v2 and triggers a deprecation warning when the
    # class is created. No field here declares an alias, so
    # ``populate_by_name`` is kept only to preserve the original settings.
    model_config = {"populate_by_name": True}

    count: int = Field(..., description="Total number of results")
    results: List[Resource] = Field(..., description="List of resources")
114
+
115
+
116
class TagWithCount(BaseModel):
    """Model for tags with usage count.

    Both ``name`` and ``count`` are required.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which is deprecated in v2 and triggers a deprecation warning when the
    # class is created. No field here declares an alias, so
    # ``populate_by_name`` is kept only to preserve the original settings.
    model_config = {"populate_by_name": True}

    name: str = Field(..., description="Tag name")
    count: int = Field(..., description="Number of datasets using this tag")
124
+
125
+
126
class OrganizationWithDatasets(Organization):
    """Extended organization model including datasets.

    Adds an optional ``packages`` list to the fields inherited from
    ``Organization``; it defaults to ``None``.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which is deprecated in v2 and triggers a deprecation warning when the
    # class is created. No field here declares an alias, so
    # ``populate_by_name`` is kept only to preserve the original settings.
    model_config = {"populate_by_name": True}

    packages: Optional[List[Dataset]] = Field(None, description="Organization's datasets")
133
+
134
+
135
class DataPortalStats(BaseModel):
    """Model for general portal statistics.

    All four counters are required integers.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which is deprecated in v2 and triggers a deprecation warning when the
    # class is created. No field here declares an alias, so
    # ``populate_by_name`` is kept only to preserve the original settings.
    model_config = {"populate_by_name": True}

    dataset_count: int = Field(..., description="Total number of datasets")
    organization_count: int = Field(..., description="Total number of organizations")
    resource_count: int = Field(..., description="Total number of resources")
    tag_count: int = Field(..., description="Total number of unique tags")
145
+
146
+
147
class DatasetActivity(BaseModel):
    """Model for dataset activity/history.

    ``id``, ``timestamp``, ``object_id`` and ``activity_type`` are required;
    ``user_id``, ``revision_id`` and ``data`` default to ``None``.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which is deprecated in v2 and triggers a deprecation warning when the
    # class is created. No field here declares an alias, so
    # ``populate_by_name`` is kept only to preserve the original settings.
    model_config = {"populate_by_name": True}

    id: str = Field(..., description="Activity identifier")
    timestamp: datetime = Field(..., description="Activity timestamp")
    user_id: Optional[str] = Field(None, description="User who performed the activity")
    object_id: str = Field(..., description="Dataset identifier")
    revision_id: Optional[str] = Field(None, description="Revision identifier")
    activity_type: str = Field(..., description="Type of activity")
    data: Optional[Dict[str, Any]] = Field(None, description="Additional activity data")