This commit is contained in:
2025-01-23 19:53:48 -03:00
parent c544337e9c
commit 07ab395c2f
562 changed files with 152672 additions and 271017 deletions

View File

@@ -0,0 +1,158 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Loading\n",
"# First step of the data treatment: read the raw CSV into `dados`.\n",
"import pandas as pd\n",
"import numpy as np\n",
"import csv\n",
"\n",
"# The file is read as ';'-separated, UTF-8 encoded, with ',' as the decimal mark.\n",
"dados = pd.read_csv(\n",
"    'BaciaRioDoce_filtro_setembro_abril.csv',\n",
"    sep=';',\n",
"    encoding='utf-8',\n",
"    decimal=',',\n",
")\n",
"# dados.head(5)  # uncomment to preview the first rows"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Erro ao converter a coluna 'DATA': name 'dados_df' is not defined\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 12342 entries, 0 to 12341\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CODIGO 12342 non-null int64 \n",
" 1 DATA 12342 non-null datetime64[ns]\n",
" 2 VALOR 12342 non-null object \n",
"dtypes: datetime64[ns](1), int64(1), object(1)\n",
"memory usage: 289.4+ KB\n",
"None\n",
" CODIGO DATA VALOR\n",
"0 10004 2020-09-01 0\n",
"1 10004 2020-09-02 0\n",
"2 10004 2020-09-03 0\n",
"3 10004 2020-09-04 0\n",
"4 10004 2020-09-05 0\n"
]
}
],
"source": [
"# Parse the 'DATA' column as datetime (expected format YYYY-MM-DD).\n",
"# errors='coerce' turns every value that cannot be parsed into NaT instead of raising.\n",
"try:\n",
"    dados['DATA'] = pd.to_datetime(dados['DATA'], format='%Y-%m-%d', errors='coerce')\n",
"except Exception as e:\n",
"    # Best-effort: report the failure and continue so the frame can still be inspected below.\n",
"    print(f\"Erro ao converter a coluna 'DATA': {e}\")\n",
"else:\n",
"    # Only look for NaT once the conversion itself succeeded, so a problem in this\n",
"    # check can never be misreported as a conversion error.\n",
"    if dados['DATA'].isnull().any():\n",
"        print(\"Aviso: Algumas datas foram convertidas para NaT (Not a Time) devido a formatos inválidos.\")\n",
"\n",
"# Inspect the DataFrame after the conversion.\n",
"# DataFrame.info() prints its report itself and returns None, so it is not wrapped in print().\n",
"dados.info()\n",
"print(dados.head())"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 12342 entries, 0 to 12341\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CODIGO 12342 non-null int64 \n",
" 1 DATA 12342 non-null datetime64[ns]\n",
" 2 VALOR 12342 non-null float64 \n",
"dtypes: datetime64[ns](1), float64(1), int64(1)\n",
"memory usage: 289.4 KB\n",
"None\n",
" CODIGO DATA VALOR\n",
"0 10004 2020-09-01 0.0\n",
"1 10004 2020-09-02 0.0\n",
"2 10004 2020-09-03 0.0\n",
"3 10004 2020-09-04 0.0\n",
"4 10004 2020-09-05 0.0\n"
]
}
],
"source": [
"# Convert the 'VALOR' column to float, accepting ',' as the decimal separator.\n",
"try:\n",
"    # Work on string values so the separator replacement applies to every row.\n",
"    valor_texto = dados['VALOR'].astype(str).str.replace(',', '.', regex=False)\n",
"\n",
"    # errors='coerce' turns anything that is not a number into NaN.\n",
"    valor_num = pd.to_numeric(valor_texto, errors='coerce')\n",
"\n",
"    # Report how many entries are about to be zero-filled, so the substitution is not silent.\n",
"    n_invalidos = int(valor_num.isna().sum())\n",
"    if n_invalidos:\n",
"        print(f\"Aviso: {n_invalidos} valor(es) ausente(s) ou inválido(s) em 'VALOR' substituído(s) por 0.\")\n",
"\n",
"    # Assign once at the end: a failure above leaves the original column untouched.\n",
"    dados['VALOR'] = valor_num.fillna(0)\n",
"except Exception as e:\n",
"    # Best-effort: report the failure and continue so the frame can still be inspected below.\n",
"    print(f\"Erro ao converter a coluna 'VALOR': {e}\")\n",
"\n",
"# Inspect the DataFrame after the conversion.\n",
"# DataFrame.info() prints its report itself and returns None, so it is not wrapped in print().\n",
"dados.info()\n",
"print(dados.head())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}