setAnalyze(false); if ($GLOBALS['plugin_param'] !== 'table') { $this->setAnalyze(true); } $importPluginProperties = new ImportPluginProperties(); $importPluginProperties->setText('CSV'); $importPluginProperties->setExtension('csv'); $importPluginProperties->setOptionsText(__('Options')); // create the root group that will be the options field for // $importPluginProperties // this will be shown as "Format specific options" $importSpecificOptions = new OptionsPropertyRootGroup('Format Specific Options'); $generalOptions = $this->getGeneralOptions(); if ($GLOBALS['plugin_param'] !== 'table') { $leaf = new TextPropertyItem( 'new_tbl_name', __( 'Name of the new table (optional):' ) ); $generalOptions->addProperty($leaf); if ($GLOBALS['plugin_param'] === 'server') { $leaf = new TextPropertyItem( 'new_db_name', __( 'Name of the new database (optional):' ) ); $generalOptions->addProperty($leaf); } $leaf = new NumberPropertyItem( 'partial_import', __( 'Import these many number of rows (optional):' ) ); $generalOptions->addProperty($leaf); $leaf = new BoolPropertyItem( 'col_names', __( 'The first line of the file contains the table column names' . ' (if this is unchecked, the first line will become part' . ' of the data)' ) ); $generalOptions->addProperty($leaf); } else { $leaf = new NumberPropertyItem( 'partial_import', __( 'Import these many number of rows (optional):' ) ); $generalOptions->addProperty($leaf); $hint = new Message( __( 'If the data in each row of the file is not' . ' in the same order as in the database, list the corresponding' . ' column names here. Column names must be separated by commas' . ' and not enclosed in quotations.' ) ); $leaf = new TextPropertyItem( 'columns', __('Column names:') . ' ' . Generator::showHint($hint->getMessage()) ); $generalOptions->addProperty($leaf); } $leaf = new BoolPropertyItem( 'ignore', __('Do not abort on INSERT error') ); $generalOptions->addProperty($leaf); // add the main group to the root group $importSpecificOptions->addProperty($generalOptions); // set the options for the import plugin property item $importPluginProperties->setOptions($importSpecificOptions); return $importPluginProperties; } /** * Handles the whole import logic * * @param array $sql_data 2-element array with sql data */ public function doImport(?File $importHandle = null, array &$sql_data = []): void { global $error, $message, $dbi; global $db, $table, $csv_terminated, $csv_enclosed, $csv_escaped, $csv_new_line, $csv_columns, $errorUrl; // $csv_replace and $csv_ignore should have been here, // but we use directly from $_POST global $timeout_passed, $finished; $replacements = [ '\\n' => "\n", '\\t' => "\t", '\\r' => "\r", ]; $csv_terminated = strtr($csv_terminated, $replacements); $csv_enclosed = strtr($csv_enclosed, $replacements); $csv_escaped = strtr($csv_escaped, $replacements); $csv_new_line = strtr($csv_new_line, $replacements); [$error, $message] = $this->buildErrorsForParams( $csv_terminated, $csv_enclosed, $csv_escaped, $csv_new_line, (string) $errorUrl ); [$sql_template, $required_fields, $fields] = $this->getSqlTemplateAndRequiredFields($db, $table, $csv_columns); // Defaults for parser $i = 0; $len = 0; $lastlen = null; $line = 1; $lasti = -1; $values = []; $csv_finish = false; $max_lines = 0; // defaults to 0 (get all the lines) /** * If we get a negative value, probably someone changed min value * attribute in DOM or there is an integer overflow, whatever be * the case, get all the lines. */ if (isset($_REQUEST['csv_partial_import']) && $_REQUEST['csv_partial_import'] > 0) { $max_lines = $_REQUEST['csv_partial_import']; } $max_lines_constraint = $max_lines + 1; // if the first row has to be counted as column names, include one more row in the max lines if (isset($_REQUEST['csv_col_names'])) { $max_lines_constraint++; } $tempRow = []; $rows = []; $col_names = []; $tables = []; $buffer = ''; $col_count = 0; $max_cols = 0; $csv_terminated_len = mb_strlen($csv_terminated); while (! ($finished && $i >= $len) && ! $error && ! $timeout_passed) { $data = $this->import->getNextChunk($importHandle); if ($data === false) { // subtract data we didn't handle yet and stop processing $GLOBALS['offset'] -= strlen($buffer); break; } if ($data !== true) { // Append new data to buffer $buffer .= $data; unset($data); // Force a trailing new line at EOF to prevent parsing problems if ($finished && $buffer) { $finalch = mb_substr($buffer, -1); if ($csv_new_line === 'auto' && $finalch != "\r" && $finalch != "\n") { $buffer .= "\n"; } elseif ($csv_new_line !== 'auto' && $finalch != $csv_new_line) { $buffer .= $csv_new_line; } } // Do not parse string when we're not at the end // and don't have new line inside if ( ($csv_new_line === 'auto' && ! str_contains($buffer, "\r") && ! str_contains($buffer, "\n")) || ($csv_new_line !== 'auto' && ! str_contains($buffer, $csv_new_line)) ) { continue; } } // Current length of our buffer $len = mb_strlen($buffer); // Currently parsed char $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) { $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len); $i += $csv_terminated_len - 1; } while ($i < $len) { // Deadlock protection if ($lasti == $i && $lastlen == $len) { $message = Message::error( __('Invalid format of CSV input on line %d.') ); $message->addParam($line); $error = true; break; } $lasti = $i; $lastlen = $len; // This can happen with auto EOL and \r at the end of buffer if (! $csv_finish) { // Grab empty field if ($ch == $csv_terminated) { if ($i == $len - 1) { break; } $values[] = ''; $i++; $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) { $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len); $i += $csv_terminated_len - 1; } continue; } // Grab one field $fallbacki = $i; if ($ch == $csv_enclosed) { if ($i == $len - 1) { break; } $need_end = true; $i++; $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) { $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len); $i += $csv_terminated_len - 1; } } else { $need_end = false; } $fail = false; $value = ''; while ( ($need_end && ($ch != $csv_enclosed || $csv_enclosed == $csv_escaped)) || (! $need_end && ! ($ch == $csv_terminated || $ch == $csv_new_line || ($csv_new_line === 'auto' && ($ch == "\r" || $ch == "\n")))) ) { if ($ch == $csv_escaped) { if ($i == $len - 1) { $fail = true; break; } $i++; $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) { $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len); $i += $csv_terminated_len - 1; } if ( $csv_enclosed == $csv_escaped && ($ch == $csv_terminated || $ch == $csv_new_line || ($csv_new_line === 'auto' && ($ch == "\r" || $ch == "\n"))) ) { break; } } $value .= $ch; if ($i == $len - 1) { if (! $finished) { $fail = true; } break; } $i++; $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len <= 1 || $ch != $csv_terminated[0]) { continue; } $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len); $i += $csv_terminated_len - 1; } // unquoted NULL string if ($need_end === false && $value === 'NULL') { $value = null; } if ($fail) { $i = $fallbacki; $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) { $i += $csv_terminated_len - 1; } break; } // Need to strip trailing enclosing char? if ($need_end && $ch == $csv_enclosed) { if ($finished && $i == $len - 1) { $ch = null; } elseif ($i == $len - 1) { $i = $fallbacki; $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) { $i += $csv_terminated_len - 1; } break; } else { $i++; $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) { $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len); $i += $csv_terminated_len - 1; } } } // Are we at the end? if ( $ch == $csv_new_line || ($csv_new_line === 'auto' && ($ch == "\r" || $ch == "\n")) || ($finished && $i == $len - 1) ) { $csv_finish = true; } // Go to next char if ($ch == $csv_terminated) { if ($i == $len - 1) { $i = $fallbacki; $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) { $i += $csv_terminated_len - 1; } break; } $i++; $ch = mb_substr($buffer, $i, 1); if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) { $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len); $i += $csv_terminated_len - 1; } } // If everything went okay, store value $values[] = $value; } // End of line if ( ! $csv_finish && $ch != $csv_new_line && ($csv_new_line !== 'auto' || ($ch != "\r" && $ch != "\n")) ) { continue; } if ($csv_new_line === 'auto' && $ch == "\r") { // Handle "\r\n" if ($i >= ($len - 2) && ! $finished) { break; // We need more data to decide new line } if (mb_substr($buffer, $i + 1, 1) == "\n") { $i++; } } // We didn't parse value till the end of line, so there was // empty one if (! $csv_finish) { $values[] = ''; } if ($this->getAnalyze()) { foreach ($values as $val) { $tempRow[] = $val; ++$col_count; } if ($col_count > $max_cols) { $max_cols = $col_count; } $col_count = 0; $rows[] = $tempRow; $tempRow = []; } else { // Do we have correct count of values? if (count($values) != $required_fields) { // Hack for excel if ($values[count($values) - 1] !== ';') { $message = Message::error( __( 'Invalid column count in CSV input on line %d.' ) ); $message->addParam($line); $error = true; break; } unset($values[count($values) - 1]); } $first = true; $sql = $sql_template; foreach ($values as $val) { if (! $first) { $sql .= ', '; } if ($val === null) { $sql .= 'NULL'; } else { $sql .= '\'' . $dbi->escapeString($val) . '\''; } $first = false; } $sql .= ')'; if (isset($_POST['csv_replace'])) { $sql .= ' ON DUPLICATE KEY UPDATE '; foreach ($fields as $field) { $fieldName = Util::backquote($field['Field']); $sql .= $fieldName . ' = VALUES(' . $fieldName . '), '; } $sql = rtrim($sql, ', '); } /** * @todo maybe we could add original line to verbose * SQL in comment */ $this->import->runQuery($sql, $sql, $sql_data); } $line++; $csv_finish = false; $values = []; $buffer = mb_substr($buffer, $i + 1); $len = mb_strlen($buffer); $i = 0; $lasti = -1; $ch = mb_substr($buffer, 0, 1); if ($max_lines > 0 && $line == $max_lines_constraint) { $finished = 1; break; } } if ($max_lines > 0 && $line == $max_lines_constraint) { $finished = 1; break; } } if ($this->getAnalyze()) { /* Fill out all rows */ $num_rows = count($rows); for ($i = 0; $i < $num_rows; ++$i) { for ($j = count($rows[$i]); $j < $max_cols; ++$j) { $rows[$i][] = 'NULL'; } } $col_names = $this->getColumnNames($col_names, $max_cols, $rows); /* Remove the first row if it contains the column names */ if (isset($_REQUEST['csv_col_names'])) { array_shift($rows); } $tbl_name = $this->getTableNameFromImport((string) $db); $tables[] = [ $tbl_name, $col_names, $rows, ]; /* Obtain the best-fit MySQL types for each column */ $analyses = []; $analyses[] = $this->import->analyzeTable($tables[0]); /** * string $db_name (no backquotes) * * array $table = array(table_name, array() column_names, array()() rows) * array $tables = array of "$table"s * * array $analysis = array(array() column_types, array() column_sizes) * array $analyses = array of "$analysis"s * * array $create = array of SQL strings * * array $options = an associative array of options */ /* Set database name to the currently selected one, if applicable, * Otherwise, check if user provided the database name in the request, * if not, set the default name */ if (isset($_REQUEST['csv_new_db_name']) && strlen($_REQUEST['csv_new_db_name']) > 0) { $newDb = $_REQUEST['csv_new_db_name']; } else { $result = $dbi->fetchResult('SHOW DATABASES'); $newDb = 'CSV_DB ' . (count($result) + 1); } [$db_name, $options] = $this->getDbnameAndOptions($db, $newDb); /* Non-applicable parameters */ $create = null; /* Created and execute necessary SQL statements from data */ $this->import->buildSql($db_name, $tables, $analyses, $create, $options, $sql_data); unset($tables, $analyses); } // Commit any possible data in buffers $this->import->runQuery('', '', $sql_data); if (count($values) == 0 || $error !== false) { return; } $message = Message::error( __('Invalid format of CSV input on line %d.') ); $message->addParam($line); $error = true; } private function buildErrorsForParams( string $csvTerminated, string $csvEnclosed, string $csvEscaped, string $csvNewLine, string $errUrl ): array { global $error, $message; $param_error = false; if (strlen($csvTerminated) === 0) { $message = Message::error( __('Invalid parameter for CSV import: %s') ); $message->addParam(__('Columns terminated with')); $error = true; $param_error = true; // The default dialog of MS Excel when generating a CSV produces a // semi-colon-separated file with no chance of specifying the // enclosing character. Thus, users who want to import this file // tend to remove the enclosing character on the Import dialog. // I could not find a test case where having no enclosing characters // confuses this script. // But the parser won't work correctly with strings so we allow just // one character. } elseif (mb_strlen($csvEnclosed) > 1) { $message = Message::error( __('Invalid parameter for CSV import: %s') ); $message->addParam(__('Columns enclosed with')); $error = true; $param_error = true; // I could not find a test case where having no escaping characters // confuses this script. // But the parser won't work correctly with strings so we allow just // one character. } elseif (mb_strlen($csvEscaped) > 1) { $message = Message::error( __('Invalid parameter for CSV import: %s') ); $message->addParam(__('Columns escaped with')); $error = true; $param_error = true; } elseif (mb_strlen($csvNewLine) != 1 && $csvNewLine !== 'auto') { $message = Message::error( __('Invalid parameter for CSV import: %s') ); $message->addParam(__('Lines terminated with')); $error = true; $param_error = true; } // If there is an error in the parameters entered, // indicate that immediately. if ($param_error) { Generator::mysqlDie( $message->getMessage(), '', false, $errUrl ); } return [$error, $message]; } private function getTableNameFromImport(string $databaseName): string { global $import_file_name, $dbi; $importFileName = basename($import_file_name, '.csv'); $importFileName = mb_strtolower($importFileName); $importFileName = (string) preg_replace('/[^a-zA-Z0-9_]/', '_', $importFileName); // get new table name, if user didn't provide one, set the default name if (isset($_REQUEST['csv_new_tbl_name']) && strlen($_REQUEST['csv_new_tbl_name']) > 0) { return $_REQUEST['csv_new_tbl_name']; } if (mb_strlen($databaseName)) { $result = $dbi->fetchResult('SHOW TABLES'); // logic to get table name from filename // if no table then use filename as table name if (count($result) === 0) { return $importFileName; } // check to see if {filename} as table exist $nameArray = preg_grep('/' . $importFileName . '/isU', $result); // if no use filename as table name if ($nameArray === false || count($nameArray) === 0) { return $importFileName; } // check if {filename}_ as table exist $nameArray = preg_grep('/' . $importFileName . '_/isU', $result); if ($nameArray === false) { return $importFileName; } return $importFileName . '_' . (count($nameArray) + 1); } return $importFileName; } private function getColumnNames(array $columnNames, int $maxCols, array $rows): array { if (isset($_REQUEST['csv_col_names'])) { $columnNames = array_splice($rows, 0, 1); $columnNames = $columnNames[0]; // MySQL column names can't end with a space character. foreach ($columnNames as $key => $col_name) { $columnNames[$key] = rtrim($col_name); } } if ((isset($columnNames) && count($columnNames) != $maxCols) || ! isset($columnNames)) { // Fill out column names for ($i = 0; $i < $maxCols; ++$i) { $columnNames[] = 'COL ' . ($i + 1); } } return $columnNames; } private function getSqlTemplateAndRequiredFields( ?string $db, ?string $table, ?string $csvColumns ): array { global $dbi, $error, $message; $requiredFields = 0; $sqlTemplate = ''; $fields = []; if (! $this->getAnalyze() && $db !== null && $table !== null) { $sqlTemplate = 'INSERT'; if (isset($_POST['csv_ignore'])) { $sqlTemplate .= ' IGNORE'; } $sqlTemplate .= ' INTO ' . Util::backquote($table); $tmp_fields = $dbi->getColumns($db, $table); if (empty($csvColumns)) { $fields = $tmp_fields; } else { $sqlTemplate .= ' ('; $fields = []; $tmp = preg_split('/,( ?)/', $csvColumns); if ($tmp === false) { $tmp = []; } foreach ($tmp as $val) { if (count($fields) > 0) { $sqlTemplate .= ', '; } /* Trim also `, if user already included backquoted fields */ $val = trim($val, " \t\r\n\0\x0B`"); $found = false; foreach ($tmp_fields as $field) { if ($field['Field'] == $val) { $found = true; break; } } if (! $found) { $message = Message::error( __( 'Invalid column (%s) specified! Ensure that columns' . ' names are spelled correctly, separated by commas' . ', and not enclosed in quotes.' ) ); $message->addParam($val); $error = true; break; } if (isset($field)) { $fields[] = $field; } $sqlTemplate .= Util::backquote($val); } $sqlTemplate .= ') '; } $requiredFields = count($fields); $sqlTemplate .= ' VALUES ('; } return [$sqlTemplate, $requiredFields, $fields]; } /** * Read the expected column_separated_with String of length * $csv_terminated_len from the $buffer * into variable $ch and return the read string $ch * * @param string $buffer The original string buffer read from * csv file * @param string $ch Partially read "column Separated with" * string, also used to return after * reading length equal $csv_terminated_len * @param int $i Current read counter of buffer string * @param int $csv_terminated_len The length of "column separated with" * String * * @return string */ public function readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len) { for ($j = 0; $j < $csv_terminated_len - 1; $j++) { $i++; $ch .= mb_substr($buffer, $i, 1); } return $ch; } /* ~~~~~~~~~~~~~~~~~~~~ Getters and Setters ~~~~~~~~~~~~~~~~~~~~ */ /** * Returns true if the table should be analyzed, false otherwise */ private function getAnalyze(): bool { return $this->analyze; } /** * Sets to true if the table should be analyzed, false otherwise * * @param bool $analyze status */ private function setAnalyze($analyze): void { $this->analyze = $analyze; } }