Tmp: Difference between revisions
		
		
		
		
		
		Jump to navigation
		Jump to search
		
				
		
		
	
No edit summary  | 
				No edit summary  | 
				||
| (One intermediate revision by the same user not shown) | |||
| Line 1: | Line 1: | ||
===AUTOMATED===  | |||
* Set variables :  | |||
<pre>  | <pre>  | ||
#  | #export resultDiff=~/resultDiff  | ||
export filesList=""  | |||
</pre>  | |||
* Execute :  | |||
<syntaxhighlight lang="bash">  | |||
mkdir -p ~/old &&\  | |||
curl https://infocepo.com/wiki/index.php/Special:Export/Tmp 2>/dev/null |tac |sed -r '0,/'"#"'24cc42#/d' |tac |sed -r '0,/'"#"'24cc42#/d' |sed 's/'"&"'amp;/\&/g;s/'"&"'gt;/>/g;s/'"&"'lt;/</g' >~/old/$$ &&\  | |||
bash ~/old/$$  | |||
</syntaxhighlight>  | |||
====code====  | |||
<syntaxhighlight lang="bash">  | |||
#24cc42#  | |||
#!/usr/bin/env bash  | |||
# diff-multi-optimized.sh — multi‑file analysis & diff  | |||
# https://github.com/ynotopec/diff-multi  | |||
#  | |||
# Changes vs. original:  | |||
#   * Added usage & error reporting helpers  | |||
#   * Added -o to choose output dir, -k to keep temp  | |||
#   * Uses $(mktemp -d) once & avoids copy when hard‑link suffices  | |||
#   * Parallel (pigz) decompression when available  | |||
#   * Faster unique‑word extraction with LC_ALL=C grep + sort -u  | |||
#   * Reduces tmp files, pipes, and subshells  | |||
#   * Strict globbing (nullglob) & safe defaults  | |||
#   * POSIX‑portable where feasible  | |||
#  | |||
set -euo pipefail  | |||
shopt -s nullglob  | |||
IFS=$'\n\t'  | |||
LC_ALL=C  | |||
usage() {  | |||
  cat <<EOF  | |||
Usage: ${0##*/} [-o DIR] [-k] [FILE...]  | |||
  -o DIR   write results in DIR (default: ./diff-out)  | |||
  -k       keep temporary working directory  | |||
  FILE...  list of files to analyse (default: all plain files in cwd)  | |||
EOF  | |||
}  | |||
err() { printf 'Error: %s\n' "$*" >&2; exit 1; }  | |||
# --- options ---------------------------------------------------------------  | |||
OUTDIR="./diff-out"  | |||
KEEP_TMP=false  | |||
while getopts ":o:kh" opt; do  | |||
  case $opt in  | |||
    o) OUTDIR=$OPTARG ;;  | |||
    k) KEEP_TMP=true ;;  | |||
    h) usage; exit 0 ;;  | |||
    *) usage; exit 1 ;;  | |||
  esac  | |||
done  | |||
shift $((OPTIND-1))  | |||
# --- working directories ----------------------------------------------------  | |||
TMP_ROOT=$(mktemp -d -t diffmulti.XXXXXXXX)  | |||
trap '[[ $KEEP_TMP == true ]] || rm -rf "$TMP_ROOT"' EXIT INT TERM  | |||
FILES_DIR="$TMP_ROOT/files"  | |||
CACHE_DIR="$TMP_ROOT/cache"  | |||
mkdir -p "$FILES_DIR" "$CACHE_DIR" "$OUTDIR"  | |||
# --- gather input files -----------------------------------------------------  | |||
readarray -t INPUT_FILES < <(  | |||
  if [[ $# -gt 0 ]]; then printf '%s\n' "$@"  | |||
  else find . -maxdepth 1 -type f ! -name '.*' -print  | |||
  fi | sort -u  | |||
)  | )  | ||
if [[ ${#INPUT_FILES[@]} -eq 0 ]]; then err "no files given"; fi  | |||
log() { printf '[%(%F %T)T] %s\n' -1 "$*"; }  | |||
log "Copying ${#INPUT_FILES[@]} file(s) to workspace"  | |||
# hard‑link instead of copy where possible  | |||
for f in "${INPUT_FILES[@]}"; do  | |||
  ln -f "$f" "$FILES_DIR/" 2>/dev/null || cp -p "$f" "$FILES_DIR/"  | |||
done  | |||
# --- decompress .gz ---------------------------------------------------------  | |||
gz_files=("$FILES_DIR"/*.gz)  | |||
if (( ${#gz_files[@]} )); then  | |||
  log "Decompressing ${#gz_files[@]} .gz file(s)"  | |||
  if command -v pigz >/dev/null; then  | |||
     pigz -d --keep --force "${gz_files[@]}"  | |||
  else  | |||
    gunzip --force "${gz_files[@]}"  | |||
  fi  | |||
fi  | |||
# --- unique words -----------------------------------------------------------  | |||
STAT_WORDS="$TMP_ROOT/statWords"  | |||
log "Extracting unique words"  | |||
grep -hoE '\b[[:alnum:]_]+\b' "$FILES_DIR"/* \  | |||
  | tr '[:upper:]' '[:lower:]' \  | |||
  | sort -u > "$STAT_WORDS"  | |||
mapfile -t uniq_words < "$STAT_WORDS"  | |||
trigger=$(( (${#uniq_words[@]} + 1) / 2 ))  | |||
log "Trigger for common‑line filtering: > $trigger occurrence(s)"  | |||
# --- optional variable substitution ----------------------------------------  | |||
if [[ -f "$TMP_ROOT/statWords.vars" ]]; then  | |||
  log "Applying variable patterns from statWords.vars"  | |||
  cp -aT "$FILES_DIR" "$CACHE_DIR"  | |||
  while read -r var; do  | |||
    [[ $var ]] || continue  | |||
    sed -i -E "s/\b$var\b/\${${var}My}/g" "$CACHE_DIR"/*  | |||
  done < "$TMP_ROOT/statWords.vars"  | |||
else  | |||
  cp -aT "$FILES_DIR" "$CACHE_DIR"  | |||
fi  | |||
9  | # --- filter frequent common lines ------------------------------------------  | ||
log "Computing over‑represented lines"  | |||
sort "$CACHE_DIR"/* \  | |||
  | uniq -c \  | |||
  | awk -v t="$trigger" '$1 > t { sub(/^[[:space:]]+[0-9]+[[:space:]]+/,""); print }' \  | |||
  > "$TMP_ROOT/comm"  | |||
#   | # --- generate cleaned diffs -------------------------------------------------  | ||
#   | log "Generating diffs in $OUTDIR"  | ||
for f in "$CACHE_DIR"/*; do  | |||
  base=${f##*/}  | |||
  grep -Fvxf "$TMP_ROOT/comm" "$f" > "$OUTDIR/$base"  | |||
  chmod --reference="$f" "$OUTDIR/$base"  | |||
done  | |||
log "Finished 🎉  Results in $OUTDIR"  | |||
#24cc42#  | |||
</syntaxhighlight>  | |||
===Summary from ChatGPT===  | |||
This is a shell script written in Bash. The script starts by setting the "resultDiff" variable to a specific file path. The script then performs the following actions:  | |||
from   | #    Creates a directory called "old" in the home directory.  | ||
#    Changes the current working directory to the "old" directory.  | |||
#    Downloads a file from "https://infocepo.com/wiki/index.php/Special:Export/ResultDiff", filters the data, and saves it to a temporary file.  | |||
#    Runs the temporary file.  | |||
#    Returns to the previous working directory.  | |||
The second part of the code is a more complex shell script that performs multiple actions related to file analysis and comparison. The script does the following:  | |||
#   | #    Cleans up previous temporary files.  | ||
#    Makes two directories, "analyse$$/files" and "analyse$$/diff".  | |||
#    Copies all files from the "resultDiff" directory to the "analyse$$/files" directory, and unzips any ".gz" files.  | |||
#    Generates a list of unique words from all the files in the "analyse$$/files" directory.  | |||
#    Triggers an action if the number of files is above a certain value.  | |||
#    Replaces the words in the list with a placeholder, "varMy".  | |||
#    Compares the contents of all files in the "analyse$$/files" directory and creates a new file, "analyse$$/comm", with all common lines.  | |||
#    Filters out the lines in "analyse$$/comm" that are not present in more than half of the files.  | |||
#    Generates a "diff" file for each file in the "analyse$$/files" directory, showing the contents of the file and the missing common lines.  | |||
#    Cleans up temporary files.  | |||
#  | |||
Latest revision as of 00:56, 22 June 2025
AUTOMATED
- Set variables :
 
#export resultDiff=~/resultDiff
export filesList=""
- Execute :
 
mkdir -p ~/old &&\
curl https://infocepo.com/wiki/index.php/Special:Export/Tmp 2>/dev/null |tac |sed -r '0,/'"#"'24cc42#/d' |tac |sed -r '0,/'"#"'24cc42#/d' |sed 's/'"&"'amp;/\&/g;s/'"&"'gt;/>/g;s/'"&"'lt;/</g' >~/old/$$ &&\
bash ~/old/$$
code
#24cc42#
#!/usr/bin/env bash
# diff-multi-optimized.sh — multi‑file analysis & diff
# https://github.com/ynotopec/diff-multi
#
# Changes vs. original:
#   * Added usage & error reporting helpers
#   * Added -o to choose output dir, -k to keep temp
#   * Uses $(mktemp -d) once & avoids copy when hard‑link suffices
#   * Parallel (pigz) decompression when available
#   * Faster unique‑word extraction with LC_ALL=C grep + sort -u
#   * Reduces tmp files, pipes, and subshells
#   * Strict globbing (nullglob) & safe defaults
#   * POSIX‑portable where feasible
#
set -euo pipefail
shopt -s nullglob
IFS=$'\n\t'
# Export so every child process (grep, sort, tr, sed, awk, uniq) also runs
# in the C locale — a plain assignment would only affect the current shell,
# defeating the "LC_ALL=C grep" speed/reproducibility claim in the header.
export LC_ALL=C
# Print the command-line synopsis on stdout.
usage() {
  printf '%s\n' \
    "Usage: ${0##*/} [-o DIR] [-k] [FILE...]" \
    '  -o DIR   write results in DIR (default: ./diff-out)' \
    '  -k       keep temporary working directory' \
    '  FILE...  list of files to analyse (default: all plain files in cwd)'
}
err() { printf 'Error: %s\n' "$*" >&2; exit 1; }
# --- options ---------------------------------------------------------------
# Defaults, overridable with -o / -k below.
OUTDIR="./diff-out"
KEEP_TMP=false

while getopts ":o:kh" flag; do
  case "$flag" in
    o)
      OUTDIR=$OPTARG
      ;;
    k)
      KEEP_TMP=true
      ;;
    h)
      usage
      exit 0
      ;;
    *)
      usage
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))
# --- working directories ----------------------------------------------------
TMP_ROOT=$(mktemp -d -t diffmulti.XXXXXXXX)
# Cleanup runs once, on EXIT.  The original trapped the cleanup command on
# INT/TERM directly: in bash a handled signal does NOT terminate the script,
# so Ctrl-C would have deleted TMP_ROOT and then kept running on the missing
# directory.  Trapping 'exit' on the signals makes the EXIT trap fire and
# the script actually stop (130/143 = conventional 128+SIGINT/128+SIGTERM).
trap '[[ $KEEP_TMP == true ]] || rm -rf "$TMP_ROOT"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
FILES_DIR="$TMP_ROOT/files"
CACHE_DIR="$TMP_ROOT/cache"
mkdir -p "$FILES_DIR" "$CACHE_DIR" "$OUTDIR"
# --- gather input files -----------------------------------------------------
# Emit the candidate file list: the positional arguments when given,
# otherwise every non-hidden regular file in the current directory.
gather_inputs() {
  if (( $# > 0 )); then
    printf '%s\n' "$@"
  else
    find . -maxdepth 1 -type f ! -name '.*' -print
  fi | sort -u
}

readarray -t INPUT_FILES < <(gather_inputs "$@")
(( ${#INPUT_FILES[@]} > 0 )) || err "no files given"
# Timestamped progress logger (bash printf '%(fmt)T'; -1 means "now").
log() { printf '[%(%F %T)T] %s\n' -1 "$*"; }

log "Copying ${#INPUT_FILES[@]} file(s) to workspace"
# Prefer a hard link (no data copied); fall back to a real copy when the
# link fails, e.g. across filesystems.
for src in "${INPUT_FILES[@]}"; do
  ln -f "$src" "$FILES_DIR/" 2>/dev/null || cp -p "$src" "$FILES_DIR/"
done
# --- decompress .gz ---------------------------------------------------------
# Collect *.gz explicitly so the count is correct even if nullglob is off.
gz_files=()
for gz in "$FILES_DIR"/*.gz; do
  [[ -e $gz ]] && gz_files+=("$gz")
done
if (( ${#gz_files[@]} )); then
  log "Decompressing ${#gz_files[@]} .gz file(s)"
  if command -v pigz >/dev/null; then
    # No --keep: the original kept the .gz archives here while the gunzip
    # branch removed them, so later "$FILES_DIR"/* statistics silently
    # included raw compressed bytes whenever pigz happened to be installed.
    pigz -d --force "${gz_files[@]}"
  else
    gunzip --force "${gz_files[@]}"
  fi
fi
# --- unique words -----------------------------------------------------------
STAT_WORDS="$TMP_ROOT/statWords"
log "Extracting unique words"
# Lower-cased, de-duplicated word list across all inputs (kept on disk for
# inspection; nothing later in the script reads it back).
grep -hoE '\b[[:alnum:]_]+\b' "$FILES_DIR"/* \
  | tr '[:upper:]' '[:lower:]' \
  | sort -u > "$STAT_WORDS"
# A line is "common" when it appears in more than half of the INPUT FILES,
# so the threshold must come from the file count.  The original derived it
# from the unique-word count, which does not implement that rule at all.
trigger=$(( (${#INPUT_FILES[@]} + 1) / 2 ))
log "Trigger for common‑line filtering: > $trigger occurrence(s)"
# --- optional variable substitution ----------------------------------------
# NOTE(review): TMP_ROOT is a freshly created mktemp directory, so
# "$TMP_ROOT/statWords.vars" can only exist if something external drops it
# there between mkdir and this test — as written this branch looks dead.
# Presumably the vars file was meant to live somewhere persistent; verify.
if [[ -f "$TMP_ROOT/statWords.vars" ]]; then
  log "Applying variable patterns from statWords.vars"
  # Work on a copy so FILES_DIR keeps pristine originals (-T treats the
  # destination as the directory itself rather than a parent to copy into).
  cp -aT "$FILES_DIR" "$CACHE_DIR"
  # One word per line; blanks are skipped.  Each word is rewritten to a
  # ${<word>My} placeholder in every cached file.
  while read -r var; do
    [[ $var ]] || continue
    # NOTE(review): $var is interpolated into the sed program unescaped;
    # a word containing regex metacharacters or '/' would break (or
    # subvert) the expression — sanitise upstream if the file is untrusted.
    sed -i -E "s/\b$var\b/\${${var}My}/g" "$CACHE_DIR"/*
  done < "$TMP_ROOT/statWords.vars"
else
  cp -aT "$FILES_DIR" "$CACHE_DIR"
fi
# --- filter frequent common lines ------------------------------------------
log "Computing over‑represented lines"
# uniq -c prefixes each line with a right-aligned count; keep lines whose
# count exceeds the trigger, then strip the prefix.  Leading [[:space:]]*
# (not +, as before) also matches counts wide enough to fill the padding
# field, which have no leading blanks at all.
sort "$CACHE_DIR"/* \
  | uniq -c \
  | awk -v t="$trigger" '$1 > t { sub(/^[[:space:]]*[0-9]+[[:space:]]+/,""); print }' \
  > "$TMP_ROOT/comm"
# --- generate cleaned diffs -------------------------------------------------
log "Generating diffs in $OUTDIR"
for f in "$CACHE_DIR"/*; do
  base=${f##*/}
  # Drop every line listed in comm.  grep exits 1 when NOTHING survives,
  # which is a legitimate result for a file made entirely of common lines —
  # under `set -e` the bare grep aborted the whole script there.  Only a
  # real error (status > 1) is allowed to propagate.
  grep -Fvxf "$TMP_ROOT/comm" "$f" > "$OUTDIR/$base" || [[ $? -eq 1 ]]
  chmod --reference="$f" "$OUTDIR/$base"
done
log "Finished 🎉  Results in $OUTDIR"
#24cc42#
Summary from ChatGPT
This is a shell script written in Bash. The script begins with a commented-out line that would set the "resultDiff" variable to a specific file path (it is disabled by default). The script then performs the following actions:
- Creates a directory called "old" in the home directory.
 - Changes the current working directory to the "old" directory.
 - Downloads a file from "https://infocepo.com/wiki/index.php/Special:Export/ResultDiff", filters the data, and saves it to a temporary file.
 - Runs the temporary file.
 - Returns to the previous working directory.
 
The second part of the code is a more complex shell script that performs multiple actions related to file analysis and comparison. The script does the following:
- Cleans up previous temporary files.
 - Makes two directories, "analyse$$/files" and "analyse$$/diff".
 - Copies all files from the "resultDiff" directory to the "analyse$$/files" directory, and unzips any ".gz" files.
 - Generates a list of unique words from all the files in the "analyse$$/files" directory.
 - Triggers an action if the number of files is above a certain value.
 - Replaces the words in the list with a placeholder, "varMy".
 - Compares the contents of all files in the "analyse$$/files" directory and creates a new file, "analyse$$/comm", with all common lines.
 - Filters out the lines in "analyse$$/comm" that are not present in more than half of the files.
 - Generates a "diff" file for each file in the "analyse$$/files" directory, showing the contents of the file and the missing common lines.
 - Cleans up temporary files.