#!/bin/bash
#
# Fetch word lists (plain text or JSON) from a fixed set of URLs and merge
# them into a single de-duplicated, sorted file.
#
# Usage: ./create.sh [output_file]
#   output_file  Destination path (default: merged_output.txt).
#
# Output layout: every line starting with '#' (configured comments, comment
# lines found in the sources, and the ERROR/SKIPPED markers written by this
# script) comes first, followed by the unique, sorted, non-blank remaining
# lines.
#
# Requires curl. jq is optional: without it, URLs whose path ends in ".json"
# are skipped and everything else is merged as plain text.
#
# Exit status: 0 on success; 1 if the temporary directory cannot be created,
# nothing at all was collected, or the output file cannot be written.
#
# Note: 'set -e' is deliberately not used. Fetching is best-effort per URL,
# and the critical steps are checked explicitly instead.

OUTPUT_FILE="${1:-merged_output.txt}"

# Color codes for output (real escape sequences, so plain printf '%s' works).
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly BLUE=$'\033[0;34m'
readonly NC=$'\033[0m' # No Color

# =============================================================================
# CONFIGURE YOUR URLS AND COMMENTS HERE
# Entries starting with '#' are copied to the output as comments; entries
# that do not start with http:// or https:// are ignored.
# =============================================================================
URLS_AND_COMMENTS=(
  "https://raw.githubusercontent.com/dsojevic/profanity-list/refs/heads/main/en.txt"
  "https://raw.githubusercontent.com/dsojevic/profanity-list/refs/heads/main/emoji.txt"
  "https://raw.githubusercontent.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words/refs/heads/master/de"
  "https://raw.githubusercontent.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words/refs/heads/master/en"
  "https://raw.githubusercontent.com/zacanger/profane-words/refs/heads/master/words.json"
)
# =============================================================================

TEMP_DIR=$(mktemp -d) || {
  printf 'Error: could not create a temporary directory\n' >&2
  exit 1
}
readonly TEMP_DIR
readonly FINAL_TEMP="$TEMP_DIR/combined.txt"
readonly URL_TEMP="$TEMP_DIR/url_content.tmp"

# Remove the temporary directory on every exit path, including interrupts.
cleanup() {
  rm -rf -- "${TEMP_DIR:?}"
}
trap cleanup EXIT

# Print a message wrapped in a color code.
#   $1 - color escape sequence, $2 - message text
say() {
  printf '%s%s%s\n' "$1" "$2" "$NC"
}

# Succeed when the URL's path (query string and fragment removed) ends in
# ".json", compared case-insensitively. Only used when jq is unavailable.
#   $1 - URL
is_json_url() {
  local path=${1%%[?#]*}
  [[ "$path" == *.[jJ][sS][oO][nN] ]]
}

# Append every string value of a JSON document to the combined file.
# Arrays contribute their top-level string elements; objects contribute all
# string values at any depth. Anything else is recorded as an error comment.
#   $1 - path of the downloaded JSON file, $2 - source URL (for messages)
append_json() {
  local file=$1 url=$2
  local kind filter

  if jq -e 'type == "array"' "$file" > /dev/null 2>&1; then
    kind="array"
    filter='.[] | select(type == "string")'
  elif jq -e 'type == "object"' "$file" > /dev/null 2>&1; then
    kind="object"
    filter='recurse | select(type == "string")'
  else
    say "$RED" " ✗ Unsupported JSON structure"
    printf '# ERROR: Unsupported JSON structure from %s\n' "$url" >> "$FINAL_TEMP"
    return
  fi

  if ! jq -r "$filter" "$file" >> "$FINAL_TEMP" 2> /dev/null; then
    say "$RED" " ✗ Failed to parse JSON $kind"
    printf '# ERROR: Could not parse JSON from %s\n' "$url" >> "$FINAL_TEMP"
  fi
}

# Append a plain-text download to the combined file.
#   $1 - path of the downloaded file
append_text() {
  cat -- "$1" >> "$FINAL_TEMP"
  # A source without a trailing newline would otherwise glue its last word
  # to the first word of the next source. Blank lines are dropped during the
  # final merge, so adding a newline unconditionally is harmless.
  printf '\n' >> "$FINAL_TEMP"
}

# Download one URL and append its contents (or an error marker) to the
# combined file.
#   $1 - URL
process_url() {
  local url=$1

  say "$YELLOW" "Fetching: $url"

  # Start from a clean slate so content left behind by an earlier transfer
  # is never merged under the wrong URL.
  rm -f -- "$URL_TEMP"

  if ! curl -s -f "$url" -o "$URL_TEMP" 2> /dev/null; then
    say "$RED" "✗ Failed to fetch: $url"
    printf '# ERROR: Could not fetch %s\n' "$url" >> "$FINAL_TEMP"
    return
  fi
  say "$GREEN" "✓ Successfully fetched"

  if [[ "$HAS_JQ" == true ]] && jq empty "$URL_TEMP" 2> /dev/null; then
    say "$BLUE" " → Detected JSON format, extracting strings..."
    append_json "$URL_TEMP" "$url"
  elif [[ "$HAS_JQ" != true ]] && is_json_url "$url"; then
    # Honor the startup warning: raw JSON must not be merged as word lines.
    say "$YELLOW" " → Skipping JSON file (jq not available)"
    printf '# SKIPPED: jq not available, JSON not merged from %s\n' "$url" >> "$FINAL_TEMP"
  else
    say "$BLUE" " → Processing as text file..."
    append_text "$URL_TEMP"
  fi
}

main() {
  local entry url_count=0 line_count

  # Check if jq is available for JSON processing.
  if command -v jq > /dev/null 2>&1; then
    HAS_JQ=true
  else
    HAS_JQ=false
    say "$YELLOW" "Warning: jq not found. JSON files will be skipped."
    say "$YELLOW" "Install jq with: apt-get install jq (Ubuntu/Debian) or brew install jq (macOS)"
  fi

  # Count the entries that are URLs. (Piping the joined array through
  # 'grep -c' counts lines, not entries, and therefore always reports 1.)
  for entry in "${URLS_AND_COMMENTS[@]}"; do
    if [[ "$entry" =~ ^https?:// ]]; then
      url_count=$((url_count + 1))
    fi
  done

  say "$GREEN" "Text/JSON File Merger"
  printf 'Processing %s URLs...\n' "$url_count"

  # Process URLs and comments in configuration order.
  for entry in "${URLS_AND_COMMENTS[@]}"; do
    if [[ "$entry" =~ ^[[:space:]]*# ]]; then
      printf '%s\n' "$entry" >> "$FINAL_TEMP"
    elif [[ "$entry" =~ ^https?:// ]]; then
      process_url "$entry"
    fi
  done

  # Check if we have any content.
  if [[ ! -s "$FINAL_TEMP" ]]; then
    say "$RED" "No content to process"
    exit 1
  fi

  say "$YELLOW" "Processing content..."

  # Keep comments at the top, then the unique, sorted, non-blank remainder.
  # grep exiting 1 just means "no matching lines", so it is not treated as an
  # error; the group fails only if sort fails or the output cannot be opened.
  if ! {
    grep '^#' "$FINAL_TEMP" 2> /dev/null || true
    grep -v '^#' "$FINAL_TEMP" 2> /dev/null \
      | grep -v '^[[:space:]]*$' \
      | sort -u
  } > "$OUTPUT_FILE"; then
    say "$RED" "✗ Failed to write: $OUTPUT_FILE" >&2
    exit 1
  fi

  # Show results. Some wc implementations pad the count with spaces.
  line_count=$(wc -l < "$OUTPUT_FILE")
  line_count=${line_count//[[:space:]]/}
  say "$GREEN" "✓ Complete! Merged content saved to: $OUTPUT_FILE"
  say "$GREEN" "Total lines: $line_count"

  # Show a preview of the content.
  if [[ -s "$OUTPUT_FILE" ]]; then
    say "$YELLOW" "Preview (first 10 non-comment lines):"
    grep -v '^#' "$OUTPUT_FILE" | head -10
  fi
}

main "$@"